{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "[This doc on GitHub](https://github.com/dotnet/interactive/tree/main/samples/notebooks/csharp/Samples)\n", "\n", "# Machine Learning over House Prices with ML.NET" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json\" \n", "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json\" \n", "\n", "#r \"nuget:Microsoft.ML, 1.7.0\"\n", "#r \"nuget:Microsoft.ML.AutoML, 0.19.0\"\n", "#r \"nuget:Microsoft.Data.Analysis, 0.19.0\"\n", "#r \"nuget:XPlot.Plotly.Interactive, 4.0.6\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n", "using Microsoft.DotNet.Interactive.Formatting;\n", "using Microsoft.Data.Analysis;\n", "using XPlot.Plotly;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "using System.IO;\n", "using System.Net.Http;\n", "string housingPath = \"housing.csv\";\n", "\n", "if (!File.Exists(housingPath))\n", "{\n", " var contents = await new HttpClient()\n", " .GetStringAsync(\"https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv\");\n", " \n", " File.WriteAllText(housingPath, contents);\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "var housingData = DataFrame.LoadCsv(housingPath);\n", "housingData" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "housingData.Description()" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "Chart.Plot(\n", " new Histogram()\n", " {\n", " x = housingData.Columns[\"median_house_value\"],\n", " nbinsx = 20\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "var chart = Chart.Plot(\n", " new Scattergl()\n", " {\n", " x = housingData.Columns[\"longitude\"],\n", " y = housingData.Columns[\"latitude\"],\n", " mode = \"markers\",\n", " marker = new Marker()\n", " {\n", " color = housingData.Columns[\"median_house_value\"],\n", " colorscale = \"Jet\"\n", " }\n", " }\n", ");\n", "\n", "chart.Width = 600;\n", "chart.Height = 600;\n", "chart.Display();" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "static T[] Shuffle<T>(T[] array)\n", "{\n", " Random rand = new Random();\n", " for (int i = 0; i < array.Length; i++)\n", " {\n", " int r = i + rand.Next(array.Length - i);\n", " T temp = array[r];\n", " array[r] = array[i];\n", " array[i] = temp;\n", " }\n", " return array;\n", "}\n", "\n", "int[] randomIndices = Shuffle(Enumerable.Range(0, (int)housingData.Rows.Count).ToArray());\n", "int testSize = (int)(housingData.Rows.Count * .1);\n", "int[] trainRows = randomIndices[testSize..];\n", "int[] testRows = randomIndices[..testSize];\n", "\n", "DataFrame housing_train = housingData[trainRows];\n", "DataFrame housing_test = housingData[testRows];\n", "\n", "housing_train.Rows.Count.Display();\n", "housing_test.Rows.Count.Display();" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "using Microsoft.ML;\n", "using Microsoft.ML.Data;\n", "using Microsoft.ML.AutoML;" ] }, { "cell_type": "code", "execution_count": null, "metadata": { 
"dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "#!time\n", "\n", "var mlContext = new MLContext();\n", "\n", "var experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);\n", "var result = experiment.Execute(housing_train, labelColumnName:\"median_house_value\");" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(\n", " r => r.TrainerName,\n", " (name, details) => new Scattergl()\n", " {\n", " name = name,\n", " x = details.Select(r => r.RuntimeInSeconds),\n", " y = details.Select(r => r.ValidationMetrics.MeanAbsoluteError),\n", " mode = \"markers\",\n", " marker = new Marker() { size = 12 }\n", " });\n", "\n", "var chart = Chart.Plot(scatters);\n", "chart.WithXTitle(\"Training Time\");\n", "chart.WithYTitle(\"Error\");\n", "chart.Display();\n", "\n", "Console.WriteLine($\"Best Trainer:{result.BestRun.TrainerName}\");" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" } }, "outputs": [], "source": [ "var testResults = result.BestRun.Model.Transform(housing_test);\n", "\n", "var trueValues = testResults.GetColumn<float>(\"median_house_value\");\n", "var predictedValues = testResults.GetColumn<float>(\"Score\");\n", "\n", "var predictedVsTrue = new Scattergl()\n", "{\n", " x = trueValues,\n", " y = predictedValues,\n", " mode = \"markers\",\n", "};\n", "\n", "var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());\n", "\n", "var perfectLine = new Scattergl()\n", "{\n", " x = new[] {0, maximumValue},\n", " y = new[] {0, maximumValue},\n", " mode = \"lines\",\n", "};\n", "\n", "var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });\n", "chart.WithXTitle(\"True Values\");\n", "chart.WithYTitle(\"Predicted Values\");\n", "chart.WithLegend(false);\n",
"chart.Width = 600;\n", "chart.Height = 600;\n", "chart.Display();" ] } ], "metadata": { "kernelspec": { "display_name": ".NET (C#)", "language": "C#", "name": ".net-csharp" }, "language_info": { "file_extension": ".cs", "mimetype": "text/x-csharp", "name": "C#", "pygments_lexer": "csharp", "version": "8.0" } }, "nbformat": 4, "nbformat_minor": 4 }