{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "[This doc on GitHub](https://github.com/dotnet/interactive/tree/master/samples/notebooks/csharp/Samples)\n",
        "\n",
        "# Machine Learning over House Prices with ML.NET"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json\"\n",
        "#i \"nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json\"\n",
        "\n",
        "#r \"nuget:Microsoft.ML, 1.5.1\"\n",
        "#r \"nuget:Microsoft.ML.AutoML, 0.17.1\"\n",
        "#r \"nuget:Microsoft.Data.Analysis, 0.4.0\"\n",
        "#r \"nuget:XPlot.Plotly.Interactive, 4.0.2\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n",
        "using Microsoft.DotNet.Interactive.Formatting;\n",
        "using Microsoft.Data.Analysis;\n",
        "using XPlot.Plotly;"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "using Microsoft.AspNetCore.Html;\n",
        "using Microsoft.DotNet.Interactive.Formatting;\n",
        "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n",
        "\n",
        "Formatter.Register<DataFrame>((df, writer) =>\n",
        "{\n",
        "    var headers = new List<IHtmlContent>();\n",
        "    headers.Add(th(i(\"index\")));\n",
        "    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));\n",
        "    var rows = new List<List<IHtmlContent>>();\n",
        "    var take = 20;\n",
        "    for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)\n",
        "    {\n",
        "        var cells = new List<IHtmlContent>();\n",
        "        cells.Add(td(i));\n",
        "        foreach (var obj in df.Rows[i])\n",
        "        {\n",
        "            cells.Add(td(obj));\n",
        "        }\n",
        "        rows.Add(cells);\n",
        "    }\n",
        "\n",
        "    var t = table(\n",
        "        thead(\n",
        "            headers),\n",
        "        tbody(\n",
        "            rows.Select(\n",
        "                r => tr(r))));\n",
        "\n",
        "    writer.Write(t);\n",
        "}, \"text/html\");"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "using System.IO;\n",
        "using System.Net.Http;\n",
        "string housingPath = \"housing.csv\";\n",
        "\n",
        "if (!File.Exists(housingPath))\n",
        "{\n",
        "    var contents = await new HttpClient()\n",
        "        .GetStringAsync(\"https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv\");\n",
        "\n",
        "    File.WriteAllText(housingPath, contents);\n",
        "}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "var housingData = DataFrame.LoadCsv(housingPath);\n",
        "housingData"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "housingData.Description()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "Chart.Plot(\n",
        "    new Histogram()\n",
        "    {\n",
        "        x = housingData.Columns[\"median_house_value\"],\n",
        "        nbinsx = 20\n",
        "    }\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "var chart = Chart.Plot(\n",
        "    new Scattergl()\n",
        "    {\n",
        "        x = housingData.Columns[\"longitude\"],\n",
        "        y = housingData.Columns[\"latitude\"],\n",
        "        mode = \"markers\",\n",
        "        marker = new Marker()\n",
        "        {\n",
        "            color = housingData.Columns[\"median_house_value\"],\n",
        "            colorscale = \"Jet\"\n",
        "        }\n",
        "    }\n",
        ");\n",
        "\n",
        "chart.Width = 600;\n",
        "chart.Height = 600;\n",
        "display(chart);"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "static T[] Shuffle<T>(T[] array)\n",
        "{\n",
        "    Random rand = new Random();\n",
        "    for (int i = 0; i < array.Length; i++)\n",
        "    {\n",
        "        int r = i + rand.Next(array.Length - i);\n",
        "        T temp = array[r];\n",
        "        array[r] = array[i];\n",
        "        array[i] = temp;\n",
        "    }\n",
        "    return array;\n",
        "}\n",
        "\n",
        "int[] randomIndices = Shuffle(Enumerable.Range(0, (int)housingData.Rows.Count).ToArray());\n",
        "int testSize = (int)(housingData.Rows.Count * .1);\n",
        "int[] trainRows = randomIndices[testSize..];\n",
        "int[] testRows = randomIndices[..testSize];\n",
        "\n",
        "DataFrame housing_train = housingData[trainRows];\n",
        "DataFrame housing_test = housingData[testRows];\n",
        "\n",
        "display(housing_train.Rows.Count);\n",
        "display(housing_test.Rows.Count);"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "using Microsoft.ML;\n",
        "using Microsoft.ML.Data;\n",
        "using Microsoft.ML.AutoML;"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "#!time\n",
        "\n",
        "var mlContext = new MLContext();\n",
        "\n",
        "var experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);\n",
        "var result = experiment.Execute(housing_train, labelColumnName:\"median_house_value\");"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(\n",
        "    r => r.TrainerName,\n",
        "    (name, details) => new Scattergl()\n",
        "    {\n",
        "        name = name,\n",
        "        x = details.Select(r => r.RuntimeInSeconds),\n",
        "        y = details.Select(r => r.ValidationMetrics.MeanAbsoluteError),\n",
        "        mode = \"markers\",\n",
        "        marker = new Marker() { size = 12 }\n",
        "    });\n",
        "\n",
        "var chart = Chart.Plot(scatters);\n",
        "chart.WithXTitle(\"Training Time\");\n",
        "chart.WithYTitle(\"Error\");\n",
        "display(chart);\n",
        "\n",
        "Console.WriteLine($\"Best Trainer:{result.BestRun.TrainerName}\");"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "dotnet_interactive": {
          "language": "csharp"
        }
      },
      "outputs": [],
      "source": [
        "var testResults = result.BestRun.Model.Transform(housing_test);\n",
        "\n",
        "var trueValues = testResults.GetColumn<float>(\"median_house_value\");\n",
        "var predictedValues = testResults.GetColumn<float>(\"Score\");\n",
        "\n",
        "var predictedVsTrue = new Scattergl()\n",
        "{\n",
        "    x = trueValues,\n",
        "    y = predictedValues,\n",
        "    mode = \"markers\",\n",
        "};\n",
        "\n",
        "var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());\n",
        "\n",
        "var perfectLine = new Scattergl()\n",
        "{\n",
        "    x = new[] {0, maximumValue},\n",
        "    y = new[] {0, maximumValue},\n",
        "    mode = \"lines\",\n",
        "};\n",
        "\n",
        "var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });\n",
        "chart.WithXTitle(\"True Values\");\n",
        "chart.WithYTitle(\"Predicted Values\");\n",
        "chart.WithLegend(false);\n",
        "chart.Width = 600;\n",
        "chart.Height = 600;\n",
        "display(chart);"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": ".NET (C#)",
      "language": "C#",
      "name": ".net-csharp"
    },
    "language_info": {
      "file_extension": ".cs",
      "mimetype": "text/x-csharp",
      "name": "C#",
      "pygments_lexer": "csharp",
      "version": "8.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}