{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Draws some Box Plot diagrams on data distribution" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### NuGet package installation" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Installing package Microsoft.ML, version 1.4.0.............done!" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Successfully added reference to package Microsoft.ML, version 1.4.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Installing package XPlot.Plotly, version 3.0.1.....done!" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Successfully added reference to package XPlot.Plotly, version 3.0.1" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#r \"nuget:Microsoft.ML, 1.4.0\"\n", "#r \"nuget:XPlot.Plotly, 3.0.1\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Namespaces" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "using Microsoft.ML;\n", "using Microsoft.ML.Data;\n", "using XPlot.Plotly;" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Simple start: well prepared data" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Read the raw data" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [], "source": [
 "var mlContext = new MLContext(seed: null);\n",
 "\n",
 "// Loader settings for the comma-separated file with a header row:\n",
 "// three columns (source indices 2-4) are read as single-precision floats.\n",
 "var mallLoaderOptions = new TextLoader.Options\n",
 "{\n",
 "    Separators = new[] { ',' },\n",
 "    HasHeader = true,\n",
 "    Columns = new[]\n",
 "    {\n",
 "        new TextLoader.Column(\"Age\", DataKind.Single, 2),\n",
 "        new TextLoader.Column(\"AnnualIncome\", DataKind.Single, 3),\n",
 "        new TextLoader.Column(\"SpendingScore\", DataKind.Single, 4)\n",
 "    }\n",
 "};\n",
 "\n",
 "var mallLoader = mlContext.Data.CreateTextLoader(mallLoaderOptions);\n",
 "var dataView = mallLoader.Load(\"./Mall_Customers.csv\");"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize the data" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "// Each box receives one loaded column, read as float to match the DataKind.Single\n",
 "// declared in the loader. Columns are addressed by name rather than schema position,\n",
 "// so reordering the loader's column list cannot silently mislabel a box.\n",
 "var graph = new Graph.Box()\n",
 "{\n",
 "    y = dataView.GetColumn<float>(\"Age\"),\n",
 "    name = \"Age\"\n",
 "};\n",
 "\n",
 "var graph2 = new Graph.Box()\n",
 "{\n",
 "    y = dataView.GetColumn<float>(\"AnnualIncome\"),\n",
 "    name = \"Annual Income\"\n",
 "};\n",
 "\n",
 "var graph3 = new Graph.Box()\n",
 "{\n",
 "    y = dataView.GetColumn<float>(\"SpendingScore\"),\n",
 "    name = \"Spending Score\"\n",
 "};\n",
 "\n",
 "// The element type is spelled out: a bare List has no generic argument to infer.\n",
 "var chart = Chart.Plot(new List<Graph.Box> { graph, graph2, graph3 });\n",
 "\n",
 "var layout = new Layout.Layout(){ title=\"Shopping Mall Customers Data Distribution\", showlegend = false };\n",
 "chart.WithLayout(layout);\n",
 "\n",
 "display(chart);"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## A more interesting sample: dispersed data" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Read the raw data" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "var reader2 = mlContext.Data.CreateTextLoader(\n", " new TextLoader.Options()\n", " {\n", " Separators = new[] { ',' },\n", " HasHeader = true,\n", " Columns = new[]\n", " {\n", " new TextLoader.Column(\"Ts\", DataKind.Single, 9),\n", " new TextLoader.Column(\"Orb\", DataKind.Single, 12),\n", " new TextLoader.Column(\"Drb\", DataKind.Single, 13),\n", " new TextLoader.Column(\"Trb\", DataKind.Single, 14),\n", " new TextLoader.Column(\"Ast\", DataKind.Single, 15),\n", " new TextLoader.Column(\"Stl\", DataKind.Single, 16),\n", " new TextLoader.Column(\"Blk\", DataKind.Single, 17),\n", " new TextLoader.Column(\"Tov\", DataKind.Single, 18),\n", " new TextLoader.Column(\"Usg\", DataKind.Single, 19),\n", " new TextLoader.Column(\"Age\", DataKind.Single, 4)\n", " }\n", " });\n", "\n", "var dataView2 = reader2.Load(\"./2017-18_NBA_salary.csv\");" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize the data" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {},
"outputs": [ { "data": { "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "// One box per loaded statistic. Columns are read as float (the loader declares them\n",
 "// as DataKind.Single) and addressed by the names given in the loader, not by position.\n",
 "var chart2 = Chart.Plot(new List<Graph.Box>\n",
 "{\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Ts\"),\n",
 "        name = \"True Shootings\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Orb\"),\n",
 "        name = \"Offensive Rebounds\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Drb\"),\n",
 "        name = \"Defensive Rebounds\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        // NOTE(review): label changed from \"Team Rebounds\"; Trb is taken to be the\n",
 "        // total-rebound statistic that follows Orb/Drb - confirm against the CSV header.\n",
 "        y = dataView2.GetColumn<float>(\"Trb\"),\n",
 "        name = \"Total Rebounds\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Ast\"),\n",
 "        name = \"Assists\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Stl\"),\n",
 "        name = \"Steals\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Blk\"),\n",
 "        name = \"Blocks\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Tov\"),\n",
 "        name = \"Turnover\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Usg\"),\n",
 "        name = \"Usage\"\n",
 "    },\n",
 "    new Graph.Box()\n",
 "    {\n",
 "        y = dataView2.GetColumn<float>(\"Age\"),\n",
 "        name = \"Age\"\n",
 "    }\n",
 "});\n",
 "\n",
 "var layout2 = new Layout.Layout(){ title=\"NBA Statistics Data Distribution\", showlegend = false };\n",
 "chart2.WithLayout(layout2);\n",
 "\n",
 "display(chart2);"
] } ],
"metadata": { "kernelspec": { "display_name": ".NET (C#)", "language": "C#", "name": ".net-csharp" }, "language_info": { "file_extension": ".cs", "mimetype": "text/x-csharp", "name": "C#", "pygments_lexer": "csharp", "version": "8.0" } }, "nbformat": 4, "nbformat_minor": 2 }