{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Draws a Correlation Chart or Heatmap on the Titanic data set"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### NuGet package installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Installing package MathNet.Numerics, version 4.9.0.....................................done!"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Successfully added reference to package MathNet.Numerics, version 4.9.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Installing package Microsoft.ML, version 1.4.0.........done!"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Successfully added reference to package Microsoft.ML, version 1.4.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Installing package XPlot.Plotly, version 3.0.1........done!"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Successfully added reference to package XPlot.Plotly, version 3.0.1"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#r \"nuget:MathNet.Numerics, 4.9.0\"\n",
    "#r \"nuget:Microsoft.ML, 1.4.0\"\n",
    "#r \"nuget:XPlot.Plotly, 3.0.1\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Namespaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "using Microsoft.ML;\n",
    "using Microsoft.ML.Data;\n",
    "using XPlot.Plotly;\n",
    "using MathNet.Numerics.Statistics;"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Simple heatmap sample\n",
    "Warming up ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "// Hand-written 3x3 matrix to try out the heatmap API before using real data.\n",
    "// Plotly pairs z[row][column] with (y[row], x[column]). y is listed in the\n",
    "// reverse order of x, so matching labels (the 1s) meet on the anti-diagonal.\n",
    "var graph = new Graph.Heatmap()\n",
    "{\n",
    "    x = new [] { \"one\", \"two\", \"three\"},\n",
    "    y = new [] { \"three\", \"two\", \"one\" },\n",
    "    z = new List<List<double>>\n",
    "    {\n",
    "        new List<double> { 0, -.75, 1 },\n",
    "        new List<double> { .75, 1, -.75 },\n",
    "        new List<double> { 1, .75, 0 }\n",
    "    },\n",
    "    // Pin the colour scale to the full range of a correlation coefficient.\n",
    "    zmin = -1,\n",
    "    zmax = 1\n",
    "};\n",
    "\n",
    "var chart = Chart.Plot(graph);\n",
    "\n",
    "var layout = new Layout.Layout(){ title=\"Sample Correlation Chart\" };\n",
    "chart.WithLayout(layout);\n",
    "\n",
    "display(chart);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## And now for the real thing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "var mlContext = new MLContext(seed: null);\n",
    "\n",
    "// Load only the numeric columns needed for the correlation chart.\n",
    "// The last argument of each TextLoader.Column is the column's index in the CSV file.\n",
    "var readerOptions = new TextLoader.Options()\n",
    "{\n",
    "    Separators = new[] { ',' },\n",
    "    HasHeader = true,\n",
    "    AllowQuoting = true,\n",
    "    Columns = new[]\n",
    "    {\n",
    "        new TextLoader.Column(\"Survived\", DataKind.Single, 1),\n",
    "        new TextLoader.Column(\"PClass\", DataKind.Single, 2),\n",
    "        new TextLoader.Column(\"Age\", DataKind.Single, 5),\n",
    "        new TextLoader.Column(\"SibSp\", DataKind.Single, 6),\n",
    "        new TextLoader.Column(\"Parch\", DataKind.Single, 7),\n",
    "        new TextLoader.Column(\"Fare\", DataKind.Single, 9)\n",
    "    }\n",
    "};\n",
    "\n",
    "// The path is relative to the notebook's working directory.\n",
    "var dataView = mlContext.Data.LoadFromTextFile(\"./Titanic.csv\", readerOptions);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Enter the Matrix\n",
    "\n",
    "ML.NET prefers singles (float), Math.NET prefers doubles ..."
] },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "// Read every loaded column into memory, widening float (ML.NET) to double (Math.NET).\n",
    "var matrix = new List<List<double>>();\n",
    "for (int i = 0; i < dataView.Schema.Count; i++)\n",
    "{\n",
    "    var column = dataView.Schema[i];\n",
    "    matrix.Add(dataView.GetColumn<float>(column).Select(f => (double)f).ToList());\n",
    "}\n",
    "\n",
    "// Size the result from the columns actually loaded instead of hard-coding 6.\n",
    "var featureCount = matrix.Count;\n",
    "var lastIndex = featureCount - 1;\n",
    "\n",
    "// Mirrored layout: row x holds column x of the data set, while cell y of that row\n",
    "// holds column (lastIndex - y). Self-correlations therefore sit on the anti-diagonal,\n",
    "// and each coefficient is computed once and written to both symmetric positions.\n",
    "var data = new double[featureCount, featureCount];\n",
    "for (int x = 0; x < featureCount; ++x)\n",
    "{\n",
    "    var seriesA = matrix[x];\n",
    "\n",
    "    for (int y = 0; y < lastIndex - x; ++y)\n",
    "    {\n",
    "        var seriesB = matrix[lastIndex - y];\n",
    "\n",
    "        // A single NaN turns the whole coefficient into NaN, so correlate only the\n",
    "        // rows where both values are present.\n",
    "        // NOTE(review): assumes empty numeric fields (e.g. a blank Age) are loaded as NaN,\n",
    "        // the TextLoader default for Single - confirm against the CSV.\n",
    "        var pairs = seriesA.Zip(seriesB, (a, b) => (A: a, B: b))\n",
    "                           .Where(p => !double.IsNaN(p.A) && !double.IsNaN(p.B))\n",
    "                           .ToList();\n",
    "\n",
    "        var value = Correlation.Pearson(pairs.Select(p => p.A), pairs.Select(p => p.B));\n",
    "\n",
    "        data[x, y] = value;\n",
    "        data[lastIndex - y, lastIndex - x] = value;\n",
    "    }\n",
    "\n",
    "    // A series always correlates perfectly with itself.\n",
    "    data[x, lastIndex - x] = 1;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Draw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "// y lists the columns in the order they were loaded (the rows of `data`).\n",
    "// x lists the same columns reversed, matching the mirrored cell order used when\n",
    "// `data` was filled. Keep both lists in step with the loader's column list.\n",
    "var graph = new Graph.Heatmap()\n",
    "{\n",
    "    y = new [] { \"Survived\", \"Class\", \"Age\", \"Sib / Sp\", \"Par / Chi\", \"Fare\" },\n",
    "    x = new [] { \"Fare\", \"Parents / Children\", \"Siblings / Spouses\", \"Age\", \"Class\", \"Survived\" },\n",
    "    z = data,\n",
    "    // Pin the colour scale to the full range of a correlation coefficient.\n",
    "    zmin = -1,\n",
    "    zmax = 1\n",
    "};\n",
    "\n",
    "var chart = Chart.Plot(graph);\n",
    "\n",
    "var layout = new Layout.Layout(){ title=\"Titanic Survival Correlation Chart\" };\n",
    "chart.WithLayout(layout);\n",
    "\n",
    "display(chart);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".NET (C#)",
   "language": "C#",
   "name": ".net-csharp"
  },
  "language_info": {
   "file_extension": ".cs",
   "mimetype": "text/x-csharp",
   "name": "C#",
   "pygments_lexer": "csharp",
   "version": "8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}