{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating and summarizing a correlation matrix with daru and statsample\n", "\n", "## This notebook also serves as a demonstration of Daru.lazy_update and the `clone: false` option of Daru::DataFrame.new" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/javascript": [ "if(window['d3'] === undefined ||\n", " window['Nyaplot'] === undefined){\n", " var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", "\n", "\n", "\n", " var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n", "\n", " require.config({paths: path, shim:shim});\n", "\n", "\n", "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n", "\n", "\tvar script = d3.select(\"head\")\n", "\t .append(\"script\")\n", "\t .attr(\"src\", \"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n", "\t .attr(\"async\", true);\n", "\n", "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n", "\n", "\n", "\t var event = document.createEvent(\"HTMLEvents\");\n", "\t event.initEvent(\"load_nyaplot\",false,false);\n", "\t window.dispatchEvent(event);\n", "\t console.log('Finished loading Nyaplotjs');\n", "\n", "\t};\n", "\n", "\n", "});});\n", "}\n" ], "text/plain": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, 
shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "== DataFrame ==\n", "\n" ] }, { "data": { "text/html": [ "
Daru::DataFrame:13085860 rows: 10 cols: 4
abcd
00.9011467513657144-2.0698264996309637-0.05424188567538422-0.21497530674467752
10.34773905293396651.96937999883251650.64382100003735290.2691070769303308
21.169337960514499-1.2003500655322563-1.8876472885285303-0.21250262336698017
3-2.01492422282253520.37771960932499420.2339215509793050.39979121524058253
4-0.8464428241042591-1.47821823047429350.22458711725118236-0.33396263618551913
51.2471920549543476-0.887490499184695-1.10419913624543151.932482592873003
6-0.8529853403070782-0.4909368945208435-0.73064239758412050.8578541353085531
7-0.384705453247700170.48128789967429710.565122377348464-1.277608770535443
81.2856180977412772-1.288566320945822-1.78687461456970051.0500431060289053
90.51827387639413760.36345933022878907-0.463010770932286760.4544283438001362
" ], "text/plain": [ "\n", "#\n", " a b c d \n", " 0 0.90114675 -2.0698264 -0.0542418 -0.2149753 \n", " 1 0.34773905 1.96937999 0.64382100 0.26910707 \n", " 2 1.16933796 -1.2003500 -1.8876472 -0.2125026 \n", " 3 -2.0149242 0.37771960 0.23392155 0.39979121 \n", " 4 -0.8464428 -1.4782182 0.22458711 -0.3339626 \n", " 5 1.24719205 -0.8874904 -1.1041991 1.93248259 \n", " 6 -0.8529853 -0.4909368 -0.7306423 0.85785413 \n", " 7 -0.3847054 0.48128789 0.56512237 -1.2776087 \n", " 8 1.28561809 -1.2885663 -1.7868746 1.05004310 \n", " 9 0.51827387 0.36345933 -0.4630107 0.45442834 \n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "== Correlation Matrix ==\n", "\n" ] }, { "data": { "text/latex": [ "$$\\left(\\begin{array}{cccc}\n", " 1.0&-0.01467367745077135&0.011913969445518504&-0.0034212506073735397\\\\\n", " -0.01467367745077135&1.0&-0.04653177857574501&0.022525025891610012\\\\\n", " 0.011913969445518504&-0.04653177857574501&1.0&0.031949296919919545\\\\\n", " -0.0034212506073735397&0.022525025891610012&0.031949296919919545&1.0\\\\\n", "\\end{array}\\right)$$" ], "text/plain": [ "Matrix[[1.0, -0.01467367745077135, 0.011913969445518504, -0.0034212506073735397], [-0.01467367745077135, 1.0, -0.04653177857574501, 0.022525025891610012], [0.011913969445518504, -0.04653177857574501, 1.0, 0.031949296919919545], [-0.0034212506073735397, 0.022525025891610012, 0.031949296919919545, 1.0]]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Analysis 2016-03-24 11:58:04 +0000\n", "= Statsample::Bivariate.correlation_matrix\n", "\n" ] } ], "source": [ "require 'statsample'\n", "\n", "Statsample::Analysis.store(\"Statsample::Bivariate.correlation_matrix\") do\n", " \n", " # It so happens that Daru::Vector and Daru::DataFrame must update metadata\n", " # like positions of missing values every time they are created. 
\n", " #\n", " # Since we don't have any missing values in the data that we are creating, \n", " # we set Daru.lazy_update = true so that missing data is not updated every\n", " # time and things happen much faster.\n", " #\n", " # In case you do have missing data and lazy_update has been set to *true*, \n", " # you _SHOULD_ call `#update` on the concerned Vector or DataFrame object\n", " # every time an assignment or deletion cycle is complete.\n", " Daru.lazy_update = true\n", " \n", " # Create a Daru::DataFrame containing 4 vectors a, b, c and d.\n", " #\n", " # Notice that the `clone` option has been set to *false*. This tells Daru\n", " # to not clone the Daru::Vectors being supplied by `rnorm`, since it would\n", " # be unnecessarily counterproductive to clone the vectors once they have\n", " # been assigned to the dataframe.\n", " samples = 1000\n", " ds = Daru::DataFrame.new({\n", " :a => rnorm(samples),\n", " :b => rnorm(samples),\n", " :c => rnorm(samples),\n", " :d => rnorm(samples)\n", " }, clone: false)\n", " \n", " \n", " puts \"== DataFrame ==\\n\"\n", " IRuby.display ds.head\n", " \n", " # Calculate the correlation matrix of the vectors in the DataFrame.\n", " cm = Statsample::Bivariate.correlation_matrix(ds)\n", " \n", " puts \"\\n== Correlation Matrix ==\\n\"\n", " IRuby.display cm\n", " \n", " # Set lazy_update to *false* once our job is done so that this analysis does\n", " # not accidentally affect code elsewhere.\n", " Daru.lazy_update = false\n", "end\n", "\n", "Statsample::Analysis.run_batch" ] } ], "metadata": { "kernelspec": { "display_name": "Ruby 2.2.1", "language": "ruby", "name": "ruby" }, "language_info": { "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" } }, "nbformat": 4, "nbformat_minor": 0 }