{ "metadata": { "language": "ruby", "name": "", "signature": "sha256:1223e8e832a0be4b5485031beb02a976ce07640b21649c78c9880a6b0b1308b1" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "require 'mikon'" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"https://rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\"" ] }, { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "true" ] } ], "prompt_number": 1 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Analyzing the Iris dataset using Mikon and Statsample" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![](https://dl.dropboxusercontent.com/u/47978121/6705547029_cbf3c91b7e.jpg)\n", "* Creative Commons licensed (BY) Flickr photo by Ian Sane: http://flickr.com/photos/31246066@N04/6705547029" ] }, { "cell_type": "code", "collapsed": false, "input": [ "path = File.expand_path(\"../iris.csv\", __FILE__)\n", "df = Mikon::DataFrame.from_csv(path)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
84.42.91.40.2setosa
94.93.11.50.1setosa
105.43.71.50.2setosa
114.83.41.60.2setosa
124.83.01.40.1setosa
134.33.01.10.1setosa
145.84.01.20.2setosa
155.74.41.50.4setosa
165.43.91.30.4setosa
175.13.51.40.3setosa
185.73.81.70.3setosa
195.13.81.50.3setosa
205.43.41.70.2setosa
215.13.71.50.4setosa
224.63.61.00.2setosa
235.13.31.70.5setosa
244.83.41.90.2setosa
255.03.01.60.2setosa
265.03.41.60.4setosa
275.23.51.50.2setosa
285.23.41.40.2setosa
294.73.21.60.2setosa
304.83.11.60.2setosa
315.43.41.50.4setosa
325.24.11.50.1setosa
335.54.21.40.2setosa
344.93.11.50.1setosa
355.03.21.20.2setosa
365.53.51.30.2setosa
374.93.11.50.1setosa
384.43.01.30.2setosa
395.13.41.50.2setosa
405.03.51.30.3setosa
414.52.31.30.3setosa
424.43.21.30.2setosa
435.03.51.60.6setosa
445.13.81.90.4setosa
454.83.01.40.3setosa
465.13.81.60.2setosa
474.63.21.40.2setosa
485.33.71.50.2setosa
495.03.31.40.2setosa
507.03.24.71.4versicolor
..................
1495.93.05.11.8virginica
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "#, @dtype=:float64>, #, @dtype=:float64>, #, @dtype=:float64>, #, @dtype=:float64>, #], @name=\"e04b0a91-2369-49cf-a1f2-b5af42bc7a22\", @index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]>" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "plot = df.plot(type: :scatter, x: :sepal_length, y: :petal_length, fill_by: :species, color: :qual)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "#[#:scatter, :options=>{:x=>:sepal_length, :y=>:petal_length, :color=>[\"rgb(179,226,205)\", \"rgb(253,205,172)\", \"rgb(203,213,232)\", \"rgb(244,202,228)\", \"rgb(230,245,201)\", \"rgb(255,242,174)\", \"rgb(241,226,204)\", \"rgb(204,204,204)\"], :fill_by=>:species}, :data=>\"e04b0a91-2369-49cf-a1f2-b5af42bc7a22\"}, @xrange=[4.3, 7.9], @yrange=[1.0, 6.9]>], :options=>{:x_label=>:sepal_length, :y_label=>:petal_length, :zoom=>true, :width=>700, :xrange=>[4.3, 7.9], :yrange=>[1.0, 6.9]}}>" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "require 'statsample'" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "false" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, fit a simple linear regression of `petal_length` on `sepal_length` with `Statsample::Regression`, then draw the fitted line on the scatter plot." ] }, { "cell_type": "code", "collapsed": false, "input": [ "lr = Statsample::Regression.simple(df[:sepal_length], df[:petal_length])\n", "puts lr.summary\n", "a, b = lr.a, lr.b" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "= Regression of sepal_length over petal_length\n", " Table 2\n", "+----------+--------+\n", "| Variable | Value |\n", "+----------+--------+\n", "| r | 0.872 |\n", "| r^2 | 0.760 |\n", "| a | -7.095 |\n", "| b | 1.858 |\n", "| s.e | 0.867 |\n", "+----------+--------+\n", "\n", "\n" ] }, { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "[-7.095381478279314, 1.8575096654214456]" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "x = (df[:sepal_length].min.round..df[:sepal_length].max.round).to_a\n", "y = x.map{|v| b*v+a}\n", "plot.add(:line, x, y)\n", "plot" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "#[#:scatter, :options=>{:x=>:sepal_length, :y=>:petal_length, :color=>[\"rgb(127,201,127)\", \"rgb(190,174,212)\", \"rgb(253,192,134)\", \"rgb(255,255,153)\", \"rgb(56,108,176)\", \"rgb(240,2,127)\", \"rgb(191,91,23)\", \"rgb(102,102,102)\"], :fill_by=>:species}, :data=>\"57a65d78-ecc2-4842-a37d-69ad38a8f9d1\"}, @xrange=[4.3, 7.9], @yrange=[1.0, 6.9]>, #:line, :options=>{:x=>\"data0\", :y=>\"data1\"}, :data=>\"4b5665e9-adc5-4127-af0f-931653704356\"}, @xrange=[4, 8], @yrange=[0.33465718340646866, 7.764695845092251]>], :options=>{:x_label=>:sepal_length, :y_label=>:petal_length, :zoom=>true, :width=>700, :xrange=>[4.3, 7.9], :yrange=>[1.0, 6.9]}}>" ] } ], "prompt_number": 14 } ], "metadata": {} } ] }