{ "metadata": { "language": "ruby", "name": "", "signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "require 'daru'\n", "\n", "df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n", "df.plot legends: [:a, :b], type: :line do |p,d|\n", " p.yrange [0,100]\n", " p.legend true\n", " d.color \"green\"\n", "end" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\"" ] }, { "html": [ "
\n", "\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "#[#[#:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "require 'daru'\n", "# Calculate statistics of numeric columns\n", "df = Daru::DataFrame.new({\n", " a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n", " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n", " c: ['small','large','large','small','small','large','small','large','small'],\n", " d: [1,2,2,3,3,4,5,6,7],\n", " e: [2,4,4,6,6,8,10,12,14],\n", " f: [10,20,20,30,30,40,50,60,70]\n", " })\n", "df.mean" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
nil
d3.6666666666666665
e7.333333333333333
f36.666666666666664
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "\n", "#\n", " nil\n", " d 3.6666666666666665\n", " e 7.333333333333333\n", " f 36.666666666666664\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# Calculate multiple statistical measures in one shot\n", "df.describe" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
def
count999
mean3.66666666666666657.33333333333333336.666666666666664
std2.04.020.0
min1210
max71470
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "\n", "#\n", " d e f \n", " count 9 9 9 \n", " mean 3.66666666 7.33333333 36.6666666 \n", " std 2.0 4.0 20.0 \n", " min 1 2 10 \n", " max 7 14 70 \n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# Create a multi-indexed DataFrame\n", "tuples = [\n", " [:a,:one,:bar],\n", " [:a,:one,:baz],\n", " [:a,:two,:bar],\n", " [:a,:two,:baz],\n", " [:b,:one,:bar],\n", " [:b,:two,:bar],\n", " [:b,:two,:baz],\n", " [:b,:one,:foo],\n", " [:c,:one,:bar],\n", " [:c,:one,:baz],\n", " [:c,:two,:foo],\n", " [:c,:two,:bar]\n", "]\n", "multi_index = Daru::MultiIndex.new(tuples)\n", "\n", "vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n", "vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n", "\n", "order_mi = Daru::MultiIndex.new([\n", " [:a,:one,:bar],\n", " [:a,:two,:baz],\n", " [:b,:two,:foo],\n", " [:b,:one,:foo]])\n", "\n", "df_mi = Daru::DataFrame.new([\n", " vector_arry1, \n", " vector_arry2, \n", " vector_arry1, \n", " vector_arry2], order: order_mi, index: multi_index)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
[:a, :one, :bar][:a, :two, :baz][:b, :two, :foo][:b, :one, :foo]
[:a, :one, :bar]111111
[:a, :one, :baz]122122
[:a, :two, :bar]133133
[:a, :two, :baz]144144
[:b, :one, :bar]111111
[:b, :two, :bar]122122
[:b, :two, :baz]133133
[:b, :one, :foo]144144
[:c, :one, :bar]111111
[:c, :one, :baz]122122
[:c, :two, :foo]133133
[:c, :two, :bar]144144
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "\n", "#\n", " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n", "[:a, :one, 11 1 11 1 \n", "[:a, :one, 12 2 12 2 \n", "[:a, :two, 13 3 13 3 \n", "[:a, :two, 14 4 14 4 \n", "[:b, :one, 11 1 11 1 \n", "[:b, :two, 12 2 12 2 \n", "[:b, :two, 13 3 13 3 \n", "[:b, :one, 14 4 14 4 \n", "[:c, :one, 11 1 11 1 \n", "[:c, :one, 12 2 12 2 \n", "[:c, :two, 13 3 13 3 \n", "[:c, :two, 14 4 14 4 \n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# Specify complete tuple to choose a single row\n", "df_mi.row[:a, :one,:bar]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
0
[:a, :one, :bar]11
[:a, :two, :baz]1
[:b, :two, :foo]11
[:b, :one, :foo]1
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "\n", "#\n", " 0\n", "[:a, :one, :bar] 11\n", "[:a, :two, :baz] 1\n", "[:b, :two, :foo] 11\n", "[:b, :one, :foo] 1\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# Specify partial tuple to select index hierarchically\n", "df_mi.row[:a]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
[:a, :one, :bar][:a, :two, :baz][:b, :two, :foo][:b, :one, :foo]
[:one, :bar]111111
[:one, :baz]122122
[:two, :bar]133133
[:two, :baz]144144
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "\n", "#\n", " [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n", "[:one, :ba 11 1 11 1 \n", "[:one, :ba 12 2 12 2 \n", "[:two, :ba 13 3 13 3 \n", "[:two, :ba 14 4 14 4 \n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# See grouped rows with the 'groups' method\n", "\n", "df = Daru::DataFrame.new({\n", " a: %w{foo bar foo bar foo bar foo foo},\n", " b: %w{one one two three two two one three},\n", " c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n", " d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n", "})\n", "grouped = df.group_by([:a, :b])\n", "grouped.groups" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "# First group by the columns :a and :b and then calculate mean of the grouped rows.\n", "grouped.mean" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
cd
[:bar, :one]222
[:bar, :three]144
[:bar, :two]666
[:foo, :one]2.044.0
[:foo, :three]888
[:foo, :two]3.044.0
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "\n", "#\n", " c d \n", "[:bar, :on 2 22 \n", "[:bar, :th 1 44 \n", "[:bar, :tw 6 66 \n", "[:foo, :on 2.0 44.0 \n", "[:foo, :th 8 88 \n", "[:foo, :tw 3.0 44.0 \n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "grouped.get_group([\"foo\", \"one\"])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
abcd
0fooone111
6fooone377
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "\n", "#\n", " a b c d \n", " 0 foo one 1 11 \n", " 6 foo one 3 77 \n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "require 'daru'\n", "sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
accountmanagernamepriceproductquantityrepstatus
0714466Debra HenleyTrantow-Barrows30000CPU1Craig Bookerpresented
1714466Debra HenleyTrantow-Barrows10000Software1Craig Bookerpresented
2714466Debra HenleyTrantow-Barrows5000Maintenance2Craig Bookerpending
3737550Debra HenleyFritsch, Russel and Anderson35000CPU1Craig Bookerdeclined
4146832Debra HenleyKiehn-Spinka65000CPU2Daniel Hiltonwon
5218895Debra HenleyKulas Inc40000CPU2Daniel Hiltonpending
6218895Debra HenleyKulas Inc10000Software1Daniel Hiltonpresented
7412290Debra HenleyJerde-Hilpert5000Maintenance2John Smithpending
8740150Debra HenleyBarton LLC35000CPU1John Smithdeclined
9141962Fred AndersonHerman LLC65000CPU2Cedric Mosswon
10163416Fred AndersonPurdy-Kunde30000CPU1Cedric Mosspresented
11239344Fred AndersonStokes LLC5000Maintenance1Cedric Mosspending
12239344Fred AndersonStokes LLC10000Software1Cedric Mosspresented
13307599Fred AndersonKassulke, Ondricka and Metz7000Maintenance3Wendy Yulewon
14688981Fred AndersonKeeling LLC100000CPU5Wendy Yulewon
15729833Fred AndersonKoepp Ltd65000CPU2Wendy Yuledeclined
16729833Fred AndersonKoepp Ltd5000Monitor2Wendy Yulepresented
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "\n", "#\n", " account manager name price product quantity rep status \n", " 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n", " 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n", " 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n", " 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n", " 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n", " 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n", " 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n", " 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n", " 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n", " 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n", " 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n", " 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n", " 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n", " 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n", " 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n", " ... ... ... ... ... ... ... ... ... \n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "sales.pivot_table index: [:manager, :rep]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
accountpricequantity
[:\"Debra Henley\", :\"Craig Booker\"]720237.020000.01.25
[:\"Debra Henley\", :\"Daniel Hilton\"]194874.038333.3333333333361.6666666666666667
[:\"Debra Henley\", :\"John Smith\"]576220.020000.01.5
[:\"Fred Anderson\", :\"Cedric Moss\"]196016.527500.01.25
[:\"Fred Anderson\", :\"Wendy Yule\"]614061.544250.03.0
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "\n", "#\n", " account price quantity \n", "[:\"Debra H 720237.0 20000.0 1.25 \n", "[:\"Debra H 194874.0 38333.3333 1.66666666 \n", "[:\"Debra H 576220.0 20000.0 1.5 \n", "[:\"Fred An 196016.5 27500.0 1.25 \n", "[:\"Fred An 614061.5 44250.0 3.0 \n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
[:price, :CPU][:price, :Software][:price, :Maintenance][:price, :Monitor]
[:\"Debra Henley\", :\"Craig Booker\"]65000100005000
[:\"Debra Henley\", :\"Daniel Hilton\"]10500010000
[:\"Debra Henley\", :\"John Smith\"]350005000
[:\"Fred Anderson\", :\"Cedric Moss\"]95000100005000
[:\"Fred Anderson\", :\"Wendy Yule\"]16500070005000
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "\n", "#\n", " [:price, : [:price, : [:price, : [:price, : \n", "[:\"Debra H 65000 10000 5000 nil \n", "[:\"Debra H 105000 10000 nil nil \n", "[:\"Debra H 35000 nil 5000 nil \n", "[:\"Fred An 95000 10000 5000 nil \n", "[:\"Fred An 165000 nil 7000 5000 \n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "df = Daru::DataFrame.new({\n", " a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n", " b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n", " c: ['small','large','large','small','small','large','small','large','small'],\n", " d: [-1,2,-2,3,-3,4,-5,6,7],\n", " e: [2,4,4,6,6,8,10,12,14]\n", " })\n", " df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, d: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
abcde
1fwwqonelarge24
4efeftwosmall-36
5zzzzonelarge48
6efggonesmall-510
2efeonelarge-24
8ggftwosmall714
0ffonesmall-12
3atwosmall36
7qtwolarge612
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "\n", "#\n", " a b c d e \n", " 1 fwwq one large 2 4 \n", " 4 efef two small -3 6 \n", " 5 zzzz one large 4 8 \n", " 6 efgg one small -5 10 \n", " 2 efe one large -2 4 \n", " 8 ggf two small 7 14 \n", " 0 ff one small -1 2 \n", " 3 a two small 3 6 \n", " 7 q two large 6 12 \n" ] } ], "prompt_number": 13 } ], "metadata": {} } ] }