{
"metadata": {
"language": "ruby",
"name": "",
"signature": "sha256:512fa2d68b8aca8e034679cd3f2eeb1ba0d25133ebbff930f2154a7c94a3479e"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"require 'daru'\n",
"\n",
"df = Daru::DataFrame.new({a: [1,2,3,4,5], b: [10,14,15,17,44]})\n",
"df.plot legends: [:a, :b], type: :line do |p,d|\n",
" p.yrange [0,100]\n",
" p.legend true\n",
" d.color \"green\"\n",
"end"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 1,
"text": [
"\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});\\n}\\n\""
]
},
{
"html": [
"
\n",
"\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 1,
"text": [
"#[#[#:line, :options=>{:x=>:a, :y=>:b, :color=>\"green\"}, :data=>\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"}, @xrange=[1, 5], @yrange=[10, 44]>], :options=>{:yrange=>[0, 100], :legend=>true, :zoom=>true, :width=>800, :xrange=>[1, 5]}}>], :data=>{\"1ff5c864-961c-4dde-8595-13c7a3fdf9a3\"=>#1, :b=>10}, {:a=>2, :b=>14}, {:a=>3, :b=>15}, {:a=>4, :b=>17}, {:a=>5, :b=>44}]>}, :extension=>[]}>"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"require 'daru'\n",
"# Calculate statistics of numeric columns\n",
"df = Daru::DataFrame.new({\n",
" a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'], \n",
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
" c: ['small','large','large','small','small','large','small','large','small'],\n",
" d: [1,2,2,3,3,4,5,6,7],\n",
" e: [2,4,4,6,6,8,10,12,14],\n",
" f: [10,20,20,30,30,40,50,60,70]\n",
" })\n",
"df.mean"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"| | nil |
|---|
| d | 3.6666666666666665 |
| e | 7.333333333333333 |
| f | 36.666666666666664 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"\n",
"#\n",
" nil\n",
" d 3.6666666666666665\n",
" e 7.333333333333333\n",
" f 36.666666666666664\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Calculate multiple statistical measures in one shot\n",
"df.describe"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | d | e | f |
|---|
| count | 9 | 9 | 9 |
| mean | 3.6666666666666665 | 7.333333333333333 | 36.666666666666664 |
| std | 2.0 | 4.0 | 20.0 |
| min | 1 | 2 | 10 |
| max | 7 | 14 | 70 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"\n",
"#\n",
" d e f \n",
" count 9 9 9 \n",
" mean 3.66666666 7.33333333 36.6666666 \n",
" std 2.0 4.0 20.0 \n",
" min 1 2 10 \n",
" max 7 14 70 \n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Create a multi-indexed DataFrame\n",
"tuples = [\n",
" [:a,:one,:bar],\n",
" [:a,:one,:baz],\n",
" [:a,:two,:bar],\n",
" [:a,:two,:baz],\n",
" [:b,:one,:bar],\n",
" [:b,:two,:bar],\n",
" [:b,:two,:baz],\n",
" [:b,:one,:foo],\n",
" [:c,:one,:bar],\n",
" [:c,:one,:baz],\n",
" [:c,:two,:foo],\n",
" [:c,:two,:bar]\n",
"]\n",
"multi_index = Daru::MultiIndex.new(tuples)\n",
"\n",
"vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]\n",
"vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]\n",
"\n",
"order_mi = Daru::MultiIndex.new([\n",
" [:a,:one,:bar],\n",
" [:a,:two,:baz],\n",
" [:b,:two,:foo],\n",
" [:b,:one,:foo]])\n",
"\n",
"df_mi = Daru::DataFrame.new([\n",
" vector_arry1, \n",
" vector_arry2, \n",
" vector_arry1, \n",
" vector_arry2], order: order_mi, index: multi_index)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | [:a, :one, :bar] | [:a, :two, :baz] | [:b, :two, :foo] | [:b, :one, :foo] |
|---|
| [:a, :one, :bar] | 11 | 1 | 11 | 1 |
| [:a, :one, :baz] | 12 | 2 | 12 | 2 |
| [:a, :two, :bar] | 13 | 3 | 13 | 3 |
| [:a, :two, :baz] | 14 | 4 | 14 | 4 |
| [:b, :one, :bar] | 11 | 1 | 11 | 1 |
| [:b, :two, :bar] | 12 | 2 | 12 | 2 |
| [:b, :two, :baz] | 13 | 3 | 13 | 3 |
| [:b, :one, :foo] | 14 | 4 | 14 | 4 |
| [:c, :one, :bar] | 11 | 1 | 11 | 1 |
| [:c, :one, :baz] | 12 | 2 | 12 | 2 |
| [:c, :two, :foo] | 13 | 3 | 13 | 3 |
| [:c, :two, :bar] | 14 | 4 | 14 | 4 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"\n",
"#\n",
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
"[:a, :one, 11 1 11 1 \n",
"[:a, :one, 12 2 12 2 \n",
"[:a, :two, 13 3 13 3 \n",
"[:a, :two, 14 4 14 4 \n",
"[:b, :one, 11 1 11 1 \n",
"[:b, :two, 12 2 12 2 \n",
"[:b, :two, 13 3 13 3 \n",
"[:b, :one, 14 4 14 4 \n",
"[:c, :one, 11 1 11 1 \n",
"[:c, :one, 12 2 12 2 \n",
"[:c, :two, 13 3 13 3 \n",
"[:c, :two, 14 4 14 4 \n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Specify complete tuple to choose a single row\n",
"df_mi.row[:a, :one,:bar]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"| | 0 |
|---|
| [:a, :one, :bar] | 11 |
| [:a, :two, :baz] | 1 |
| [:b, :two, :foo] | 11 |
| [:b, :one, :foo] | 1 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"\n",
"#\n",
" 0\n",
"[:a, :one, :bar] 11\n",
"[:a, :two, :baz] 1\n",
"[:b, :two, :foo] 11\n",
"[:b, :one, :foo] 1\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Specify partial tuple to select index hierarchially\n",
"df_mi.row[:a]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | [:a, :one, :bar] | [:a, :two, :baz] | [:b, :two, :foo] | [:b, :one, :foo] |
|---|
| [:one, :bar] | 11 | 1 | 11 | 1 |
| [:one, :baz] | 12 | 2 | 12 | 2 |
| [:two, :bar] | 13 | 3 | 13 | 3 |
| [:two, :baz] | 14 | 4 | 14 | 4 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"\n",
"#\n",
" [:a, :one, [:a, :two, [:b, :two, [:b, :one, \n",
"[:one, :ba 11 1 11 1 \n",
"[:one, :ba 12 2 12 2 \n",
"[:two, :ba 13 3 13 3 \n",
"[:two, :ba 14 4 14 4 \n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# See grouped rows with the 'groups' method\n",
"\n",
"df = Daru::DataFrame.new({\n",
" a: %w{foo bar foo bar foo bar foo foo},\n",
" b: %w{one one two three two two one three},\n",
" c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],\n",
" d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]\n",
"})\n",
"grouped = df.group_by([:a, :b])\n",
"grouped.groups"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"{[\"bar\", \"one\"]=>[1], [\"bar\", \"three\"]=>[3], [\"bar\", \"two\"]=>[5], [\"foo\", \"one\"]=>[0, 6], [\"foo\", \"three\"]=>[7], [\"foo\", \"two\"]=>[2, 4]}"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# First group by the columns :a and :b and then calculate mean of the grouped rows.\n",
"grouped.mean"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | c | d |
|---|
| [:bar, :one] | 2 | 22 |
| [:bar, :three] | 1 | 44 |
| [:bar, :two] | 6 | 66 |
| [:foo, :one] | 2.0 | 44.0 |
| [:foo, :three] | 8 | 88 |
| [:foo, :two] | 3.0 | 44.0 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"\n",
"#\n",
" c d \n",
"[:bar, :on 2 22 \n",
"[:bar, :th 1 44 \n",
"[:bar, :tw 6 66 \n",
"[:foo, :on 2.0 44.0 \n",
"[:foo, :th 8 88 \n",
"[:foo, :tw 3.0 44.0 \n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"grouped.get_group([\"foo\", \"one\"])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"\n",
"#\n",
" a b c d \n",
" 0 foo one 1 11 \n",
" 6 foo one 3 77 \n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"require 'daru'\n",
"sales = Daru::DataFrame.from_csv '/home/sameer/sales-funnel.csv'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | account | manager | name | price | product | quantity | rep | status |
|---|
| 0 | 714466 | Debra Henley | Trantow-Barrows | 30000 | CPU | 1 | Craig Booker | presented |
| 1 | 714466 | Debra Henley | Trantow-Barrows | 10000 | Software | 1 | Craig Booker | presented |
| 2 | 714466 | Debra Henley | Trantow-Barrows | 5000 | Maintenance | 2 | Craig Booker | pending |
| 3 | 737550 | Debra Henley | Fritsch, Russel and Anderson | 35000 | CPU | 1 | Craig Booker | declined |
| 4 | 146832 | Debra Henley | Kiehn-Spinka | 65000 | CPU | 2 | Daniel Hilton | won |
| 5 | 218895 | Debra Henley | Kulas Inc | 40000 | CPU | 2 | Daniel Hilton | pending |
| 6 | 218895 | Debra Henley | Kulas Inc | 10000 | Software | 1 | Daniel Hilton | presented |
| 7 | 412290 | Debra Henley | Jerde-Hilpert | 5000 | Maintenance | 2 | John Smith | pending |
| 8 | 740150 | Debra Henley | Barton LLC | 35000 | CPU | 1 | John Smith | declined |
| 9 | 141962 | Fred Anderson | Herman LLC | 65000 | CPU | 2 | Cedric Moss | won |
| 10 | 163416 | Fred Anderson | Purdy-Kunde | 30000 | CPU | 1 | Cedric Moss | presented |
| 11 | 239344 | Fred Anderson | Stokes LLC | 5000 | Maintenance | 1 | Cedric Moss | pending |
| 12 | 239344 | Fred Anderson | Stokes LLC | 10000 | Software | 1 | Cedric Moss | presented |
| 13 | 307599 | Fred Anderson | Kassulke, Ondricka and Metz | 7000 | Maintenance | 3 | Wendy Yule | won |
| 14 | 688981 | Fred Anderson | Keeling LLC | 100000 | CPU | 5 | Wendy Yule | won |
| 15 | 729833 | Fred Anderson | Koepp Ltd | 65000 | CPU | 2 | Wendy Yule | declined |
| 16 | 729833 | Fred Anderson | Koepp Ltd | 5000 | Monitor | 2 | Wendy Yule | presented |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"\n",
"#\n",
" account manager name price product quantity rep status \n",
" 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n",
" 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n",
" 2 714466 Debra Henl Trantow-Ba 5000 Maintenanc 2 Craig Book pending \n",
" 3 737550 Debra Henl Fritsch, R 35000 CPU 1 Craig Book declined \n",
" 4 146832 Debra Henl Kiehn-Spin 65000 CPU 2 Daniel Hil won \n",
" 5 218895 Debra Henl Kulas Inc 40000 CPU 2 Daniel Hil pending \n",
" 6 218895 Debra Henl Kulas Inc 10000 Software 1 Daniel Hil presented \n",
" 7 412290 Debra Henl Jerde-Hilp 5000 Maintenanc 2 John Smith pending \n",
" 8 740150 Debra Henl Barton LLC 35000 CPU 1 John Smith declined \n",
" 9 141962 Fred Ander Herman LLC 65000 CPU 2 Cedric Mos won \n",
" 10 163416 Fred Ander Purdy-Kund 30000 CPU 1 Cedric Mos presented \n",
" 11 239344 Fred Ander Stokes LLC 5000 Maintenanc 1 Cedric Mos pending \n",
" 12 239344 Fred Ander Stokes LLC 10000 Software 1 Cedric Mos presented \n",
" 13 307599 Fred Ander Kassulke, 7000 Maintenanc 3 Wendy Yule won \n",
" 14 688981 Fred Ander Keeling LL 100000 CPU 5 Wendy Yule won \n",
" ... ... ... ... ... ... ... ... ... \n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sales.pivot_table index: [:manager, :rep]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | account | price | quantity |
|---|
| [:\"Debra Henley\", :\"Craig Booker\"] | 720237.0 | 20000.0 | 1.25 |
| [:\"Debra Henley\", :\"Daniel Hilton\"] | 194874.0 | 38333.333333333336 | 1.6666666666666667 |
| [:\"Debra Henley\", :\"John Smith\"] | 576220.0 | 20000.0 | 1.5 |
| [:\"Fred Anderson\", :\"Cedric Moss\"] | 196016.5 | 27500.0 | 1.25 |
| [:\"Fred Anderson\", :\"Wendy Yule\"] | 614061.5 | 44250.0 | 3.0 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"\n",
"#\n",
" account price quantity \n",
"[:\"Debra H 720237.0 20000.0 1.25 \n",
"[:\"Debra H 194874.0 38333.3333 1.66666666 \n",
"[:\"Debra H 576220.0 20000.0 1.5 \n",
"[:\"Fred An 196016.5 27500.0 1.25 \n",
"[:\"Fred An 614061.5 44250.0 3.0 \n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sales.pivot_table(index: [:manager,:rep], values: :price,vectors: [:product], agg: :sum)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | [:price, :CPU] | [:price, :Software] | [:price, :Maintenance] | [:price, :Monitor] |
|---|
| [:\"Debra Henley\", :\"Craig Booker\"] | 65000 | 10000 | 5000 | |
| [:\"Debra Henley\", :\"Daniel Hilton\"] | 105000 | 10000 | | |
| [:\"Debra Henley\", :\"John Smith\"] | 35000 | | 5000 | |
| [:\"Fred Anderson\", :\"Cedric Moss\"] | 95000 | 10000 | 5000 | |
| [:\"Fred Anderson\", :\"Wendy Yule\"] | 165000 | | 7000 | 5000 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"\n",
"#\n",
" [:price, : [:price, : [:price, : [:price, : \n",
"[:\"Debra H 65000 10000 5000 nil \n",
"[:\"Debra H 105000 10000 nil nil \n",
"[:\"Debra H 35000 nil 5000 nil \n",
"[:\"Fred An 95000 10000 5000 nil \n",
"[:\"Fred An 165000 nil 7000 5000 \n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = Daru::DataFrame.new({\n",
" a: ['ff' , 'fwwq', 'efe', 'a', 'efef', 'zzzz', 'efgg', 'q', 'ggf'], \n",
" b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],\n",
" c: ['small','large','large','small','small','large','small','large','small'],\n",
" d: [-1,2,-2,3,-3,4,-5,6,7],\n",
" e: [2,4,4,6,6,8,10,12,14]\n",
" })\n",
" df.sort([:a,:d], by: {a: lambda {|a,b| a.length <=> b.length }, b: lambda {|a,b| a.abs <=> b.abs }}, ascending: [false, true])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
" | a | b | c | d | e |
|---|
| 6 | efgg | one | small | -5 | 10 |
| 4 | efef | two | small | -3 | 6 |
| 1 | fwwq | one | large | 2 | 4 |
| 5 | zzzz | one | large | 4 | 8 |
| 2 | efe | one | large | -2 | 4 |
| 8 | ggf | two | small | 7 | 14 |
| 0 | ff | one | small | -1 | 2 |
| 3 | a | two | small | 3 | 6 |
| 7 | q | two | large | 6 | 12 |
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"\n",
"#\n",
" a b c d e \n",
" 6 efgg one small -5 10 \n",
" 4 efef two small -3 6 \n",
" 1 fwwq one large 2 4 \n",
" 5 zzzz one large 4 8 \n",
" 2 efe one large -2 4 \n",
" 8 ggf two small 7 14 \n",
" 0 ff one small -1 2 \n",
" 3 a two small 3 6 \n",
" 7 q two large 6 12 \n"
]
}
],
"prompt_number": 13
}
],
"metadata": {}
}
]
}