{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"application/javascript": [
"if(window['d3'] === undefined ||\n",
" window['Nyaplot'] === undefined){\n",
" var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n",
"\n",
"\n",
"\n",
" var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n",
"\n",
" require.config({paths: path, shim:shim});\n",
"\n",
"\n",
"require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n",
"\n",
"\tvar script = d3.select(\"head\")\n",
"\t .append(\"script\")\n",
"\t .attr(\"src\", \"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n",
"\t .attr(\"async\", true);\n",
"\n",
"\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n",
"\n",
"\n",
"\t var event = document.createEvent(\"HTMLEvents\");\n",
"\t event.initEvent(\"load_nyaplot\",false,false);\n",
"\t window.dispatchEvent(event);\n",
"\t console.log('Finished loading Nyaplotjs');\n",
"\n",
"\t};\n",
"\n",
"\n",
"});});\n",
"}\n"
],
"text/plain": [
"\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"true"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'daru'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
" \n",
" Daru::DataFrame(10x7) | \n",
"
\n",
"\n",
" \n",
" \n",
" | \n",
" \n",
" Country | \n",
" \n",
" Region | \n",
" \n",
" Population | \n",
" \n",
" Under15 | \n",
" \n",
" Over60 | \n",
" \n",
" FertilityRate | \n",
" \n",
" LifeExpectancy | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
" \n",
" 0 | \n",
" \n",
" Afghanistan | \n",
" \n",
" Eastern Mediterranean | \n",
" \n",
" 29825 | \n",
" \n",
" 47.42 | \n",
" \n",
" 3.82 | \n",
" \n",
" 5.4 | \n",
" \n",
" 60 | \n",
" \n",
"
\n",
" \n",
" \n",
" 1 | \n",
" \n",
" Albania | \n",
" \n",
" Europe | \n",
" \n",
" 3162 | \n",
" \n",
" 21.33 | \n",
" \n",
" 14.93 | \n",
" \n",
" 1.75 | \n",
" \n",
" 74 | \n",
" \n",
"
\n",
" \n",
" \n",
" 2 | \n",
" \n",
" Algeria | \n",
" \n",
" Africa | \n",
" \n",
" 38482 | \n",
" \n",
" 27.42 | \n",
" \n",
" 7.17 | \n",
" \n",
" 2.83 | \n",
" \n",
" 73 | \n",
" \n",
"
\n",
" \n",
" \n",
" 3 | \n",
" \n",
" Andorra | \n",
" \n",
" Europe | \n",
" \n",
" 78 | \n",
" \n",
" 15.2 | \n",
" \n",
" 22.86 | \n",
" \n",
" | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
" \n",
" 4 | \n",
" \n",
" Angola | \n",
" \n",
" Africa | \n",
" \n",
" 20821 | \n",
" \n",
" 47.58 | \n",
" \n",
" 3.84 | \n",
" \n",
" 6.1 | \n",
" \n",
" 51 | \n",
" \n",
"
\n",
" \n",
" \n",
" 5 | \n",
" \n",
" Antigua and Barbuda | \n",
" \n",
" Americas | \n",
" \n",
" 89 | \n",
" \n",
" 25.96 | \n",
" \n",
" 12.35 | \n",
" \n",
" 2.12 | \n",
" \n",
" 75 | \n",
" \n",
"
\n",
" \n",
" \n",
" 6 | \n",
" \n",
" Argentina | \n",
" \n",
" Americas | \n",
" \n",
" 41087 | \n",
" \n",
" 24.42 | \n",
" \n",
" 14.97 | \n",
" \n",
" 2.2 | \n",
" \n",
" 76 | \n",
" \n",
"
\n",
" \n",
" \n",
" 7 | \n",
" \n",
" Armenia | \n",
" \n",
" Europe | \n",
" \n",
" 2969 | \n",
" \n",
" 20.34 | \n",
" \n",
" 14.06 | \n",
" \n",
" 1.74 | \n",
" \n",
" 71 | \n",
" \n",
"
\n",
" \n",
" \n",
" 8 | \n",
" \n",
" Australia | \n",
" \n",
" Western Pacific | \n",
" \n",
" 23050 | \n",
" \n",
" 18.95 | \n",
" \n",
" 19.46 | \n",
" \n",
" 1.89 | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
" \n",
" 9 | \n",
" \n",
" Austria | \n",
" \n",
" Europe | \n",
" \n",
" 8464 | \n",
" \n",
" 14.51 | \n",
" \n",
" 23.52 | \n",
" \n",
" 1.44 | \n",
" \n",
" 81 | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
"
"
],
"text/plain": [
"#\n",
" Country Region Population Under15 Over60 FertilityR LifeExpect\n",
" 0 Afghanista Eastern Me 29825 47.42 3.82 5.4 60\n",
" 1 Albania Europe 3162 21.33 14.93 1.75 74\n",
" 2 Algeria Africa 38482 27.42 7.17 2.83 73\n",
" 3 Andorra Europe 78 15.2 22.86 nil 82\n",
" 4 Angola Africa 20821 47.58 3.84 6.1 51\n",
" 5 Antigua an Americas 89 25.96 12.35 2.12 75\n",
" 6 Argentina Americas 41087 24.42 14.97 2.2 76\n",
" 7 Armenia Europe 2969 20.34 14.06 1.74 71\n",
" 8 Australia Western Pa 23050 18.95 19.46 1.89 82\n",
" 9 Austria Europe 8464 14.51 23.52 1.44 81"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'open-uri'\n",
"content = open('https://d37djvu3ytnwxt.cloudfront.net/asset-v1:MITx+15.071x_3+1T2016+type@asset+block/WHO.csv')\n",
"df = Daru::DataFrame.from_csv content\n",
"df = df.at 0..6\n",
"df.first"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"#"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index = Daru::CategoricalIndex.new df['Region'].to_a"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" Daru::DataFrame(5x7) | \n",
"
\n",
"\n",
" \n",
" \n",
" | \n",
" \n",
" Country | \n",
" \n",
" Region | \n",
" \n",
" Population | \n",
" \n",
" Under15 | \n",
" \n",
" Over60 | \n",
" \n",
" FertilityRate | \n",
" \n",
" LifeExpectancy | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
" \n",
" Eastern Mediterranean | \n",
" \n",
" Afghanistan | \n",
" \n",
" Eastern Mediterranean | \n",
" \n",
" 29825 | \n",
" \n",
" 47.42 | \n",
" \n",
" 3.82 | \n",
" \n",
" 5.4 | \n",
" \n",
" 60 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" Albania | \n",
" \n",
" Europe | \n",
" \n",
" 3162 | \n",
" \n",
" 21.33 | \n",
" \n",
" 14.93 | \n",
" \n",
" 1.75 | \n",
" \n",
" 74 | \n",
" \n",
"
\n",
" \n",
" \n",
" Africa | \n",
" \n",
" Algeria | \n",
" \n",
" Africa | \n",
" \n",
" 38482 | \n",
" \n",
" 27.42 | \n",
" \n",
" 7.17 | \n",
" \n",
" 2.83 | \n",
" \n",
" 73 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" Andorra | \n",
" \n",
" Europe | \n",
" \n",
" 78 | \n",
" \n",
" 15.2 | \n",
" \n",
" 22.86 | \n",
" \n",
" | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
" \n",
" Africa | \n",
" \n",
" Angola | \n",
" \n",
" Africa | \n",
" \n",
" 20821 | \n",
" \n",
" 47.58 | \n",
" \n",
" 3.84 | \n",
" \n",
" 6.1 | \n",
" \n",
" 51 | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
"
"
],
"text/plain": [
"#\n",
" Country Region Population Under15 Over60 FertilityR LifeExpect\n",
" Eastern Me Afghanista Eastern Me 29825 47.42 3.82 5.4 60\n",
" Europe Albania Europe 3162 21.33 14.93 1.75 74\n",
" Africa Algeria Africa 38482 27.42 7.17 2.83 73\n",
" Europe Andorra Europe 78 15.2 22.86 nil 82\n",
" Africa Angola Africa 20821 47.58 3.84 6.1 51"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.first 5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Say we want to know about regions as a whole. That is why we indexed our dataset by the 'Region' vector above."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List all regions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[\"Eastern Mediterranean\", \"Europe\", \"Africa\", \"Americas\", \"Western Pacific\", \"South-East Asia\"]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index.categories"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's find out how many countries lie in the Africa region."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"46"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.row['Africa'].size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finding the mean life expectancy of Europe is as easy as:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"76.73584905660377"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.row['Europe']['LifeExpectancy'].mean"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see the maximum life expectancy in South-East Asia."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"77"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.row['South-East Asia']['LifeExpectancy'].max"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see the countries in Europe that top the list of `LifeExpectancy`."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" Daru::DataFrame(5x7) | \n",
"
\n",
"\n",
" \n",
" \n",
" | \n",
" \n",
" Country | \n",
" \n",
" Region | \n",
" \n",
" Population | \n",
" \n",
" Under15 | \n",
" \n",
" Over60 | \n",
" \n",
" FertilityRate | \n",
" \n",
" LifeExpectancy | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
" \n",
" Europe | \n",
" \n",
" San Marino | \n",
" \n",
" Europe | \n",
" \n",
" 31 | \n",
" \n",
" 14.04 | \n",
" \n",
" 26.97 | \n",
" \n",
" | \n",
" \n",
" 83 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" Switzerland | \n",
" \n",
" Europe | \n",
" \n",
" 7997 | \n",
" \n",
" 14.79 | \n",
" \n",
" 23.25 | \n",
" \n",
" 1.51 | \n",
" \n",
" 83 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" Andorra | \n",
" \n",
" Europe | \n",
" \n",
" 78 | \n",
" \n",
" 15.2 | \n",
" \n",
" 22.86 | \n",
" \n",
" | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" France | \n",
" \n",
" Europe | \n",
" \n",
" 63937 | \n",
" \n",
" 18.26 | \n",
" \n",
" 23.82 | \n",
" \n",
" 1.98 | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
" \n",
" Europe | \n",
" \n",
" Iceland | \n",
" \n",
" Europe | \n",
" \n",
" 326 | \n",
" \n",
" 20.71 | \n",
" \n",
" 17.62 | \n",
" \n",
" 2.11 | \n",
" \n",
" 82 | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
"
"
],
"text/plain": [
"#\n",
" Country Region Population Under15 Over60 FertilityR LifeExpect\n",
" Europe San Marino Europe 31 14.04 26.97 nil 83\n",
" Europe Switzerlan Europe 7997 14.79 23.25 1.51 83\n",
" Europe Andorra Europe 78 15.2 22.86 nil 82\n",
" Europe France Europe 63937 18.26 23.82 1.98 82\n",
" Europe Iceland Europe 326 20.71 17.62 2.11 82"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.row['Europe'].sort(['LifeExpectancy'], ascending: false).first 5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see the ten countries in `South-East Asia` with the highest `FertilityRate` (listed in ascending order)."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" Daru::DataFrame(10x7) | \n",
"
\n",
"\n",
" \n",
" \n",
" | \n",
" \n",
" Country | \n",
" \n",
" Region | \n",
" \n",
" Population | \n",
" \n",
" Under15 | \n",
" \n",
" Over60 | \n",
" \n",
" FertilityRate | \n",
" \n",
" LifeExpectancy | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Myanmar | \n",
" \n",
" South-East Asia | \n",
" \n",
" 52797 | \n",
" \n",
" 25.28 | \n",
" \n",
" 8.15 | \n",
" \n",
" 1.98 | \n",
" \n",
" 65 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Democratic People's Republic of Korea | \n",
" \n",
" South-East Asia | \n",
" \n",
" 24763 | \n",
" \n",
" 21.98 | \n",
" \n",
" 12.74 | \n",
" \n",
" 2 | \n",
" \n",
" 69 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Bangladesh | \n",
" \n",
" South-East Asia | \n",
" \n",
" 155000 | \n",
" \n",
" 30.57 | \n",
" \n",
" 6.89 | \n",
" \n",
" 2.24 | \n",
" \n",
" 70 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Maldives | \n",
" \n",
" South-East Asia | \n",
" \n",
" 338 | \n",
" \n",
" 29.03 | \n",
" \n",
" 6.65 | \n",
" \n",
" 2.31 | \n",
" \n",
" 77 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Bhutan | \n",
" \n",
" South-East Asia | \n",
" \n",
" 742 | \n",
" \n",
" 28.53 | \n",
" \n",
" 6.9 | \n",
" \n",
" 2.32 | \n",
" \n",
" 67 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Sri Lanka | \n",
" \n",
" South-East Asia | \n",
" \n",
" 21098 | \n",
" \n",
" 25.15 | \n",
" \n",
" 12.4 | \n",
" \n",
" 2.35 | \n",
" \n",
" 75 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Indonesia | \n",
" \n",
" South-East Asia | \n",
" \n",
" 247000 | \n",
" \n",
" 29.27 | \n",
" \n",
" 7.86 | \n",
" \n",
" 2.4 | \n",
" \n",
" 69 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Nepal | \n",
" \n",
" South-East Asia | \n",
" \n",
" 27474 | \n",
" \n",
" 35.58 | \n",
" \n",
" 7.65 | \n",
" \n",
" 2.5 | \n",
" \n",
" 68 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" India | \n",
" \n",
" South-East Asia | \n",
" \n",
" 1240000 | \n",
" \n",
" 29.43 | \n",
" \n",
" 8.1 | \n",
" \n",
" 2.53 | \n",
" \n",
" 65 | \n",
" \n",
"
\n",
" \n",
" \n",
" South-East Asia | \n",
" \n",
" Timor-Leste | \n",
" \n",
" South-East Asia | \n",
" \n",
" 1114 | \n",
" \n",
" 46.33 | \n",
" \n",
" 5.16 | \n",
" \n",
" 6.11 | \n",
" \n",
" 64 | \n",
" \n",
"
\n",
" \n",
"\n",
" \n",
"
"
],
"text/plain": [
"#\n",
" Country Region Population Under15 Over60 FertilityR LifeExpect\n",
" South-East Myanmar South-East 52797 25.28 8.15 1.98 65\n",
" South-East Democratic South-East 24763 21.98 12.74 2 69\n",
" South-East Bangladesh South-East 155000 30.57 6.89 2.24 70\n",
" South-East Maldives South-East 338 29.03 6.65 2.31 77\n",
" South-East Bhutan South-East 742 28.53 6.9 2.32 67\n",
" South-East Sri Lanka South-East 21098 25.15 12.4 2.35 75\n",
" South-East Indonesia South-East 247000 29.27 7.86 2.4 69\n",
" South-East Nepal South-East 27474 35.58 7.65 2.5 68\n",
" South-East India South-East 1240000 29.43 8.1 2.53 65\n",
" South-East Timor-Lest South-East 1114 46.33 5.16 6.11 64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.row['South-East Asia'].sort(['FertilityRate']).row.at -10..-1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Ruby 2.2.1",
"language": "ruby",
"name": "ruby"
},
"language_info": {
"file_extension": ".rb",
"mimetype": "application/x-ruby",
"name": "ruby",
"version": "2.2.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}