{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Daru Use Case: Analyzing baby names over a period of time.\n", "\n", "**In this example we'll read data about the prevalence of baby names in the US from a bunch of CSV files and try to analyze the data to figure out interesting trends in it.**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/javascript": [ "if(window['d3'] === undefined ||\n", " window['Nyaplot'] === undefined){\n", " var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", "\n", "\n", "\n", " var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n", "\n", " require.config({paths: path, shim:shim});\n", "\n", "\n", "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n", "\n", "\tvar script = d3.select(\"head\")\n", "\t .append(\"script\")\n", "\t .attr(\"src\", \"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n", "\t .attr(\"async\", true);\n", "\n", "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n", "\n", "\n", "\t var event = document.createEvent(\"HTMLEvents\");\n", "\t event.initEvent(\"load_nyaplot\",false,false);\n", "\t window.dispatchEvent(event);\n", "\t console.log('Finished loading Nyaplotjs');\n", "\n", "\t};\n", "\n", "\n", "});});\n", "}\n" ], "text/plain": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = 
{\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "true" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "require 'daru'\n", "require 'gnuplotrb'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The data is contained in multiple CSV files on a per year basis.**\n", "\n", "**Here's what a raw CSV file looks like for the year 1951. 
The first column is the name, second the sex and the third the number of births that took place with that name.**\n", "\n", "**Find the data [here](https://github.com/SciRuby/sciruby-notebooks/tree/master/Data%20Analysis/Analyzing%20baby%20names/data)**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[[\"Linda\", \"F\", \"73933\"], [\"Mary\", \"F\", \"65689\"], [\"Patricia\", \"F\", \"56422\"], [\"Deborah\", \"F\", \"42043\"], [\"Barbara\", \"F\", \"40588\"], [\"Susan\", \"F\", \"40207\"], [\"Nancy\", \"F\", \"30335\"], [\"Karen\", \"F\", \"27986\"], [\"Sandra\", \"F\", \"27656\"], [\"Kathleen\", \"F\", \"26703\"]]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "CSV.read(\"data/yob1951.txt\").first(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**All the CSV files are loaded into a DataFrame, a new column 'year' added to them for identification, and then concatenated to produce one large DataFrame.**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
Daru::DataFrame:110707160 rows: 1353205 cols: 4
birthsnamesexyear
073933LindaF1951
165689MaryF1951
256422PatriciaF1951
342043DeborahF1951
440588BarbaraF1951
540207SusanF1951
630335NancyF1951
727986KarenF1951
827656SandraF1951
926703KathleenF1951
1024773CarolF1951
1124070DonnaF1951
1223927SharonF1951
1321700BrendaF1951
1419835DianeF1951
1518559PamelaF1951
1617683MargaretF1951
1717061DebraF1951
1816949JanetF1951
1916289CynthiaF1951
2015947JaniceF1951
2115187CarolynF1951
2215103ElizabethF1951
2315016ChristineF1951
2414929JudithF1951
2514005JudyF1951
2613909ShirleyF1951
2713468JoyceF1951
2812818BettyF1951
2911987CherylF1951
3010947GloriaF1951
3110930RebeccaF1951
...............
13532045ZyrinM2014
" ], "text/plain": [ "\n", "#\n", " births name sex year \n", " 0 73933 Linda F 1951 \n", " 1 65689 Mary F 1951 \n", " 2 56422 Patricia F 1951 \n", " 3 42043 Deborah F 1951 \n", " 4 40588 Barbara F 1951 \n", " 5 40207 Susan F 1951 \n", " 6 30335 Nancy F 1951 \n", " 7 27986 Karen F 1951 \n", " 8 27656 Sandra F 1951 \n", " 9 26703 Kathleen F 1951 \n", " 10 24773 Carol F 1951 \n", " 11 24070 Donna F 1951 \n", " 12 23927 Sharon F 1951 \n", " 13 21700 Brenda F 1951 \n", " 14 19835 Diane F 1951 \n", " ... ... ... ... ... \n" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_frame = Daru::DataFrame.from_csv(\"data/yob1951.txt\", \n", " headers: ['name', 'sex', 'births'])\n", "data_frame['year'] = [1951] * data_frame.size\n", "\n", "(1952..2014).each do |year|\n", " temp = Daru::DataFrame.from_csv(\"data/yob#{year}.txt\", \n", " headers: ['name', 'sex', 'births'])\n", " temp['year'] = [year] * temp.size\n", " data_frame = data_frame.concat(temp)\n", "end\n", "data_frame.vectors = Daru::Index.new(['births','name', 'sex','year'])\n", "\n", "data_frame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Pivot the DataFrame on the year as the row and sex as the column, using the 'births' column for aggregation.**\n", "\n", "**This tells us the number of male and female births per year.**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
Daru::DataFrame:132293920 rows: 64 cols: 2
[\"births\", \"F\"][\"births\", \"M\"]
[1951]18000421881080
[1952]18546981944277
[1953]18803261969777
[1954]19416822037374
[1955]19546642057918
[1956]20075122113694
[1957]20441602155866
[1958]20108842120712
[1959]20230442133509
[1960]20220932132717
[1961]20173162122502
[1962]19665482068945
[1963]19272172031755
[1964]18945941993270
[1965]17650011861378
[1966]16918681783964
[1967]16507641744527
[1968]16401031738928
[1969]16869471789732
[1970]17481471859594
[1971]16634751769201
[1972]15211851622666
[1973]14581391559338
[1974]14674131573105
[1975]14576991562207
[1976]14650961569904
[1977]15329971643684
[1978]15316581642250
[1979]16050511721947
[1980]16599331783876
[1981]16674651790907
[1982]16926781813970
.........
[2014]17687751901376
" ], "text/plain": [ "\n", "#\n", " [\"births\", [\"births\", \n", " [1951] 1800042 1881080 \n", " [1952] 1854698 1944277 \n", " [1953] 1880326 1969777 \n", " [1954] 1941682 2037374 \n", " [1955] 1954664 2057918 \n", " [1956] 2007512 2113694 \n", " [1957] 2044160 2155866 \n", " [1958] 2010884 2120712 \n", " [1959] 2023044 2133509 \n", " [1960] 2022093 2132717 \n", " [1961] 2017316 2122502 \n", " [1962] 1966548 2068945 \n", " [1963] 1927217 2031755 \n", " [1964] 1894594 1993270 \n", " [1965] 1765001 1861378 \n", " ... ... ... \n" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pivoted = data_frame.pivot_table(\n", " index: ['year'], vectors: ['sex'], agg: :sum, values: 'births')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The row and column names are rather inconvenient so we rename them to something better.**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
Daru::DataFrame:132293920 rows: 64 cols: 2
FM
1951-01-01T00:00:00+00:0018000421881080
1952-01-01T00:00:00+00:0018546981944277
1953-01-01T00:00:00+00:0018803261969777
1954-01-01T00:00:00+00:0019416822037374
1955-01-01T00:00:00+00:0019546642057918
1956-01-01T00:00:00+00:0020075122113694
1957-01-01T00:00:00+00:0020441602155866
1958-01-01T00:00:00+00:0020108842120712
1959-01-01T00:00:00+00:0020230442133509
1960-01-01T00:00:00+00:0020220932132717
1961-01-01T00:00:00+00:0020173162122502
1962-01-01T00:00:00+00:0019665482068945
1963-01-01T00:00:00+00:0019272172031755
1964-01-01T00:00:00+00:0018945941993270
1965-01-01T00:00:00+00:0017650011861378
1966-01-01T00:00:00+00:0016918681783964
1967-01-01T00:00:00+00:0016507641744527
1968-01-01T00:00:00+00:0016401031738928
1969-01-01T00:00:00+00:0016869471789732
1970-01-01T00:00:00+00:0017481471859594
1971-01-01T00:00:00+00:0016634751769201
1972-01-01T00:00:00+00:0015211851622666
1973-01-01T00:00:00+00:0014581391559338
1974-01-01T00:00:00+00:0014674131573105
1975-01-01T00:00:00+00:0014576991562207
1976-01-01T00:00:00+00:0014650961569904
1977-01-01T00:00:00+00:0015329971643684
1978-01-01T00:00:00+00:0015316581642250
1979-01-01T00:00:00+00:0016050511721947
1980-01-01T00:00:00+00:0016599331783876
1981-01-01T00:00:00+00:0016674651790907
1982-01-01T00:00:00+00:0016926781813970
.........
2014-01-01T00:00:00+00:0017687751901376
" ], "text/plain": [ "\n", "#\n", " F M \n", "1951-01-01 1800042 1881080 \n", "1952-01-01 1854698 1944277 \n", "1953-01-01 1880326 1969777 \n", "1954-01-01 1941682 2037374 \n", "1955-01-01 1954664 2057918 \n", "1956-01-01 2007512 2113694 \n", "1957-01-01 2044160 2155866 \n", "1958-01-01 2010884 2120712 \n", "1959-01-01 2023044 2133509 \n", "1960-01-01 2022093 2132717 \n", "1961-01-01 2017316 2122502 \n", "1962-01-01 1966548 2068945 \n", "1963-01-01 1927217 2031755 \n", "1964-01-01 1894594 1993270 \n", "1965-01-01 1765001 1861378 \n", " ... ... ... \n" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pivoted.index = Daru::DateTimeIndex.date_range(:start => '1951', :periods => pivoted.size, freq: 'YEAR')\n", "pivoted.vectors = Daru::Index.new(['F', 'M'])\n", "pivoted" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The number of male births vs. female births can then be plotted against each other using the GnuplotRB gem.**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "Gnuplot\n", "Produced by GNUPLOT 5.0 patchlevel 3 \n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t \n", "\t \n", "\t\n", "\t\n", "\t \n", "\t \n", "\t\n", "\n", "\n", "\n", "\n", "\t\t\n", "\t\t 1.4x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 1.5x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 1.6x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 1.7x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 1.8x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 1.9x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 2x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 2.1x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 2.2x106\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1946\n", "\t\n", "\n", "\n", 
"\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1952\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1958\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1964\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1970\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1976\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1982\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1988\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t1994\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t2000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t2006\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t2012\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t01\n", "\t\n", "\t\n", "\t\tJan\n", "\t\n", "\t\n", "\t\t2018\n", "\t\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\t\n", "\t\tTotal births by sex and year\n", "\t\n", "\n", "\tF\n", "\n", "\t\n", "\t\tF\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\tM\n", "\n", "\t\n", "\t\tM\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\n", "\n" ], "text/plain": [ "# \"time\", :format_x => \"%d\\\\n%b\\\\n%Y\", :title => \"Total births by sex and year\", :timefmt => \"%Y-%m-%dT%H:%M:%S\"], @datasets=Hamster::Vector[#, @options=Hamster::Hash[:title => \"F\", :with => \"lines\", :using => \"1:2\"]>, #, @options=Hamster::Hash[:title => \"M\", :with => \"lines\", :using => \"1:2\"]>], @cmd=\"plot \">" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "GnuplotRB::Plot.new(\n", " [pivoted['F'], 
with: 'lines', title: 'F'],\n", " [pivoted['M'], with: 'lines', title: 'M'], title: 'Total births by sex and year')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**To further probe the data, let's perform an SQL-style GROUP BY operation on the DataFrame on the 'year' column so that the DataFrame is divided into groups according to year.**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "groups_by_year = data_frame.group_by(['year'])\n", "nil" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**This code iterates over all the groups created by year and selects from each group the row that contains a particular name.**\n", "\n", "**In this manner we collect rows from every group on a per year basis and create a DataFrame for each name which tells the number of births of a name during a particular year.**\n", "\n", "**For this example we'll choose the names James, Robert, Jessica and Sophia.**\n", "\n", "**Note: this selects a single row per name from each year, and the output of the cleaning step below shows sex `F` for all four names, so for James and Robert the series counts girls given those names, not boys.**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[\"James\", \"Robert\", \"Jessica\", \"Sophia\"]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pieces = []\n", "['James', 'Robert', 'Jessica', 'Sophia'].each do |name|\n", " rows = []\n", " groups_by_year.each_group do |group|\n", " rows << group.row[group['name'].index_of(name)]\n", " end\n", " pieces << Daru::DataFrame.rows(rows)\n", "end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Perform some basic preprocessing/cleaning on the DataFrame.**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[\n", "#\n", " births sex \n", "1951-01-01 259 F \n", "1952-01-01 261 F \n", "1953-01-01 237 F \n", "1954-01-01 226 F \n", "1955-01-01 246 F \n", "1956-01-01 249 F \n", "1957-01-01 281 F \n", "1958-01-01 252 F \n", "1959-01-01 
296 F \n", "1960-01-01 288 F \n", "1961-01-01 286 F \n", "1962-01-01 262 F \n", "1963-01-01 311 F \n", "1964-01-01 317 F \n", "1965-01-01 303 F \n", " ... ... ... \n", ", \n", "#\n", " births sex \n", "1951-01-01 208 F \n", "1952-01-01 195 F \n", "1953-01-01 228 F \n", "1954-01-01 191 F \n", "1955-01-01 235 F \n", "1956-01-01 214 F \n", "1957-01-01 255 F \n", "1958-01-01 235 F \n", "1959-01-01 237 F \n", "1960-01-01 271 F \n", "1961-01-01 289 F \n", "1962-01-01 229 F \n", "1963-01-01 256 F \n", "1964-01-01 278 F \n", "1965-01-01 255 F \n", " ... ... ... \n", ", \n", "#\n", " births sex \n", "1951-01-01 466 F \n", "1952-01-01 451 F \n", "1953-01-01 495 F \n", "1954-01-01 423 F \n", "1955-01-01 386 F \n", "1956-01-01 406 F \n", "1957-01-01 476 F \n", "1958-01-01 529 F \n", "1959-01-01 523 F \n", "1960-01-01 559 F \n", "1961-01-01 669 F \n", "1962-01-01 867 F \n", "1963-01-01 1120 F \n", "1964-01-01 1172 F \n", "1965-01-01 1530 F \n", " ... ... ... \n", ", \n", "#\n", " births sex \n", "1951-01-01 153 F \n", "1952-01-01 111 F \n", "1953-01-01 131 F \n", "1954-01-01 112 F \n", "1955-01-01 152 F \n", "1956-01-01 121 F \n", "1957-01-01 187 F \n", "1958-01-01 227 F \n", "1959-01-01 275 F \n", "1960-01-01 262 F \n", "1961-01-01 324 F \n", "1962-01-01 485 F \n", "1963-01-01 523 F \n", "1964-01-01 470 F \n", "1965-01-01 507 F \n", " ... ... ... \n", "]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pieces.each do |df|\n", " df['year'].map! 
{|e| DateTime.new(e) }\n", " df.set_index('year')\n", " df.rename df['name'][0]\n", " df.delete_vector 'name'\n", "end" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "Gnuplot\n", "Produced by GNUPLOT 5.0 patchlevel 3 \n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t \n", "\t \n", "\t\n", "\t\n", "\t \n", "\t \n", "\t\n", "\n", "\n", "\t\n", "\t\tPrevalence of certain names according to year\n", "\t\n", "\n", "\n", "\n", "\n", "\t\t\n", "\t\t 0\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 50\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 100\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 150\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 200\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 250\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 300\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 350\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 400\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1946\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1952\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1958\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1964\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1970\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1976\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1982\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1988\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1994\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2006\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2012\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2018\n", "\t\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\t\n", "\t\tOccurences\n", "\t\n", "\n", "\n", "\t\n", "\t\tYear\n", "\t\n", "\n", "\n", "\n", "\tJames\n", "\n", "\t\n", "\t\tJames\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\n", "\n", "\t\t\n", "\t\t 0\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 50\n", "\t\n", "\n", "\n", "\t\t\n", 
"\t\t 100\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 150\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 200\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 250\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 300\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 350\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1946\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1952\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1958\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1964\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1970\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1976\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1982\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1988\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1994\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2006\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2012\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2018\n", "\t\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\t\n", "\t\tOccurences\n", "\t\n", "\n", "\n", "\t\n", "\t\tYear\n", "\t\n", "\n", "\n", "\n", "\tRobert\n", "\n", "\t\n", "\t\tRobert\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\n", "\n", "\t\t\n", "\t\t 0\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 10000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 20000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 30000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 40000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 50000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 60000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1946\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1952\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1958\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1964\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1970\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1976\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1982\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1988\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1994\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2006\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2012\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2018\n", "\t\n", "\n", "\n", "\n", "\n", 
"\t\n", "\n", "\t\n", "\t\tOccurences\n", "\t\n", "\n", "\n", "\t\n", "\t\tYear\n", "\t\n", "\n", "\n", "\n", "\tJessica\n", "\n", "\t\n", "\t\tJessica\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\n", "\n", "\t\t\n", "\t\t 0\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 5000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 10000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 15000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 20000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t 25000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1946\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1952\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1958\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1964\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1970\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1976\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1982\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1988\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t1994\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2000\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2006\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2012\n", "\t\n", "\n", "\n", "\t\t\n", "\t\t2018\n", "\t\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\t\n", "\t\tOccurences\n", "\t\n", "\n", "\n", "\t\n", "\t\tYear\n", "\t\n", "\n", "\n", "\n", "\tSophia\n", "\n", "\t\n", "\t\tSophia\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\t\n", "\n", "\n", "\n" ], "text/plain": [ "# \"time\", :format_x => [\"%Y\"], :timefmt => \"%Y-%m-%dT%H:%M:%S\"], @datasets=Hamster::Vector[#, @options=Hamster::Hash[:title => \"James\", :with => \"lines\", :using => \"1:2\"]>], @cmd=\"plot \">, # \"time\", :format_x => [\"%Y\"], :timefmt => \"%Y-%m-%dT%H:%M:%S\"], @datasets=Hamster::Vector[#, @options=Hamster::Hash[:title => \"Robert\", :with => \"lines\", :using => \"1:2\"]>], @cmd=\"plot \">, # \"time\", :format_x => [\"%Y\"], :timefmt => \"%Y-%m-%dT%H:%M:%S\"], @datasets=Hamster::Vector[#, @options=Hamster::Hash[:title => \"Jessica\", :with => \"lines\", :using => \"1:2\"]>], @cmd=\"plot 
\">, # \"time\", :format_x => [\"%Y\"], :timefmt => \"%Y-%m-%dT%H:%M:%S\"], @datasets=Hamster::Vector[#, @options=Hamster::Hash[:title => \"Sophia\", :with => \"lines\", :using => \"1:2\"]>], @cmd=\"plot \">], @options=Hamster::Hash[:layout => [[2, 2]], :format_x => [\"%Y\"], :xlabel => [\"Year\"], :title => [\"Prevalence of certain names according to year\"], :ylabel => [\"Occurences\"], :xtics => [\"nomirror rotate by -45\"], :xrange => [\"\\\"1945-01-01\\\":\\\"2016-01-01\\\"\"]]>" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plots = []\n", "pieces.each do |df|\n", " plot = GnuplotRB::Plot.new([\n", " df['births'], with: 'lines', title: df.name])\n", " plot.format_x = '%Y'\n", " plots << plot\n", "end\n", "\n", "GnuplotRB::Multiplot.new(*plots).tap do |mp|\n", " mp.layout = [2,2]\n", " mp.format_x = '%Y'\n", " mp.xtics = 'nomirror rotate by -45'\n", " mp.title = 'Prevalence of certain names according to year'\n", " mp.xlabel = 'Year'\n", " mp.ylabel = 'Occurences'\n", " mp.xrange = '\"1945-01-01\":\"2016-01-01\"'\n", "end" ] } ], "metadata": { "kernelspec": { "display_name": "Ruby 2.2.1", "language": "ruby", "name": "ruby" }, "language_info": { "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" } }, "nbformat": 4, "nbformat_minor": 0 }