{ "metadata": { "name": "", "signature": "sha256:280ca8a223ebb7dc18534e437cb82d7746c2de312334e28942ad9aacedacb6ff" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "SocrMotionChart Configurator" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A barebones generator for Socr Motion Charts - [SOCR HTML5 Motion Chart](https://github.com/RamyElkest/SocrMotionChartsHTML5).\n", "\n", "Useful data sources include the World Bank, UN Population Division and the UN Data website.\n", "\n", "*pandas* has inbuilt support for the World Bank indicators API ([remote data access](http://pandas.pydata.org/pandas-docs/stable/remote_data.html)), and I have popped a couple of hacky scrapers together for getting data into *pandas* dataframes from:\n", "\n", "* [UNdata](http://nbviewer.ipython.org/gist/psychemedia/bc614ceb74917ffb40bf)\n", "* [UN Population Division](http://nbviewer.ipython.org/gist/psychemedia/ad1a19d1c1938dd2a705)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "socrTemplate='''\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "

\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "'''" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import json\n", "\n", "\n", "#TO DO - option for square axes, or axis limits?\n", "\n", "def socrdata(df,STUB,title,key,x,y,size,color,category,xscale='linear',yscale='linear'):\n", " PATH='./'\n", " txtlist=[df.columns.tolist()]\n", " for row in df.iterrows():\n", " txtlist.append(row[1].tolist())\n", " with open(PATH+\"data/\"+STUB+\"_data.js\", \"w\") as f:\n", " f.write('var data='+json.dumps(txtlist, indent=4,sort_keys=True))\n", " \n", " cols=df.columns\n", " with open(PATH+STUB+\".html\", \"w\") as f:\n", " f.write(socrTemplate.format(\n", " stub=STUB,title=title,\n", " key=df.columns.get_loc(key),\n", " x=df.columns.get_loc(x),\n", " y=df.columns.get_loc(y),\n", " size=df.columns.get_loc(size),\n", " color=df.columns.get_loc(color),\n", " category=df.columns.get_loc(category),\n", " xscale=xscale, yscale=yscale\n", " ))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -n 5 ../../bahrainPop.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\"Country\",\"Variable\",\"Sex\",\"Variant\",\"Year\",\"Age\",\"Value\"\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Total\",\"Medium variant\",\"1950\",\" 0-4\",20\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Male\",\"Medium variant\",\"1950\",\" 0-4\",10\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Female\",\"Medium variant\",\"1950\",\" 0-4\",10\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Total\",\"Medium variant\",\"1950\",\" 5-9\",16\r", "\r\n" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "dfb=pd.read_csv('../../bahrainPop.csv',skip_footer=2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "tmp=dfb[['Year','Age','Sex','Value']].set_index(['Year','Age','Sex']).unstack(\"Sex\").reset_index()\n", "tmp.columns = [' '.join(col).strip() for col in tmp.columns.values]\n", "tmp[:4]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "

	Year	Age	Value Female	Value Male	Value Total
0	1950	0-4	10	10	20
1	1950	10-14	6	7	13
2	1950	15-19	5	6	11
3	1950	20-24	5	6	11

\n", "

" ], "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ " Year Age Value Female Value Male Value Total\n", "0 1950 0-4 10 10 20\n", "1 1950 10-14 6 7 13\n", "2 1950 15-19 5 6 11\n", "3 1950 20-24 5 6 11" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "socrdata(tmp[(~tmp['Age'].isin([' 5-9', ' 10-14', ' 15-19', ' 20-24',' 35-39', ' 40-44', ' 45-49', ' 50-54', ' 55-59', ' 60-64',\n", " ' 65-69', ' 70-74', ' 75-79', ' 80+', ' 80-84', ' 85-89', ' 90-94',\n", " ' 95-99', ' 100+'])) & (tmp['Year']<2020) ],\n", " 'bahrainPopx','Bahrain','Year','Value Female','Value Male','Value Total','Age','Age')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "!ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "README.md\t\t\t bangladeshPopx.html js\r\n", "SocrMotionChart Configurator.ipynb bigtest.html\t license\r\n", "_index.html\t\t\t css\t\t\t wiki.documentation.txt\r\n", "bangladeshPop.html\t\t data\r\n" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }