{ "metadata": { "name": "", "signature": "sha256:280ca8a223ebb7dc18534e437cb82d7746c2de312334e28942ad9aacedacb6ff" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "SocrMotionChart Configurator" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A barebones generator for Socr Motion Charts - [SOCR HTML5 Motion Chart](https://github.com/RamyElkest/SocrMotionChartsHTML5).\n", "\n", "Useful data sources include the World Bank, UN Population Division and the UN Data website.\n", "\n", "*pandas* has inbuilt support for the World Bank indicators API ([remote data access](http://pandas.pydata.org/pandas-docs/stable/remote_data.html)), and I have popped a couple of hacky scrapers together for getting data into *pandas* dataframes from:\n", "\n", "* [UNdata](http://nbviewer.ipython.org/gist/psychemedia/bc614ceb74917ffb40bf)\n", "* [UN Population Division](http://nbviewer.ipython.org/gist/psychemedia/ad1a19d1c1938dd2a705)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "socrTemplate='''\n", "\n", "
\n", "\n", "\n", "\n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "'''" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import json\n", "\n", "\n", "#TO DO - option for square axes, or axis limits?\n", "\n", "def socrdata(df,STUB,title,key,x,y,size,color,category,xscale='linear',yscale='linear'):\n", " PATH='./'\n", " txtlist=[df.columns.tolist()]\n", " for row in df.iterrows():\n", " txtlist.append(row[1].tolist())\n", " with open(PATH+\"data/\"+STUB+\"_data.js\", \"w\") as f:\n", " f.write('var data='+json.dumps(txtlist, indent=4,sort_keys=True))\n", " \n", " cols=df.columns\n", " with open(PATH+STUB+\".html\", \"w\") as f:\n", " f.write(socrTemplate.format(\n", " stub=STUB,title=title,\n", " key=df.columns.get_loc(key),\n", " x=df.columns.get_loc(x),\n", " y=df.columns.get_loc(y),\n", " size=df.columns.get_loc(size),\n", " color=df.columns.get_loc(color),\n", " category=df.columns.get_loc(category),\n", " xscale=xscale, yscale=yscale\n", " ))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -n 5 ../../bahrainPop.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\"Country\",\"Variable\",\"Sex\",\"Variant\",\"Year\",\"Age\",\"Value\"\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Total\",\"Medium variant\",\"1950\",\" 0-4\",20\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Male\",\"Medium variant\",\"1950\",\" 0-4\",10\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Female\",\"Medium variant\",\"1950\",\" 0-4\",10\r", "\r\n", "\"Bahrain\",\"Population by five-year age group and sex (thousands)\",\"Total\",\"Medium variant\",\"1950\",\" 5-9\",16\r", "\r\n" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "dfb=pd.read_csv('../../bahrainPop.csv',skip_footer=2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "tmp=dfb[['Year','Age','Sex','Value']].set_index(['Year','Age','Sex']).unstack(\"Sex\").reset_index()\n", "tmp.columns = [' '.join(col).strip() for col in tmp.columns.values]\n", "tmp[:4]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", " | Year | \n", "Age | \n", "Value Female | \n", "Value Male | \n", "Value Total | \n", "
---|---|---|---|---|---|
0 | \n", "1950 | \n", "0-4 | \n", "10 | \n", "10 | \n", "20 | \n", "
1 | \n", "1950 | \n", "10-14 | \n", "6 | \n", "7 | \n", "13 | \n", "
2 | \n", "1950 | \n", "15-19 | \n", "5 | \n", "6 | \n", "11 | \n", "
3 | \n", "1950 | \n", "20-24 | \n", "5 | \n", "6 | \n", "11 | \n", "