{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#szekelyland workbook - extrapolating from romanian values using mortality statistics from INSSE\n", "import pandas as pd, numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df=pd.read_csv('exportPivot_POP206C.csv')\n", "de=pd.read_csv('exportPivot_POP206E.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index([u'Sexe', u' Grupe de varsta ',\n", " u' Macroregiuni regiuni de dezvoltare si judete', u' Ani',\n", " u' UM: Numar persoane', u' Valoare'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "de.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df=df.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Clasificarea internationala a maladiilor - Revizia a X a 1994',u' Macroregiuni regiuni de dezvoltare si judete'])\n", "de=de.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Sexe',u' Grupe de varsta ',u' Macroregiuni regiuni de dezvoltare si judete'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Valoare
AniSexeGrupe de varstaMacroregiuni regiuni de dezvoltare si judete
Anul 1999TotalTotalTOTAL265194
Anul 2000TotalTotalTOTAL255820
Anul 2001TotalTotalTOTAL259603
Anul 2002TotalTotalTOTAL269666
Anul 2003TotalTotalTOTAL266575
\n", "
" ], "text/plain": [ " Valoare\n", " Ani Sexe Grupe de varsta Macroregiuni regiuni de dezvoltare si judete \n", " Anul 1999 Total Total TOTAL 265194\n", " Anul 2000 Total Total TOTAL 255820\n", " Anul 2001 Total Total TOTAL 259603\n", " Anul 2002 Total Total TOTAL 269666\n", " Anul 2003 Total Total TOTAL 266575" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "de.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Anul 1999\n", " Anul 2000\n", " Anul 2001\n", " Anul 2002\n", " Anul 2003\n", " Anul 2004\n", " Anul 2005\n", " Anul 2006\n", " Anul 2007\n", " Anul 2008\n", " Anul 2009\n", " Anul 2010\n", " Anul 2011\n", " Anul 2012\n" ] } ], "source": [ "for i in df.index.levels[0].unique():\n", " print i" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Alte cauze\n", "Boli ale aparatului circulator\n", "Boli ale aparatului digestiv\n", "Boli ale aparatului genito-urinar\n", "Boli ale aparatului respirator\n", "Boli ale sistemului nervos boli ale ochiului si anexele sale boli ale urechii si apofizei mastoide\n", "Boli endocrine de nutritie si metabolism\n", "Boli infectioase si parazitare\n", "Leziuni traumatice otraviri si alte consecinte ale cauzelor externe\n", "Malformatii congenitale deformatii si anomalii cromozomiale\n", "Sarcina nastere si lauzie\n", "Total\n", "Tulburari mentale si de comportament\n", "Tumori\n", "Unele afectiuni a caror origine se situeaza in perioada perinatala\n", "din care: Boala ischemica a inimii\n", "din care: Boli cerebro-vasculare\n", "din care: Diabet zaharat\n", "din care: Tuberculoza\n" ] } ], "source": [ "for i in df.index.levels[1].unique():\n", " print i" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d={\n", "\"Alte cauze\":1094,\n", "\"Boli ale aparatului circulator\":1064,\n", "\"Boli ale aparatului digestiv\":1078,\n", "\"Boli ale aparatului genito-urinar\":1084,\n", "\"Boli ale aparatului respirator\":1072,\n", "\"Boli ale sistemului nervos boli ale ochiului si anexele sale boli ale urechii si apofizei mastoide\":9,\n", "\"Boli endocrine de nutritie si metabolism\":1051,\n", "\"Boli infectioase si parazitare\":1001,\n", "\"Leziuni traumatice otraviri si alte consecinte ale cauzelor externe\":1095,\n", "\"Malformatii congenitale deformatii si anomalii cromozomiale\":1093,\n", "\"Sarcina nastere si lauzie\":1087,\n", "\"Total\":1000,\n", "\"Tulburari mentale si de comportament\":1055,\n", "\"Tumori\":1026,\n", "\"Unele afectiuni a caror origine se situeaza in perioada perinatala\":1092,\n", "\"din care: Boala ischemica a inimii\":0,\n", "\"din care: Boli cerebro-vasculare\":0,\n", "\"din care: Diabet zaharat\":0,\n", "\"din care: Tuberculoza\":0}\n", "n0=[1048,1082,1083]\n", "n9=[1058,1062,1063]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#calculate county share in romania total\n", "pop={}\n", "for j in df.index.levels[0].unique():\n", " if j[6:] not in pop:pop[j[6:]]={}\n", " for k in df.index.levels[1].unique():\n", " if d[k]!=0:\n", " if d[k] not in pop[j[6:]]:pop[j[6:]][str(d[k])]={}\n", " pop[j[6:]][str(d[k])][\"ro\"]=df.loc[j].loc[k].loc[' TOTAL'][0]\n", " try: a=df.loc[j].loc[k].loc[' Covasna'][0]\n", " except: a=0\n", " try: b=df.loc[j].loc[k].loc[' Harghita'][0]\n", " except: b=0\n", " try: c=df.loc[j].loc[k].loc[' Mures'][0]\n", " except: c=0\n", " pop[j[6:]][str(d[k])][\"szf\"]=(a+b+c)*1.0/df.loc[j].loc[k].loc[' TOTAL'][0]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for y in pop:\n", " for c in n9:\n", " if str(c) not in pop[y]:pop[y][str(c)]={}\n", " for m in [\"szf\",\"ro\"]:\n", " pop[y][str(c)][m]=pop[y]['9'][m]/3.0\n", " pop[y].pop('9');\n", " for c in n0:\n", " if str(c) not in pop[y]:pop[y][str(c)]={}\n", " for m in [\"szf\",\"ro\"]:\n", " pop[y][str(c)][m]=pop[y]['1000'][m]\n", " " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#calculate age deviation from national average\n", "e={\"Feminin\":\"f\",\"Masculin\":\"m\",\"Total\":\"s\"}\n", "pop2={}\n", "pop3={}\n", "for j in de.index.levels[0].unique():\n", " if j[6:] not in pop2:pop2[j[6:]]={}\n", " if j[6:] not in pop3:pop3[j[6:]]={}\n", " for k in de.index.levels[1].unique():\n", " if e[k] not in pop2[j[6:]]:pop2[j[6:]][e[k]]={}\n", " if e[k] not in pop3[j[6:]]:pop3[j[6:]][e[k]]={}\n", " for l in de.index.levels[2].unique():\n", " age=l[:3].strip().strip('-')\n", " try: a=de.loc[j].loc[k].loc[l].loc[' Covasna'][0]\n", " except: a=0\n", " try: b=de.loc[j].loc[k].loc[l].loc[' Harghita'][0]\n", " except: b=0\n", " try: c=de.loc[j].loc[k].loc[l].loc[' Mures'][0]\n", " except: c=0\n", " try: d=de.loc[j].loc[k].loc[l].loc[' TOTAL'][0]\n", " except: d=0\n", " if age!='To':\n", " if age!='0':\n", " if age!='85':\n", " pop2[j[6:]][e[k]][age]=(a+b+c)*1.0\n", " pop3[j[6:]][e[k]][age]=(d)*1.0\n", " else:\n", " pop2[j[6:]][e[k]]['90']=(a+b+c)*3.0/10\n", " pop2[j[6:]][e[k]]['95']=(a+b+c)*1.0/10\n", " pop2[j[6:]][e[k]]['85']=(a+b+c)*6.0/10\n", " pop3[j[6:]][e[k]]['90']=(d)*3.0/10\n", " pop3[j[6:]][e[k]]['95']=(d)*1.0/10\n", " pop3[j[6:]][e[k]]['85']=(d)*6.0/10\n", " \n", " else:\n", " pop2[j[6:]][e[k]]['0']=(a+b+c)*1.0/5\n", " pop2[j[6:]][e[k]]['1']=(a+b+c)*1.0/5\n", " pop2[j[6:]][e[k]]['2']=(a+b+c)*1.0/5\n", " pop2[j[6:]][e[k]]['3']=(a+b+c)*1.0/5\n", " pop2[j[6:]][e[k]]['4']=(a+b+c)*1.0/5\n", " pop3[j[6:]][e[k]]['0']=(d)*1.0/5\n", " pop3[j[6:]][e[k]]['1']=(d)*1.0/5\n", " pop3[j[6:]][e[k]]['2']=(d)*1.0/5\n", " pop3[j[6:]][e[k]]['3']=(d)*1.0/5\n", " pop3[j[6:]][e[k]]['4']=(d)*1.0/5" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for i in pop2:\n", " for j in [\"f\",\"m\"]:\n", " for a in pop2[i][j]:\n", " pop2[i][j][a]=pop2[i][j][a]/sum(pop2[i][\"s\"].values())\n", " pop3[i][j][a]=pop3[i][j][a]/sum(pop3[i][\"s\"].values())" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import zipfile,json\n", "#read RO data\n", "z = zipfile.ZipFile('db2/642.zip') \n", "ro = json.loads(z.open('data.json').read())" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "h=json.loads(file('hierarchy2.json').read())" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "szf=[]\n", "for i in ro:\n", " szf.append({\"a\":i[\"a\"],\n", " \"c\":i[\"c\"],\n", " \"g\":i[\"g\"],\n", " \"t\":i[\"t\"],\n", " \"s\":i[\"s\"]*float(pop[str(i[\"t\"])][h[i['c']][\"group\"]][\"szf\"])}) " ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "szf2=[]\n", "for i in ro:\n", " szf2.append({\"a\":i[\"a\"],\n", " \"c\":i[\"c\"],\n", " \"g\":i[\"g\"],\n", " \"t\":i[\"t\"],\n", " \"s\":i[\"s\"]*float(pop[str(i[\"t\"])][h[i['c']][\"group\"]][\"szf\"])\n", " *float(pop2[str(i[\"t\"])][i[\"g\"]][str(i[\"a\"])])\n", " /float(pop3[str(i[\"t\"])][i[\"g\"]][str(i[\"a\"])])\n", " })" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#save files\n", "try:\n", " import zlib\n", " compression = zipfile.ZIP_DEFLATED\n", "except:\n", " compression = zipfile.ZIP_STORED\n", "\n", "file('db2/data.json','w').write(json.dumps(szf)) \n", "zf = zipfile.ZipFile('db2/9999.zip', mode='w')\n", "zf.write('db2/data.json','data.json',compress_type=compression)\n", "zf.close()\n", "file('db2/data.json','w').write(json.dumps(szf2)) \n", "zf = zipfile.ZipFile('db2/9998.zip', mode='w')\n", "zf.write('db2/data.json','data.json',compress_type=compression)\n", "zf.close()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#update dictionaries" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#run only once\n", "c=json.loads(file('countries.json').read())\n", "c=[u'9998']+c\n", "file('countries.json','w').write(json.dumps(c)) " ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [], "source": [ "c=json.loads(file('cnames.json').read())\n", "c[u'9998']=u'Székelyland'\n", "file('cnames.json','w').write(json.dumps(c)) " ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [], "source": [ "c=json.loads(file('hnames.json').read())\n", "c[u'Székelyland']=u'Székelyföld'\n", "file('hnames.json','w').write(json.dumps(c)) " ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#update population" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "p=json.loads(file('pop.json').read())" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "x=pd.read_csv('exportPivot_POP107A.csv')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [ "x=x.drop([u' Medii de rezidenta',u' UM: Numar persoane'],axis=1).set_index([u'Varste si grupe de varsta',u' Sexe',u' Ani',u' Macroregiuni regiuni de dezvoltare si judete'])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [], "source": [ "x=x.unstack(u' Macroregiuni regiuni de dezvoltare si judete')" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [ "x=pd.DataFrame(x.T.sum())" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['0 ani',\n", " '0- 4 ani',\n", " '1 ani',\n", " '10 ani',\n", " '10-14 ani',\n", " '11 ani',\n", " '12 ani',\n", " '13 ani',\n", " '14 ani',\n", " '15 ani',\n", " '15-19 ani',\n", " '16 ani',\n", " '17 ani',\n", " '18 ani',\n", " '19 ani',\n", " '2 ani',\n", " '20 ani',\n", " '20-24 ani',\n", " '21 ani',\n", " '22 ani',\n", " '23 ani',\n", " '24 ani',\n", " '25 ani',\n", " '25-29 ani',\n", " '26 ani',\n", " '27 ani',\n", " '28 ani',\n", " '29 ani',\n", " '3 ani',\n", " '30 ani',\n", " '30-34 ani',\n", " '31 ani',\n", " '32 ani',\n", " '33 ani',\n", " '34 ani',\n", " '35 ani',\n", " '35-39 ani',\n", " '36 ani',\n", " '37 ani',\n", " '38 ani',\n", " '39 ani',\n", " '4 ani',\n", " '40 ani',\n", " '40-44 ani',\n", " '41 ani',\n", " '42 ani',\n", " '43 ani',\n", " '44 ani',\n", " '45 ani',\n", " '45-49 ani',\n", " '46 ani',\n", " '47 ani',\n", " '48 ani',\n", " '49 ani',\n", " '5 ani',\n", " '5- 9 ani',\n", " '50 ani',\n", " '50-54 ani',\n", " '51 ani',\n", " '52 ani',\n", " '53 ani',\n", " '54 ani',\n", " '55 ani',\n", " '55-59 ani',\n", " '56 ani',\n", " '57 ani',\n", " '58 ani',\n", " '59 ani',\n", " '6 ani',\n", " '60 ani',\n", " '60-64 ani',\n", " '61 ani',\n", " '62 ani',\n", " '63 ani',\n", " '64 ani',\n", " '65 ani',\n", " '65-69 ani',\n", " '66 ani',\n", " '67 ani',\n", " '68 ani',\n", " '69 ani',\n", " '7 ani',\n", " '70 ani',\n", " '70-74 ani',\n", " '71 ani',\n", " '72 ani',\n", " '73 ani',\n", " '74 ani',\n", " '75 ani',\n", " '75-79 ani',\n", " '76 ani',\n", " '77 ani',\n", " '78 ani',\n", " '79 ani',\n", " '8 ani',\n", " '80 ani',\n", " '80-84 ani',\n", " '81 ani',\n", " '82 ani',\n", " '83 ani',\n", " '84 ani',\n", " '85 ani si peste',\n", " '9 ani',\n", " 'Total']" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(x.index.levels[0].unique())" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [], "source": [ "indice=[str(i)+' ani' for i in range(5)]+[str(i*5)+'-'+str((i+1)*5-1)+' ani' for i in range(1,17)]\n", "indice[5]='5- 9 ani'" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p['9999']={}\n", "p['9998']={}\n", "gg={\"f\":u\" Feminin\",\"m\":u\" Masculin\"}\n", "for y in range(1999,2013):\n", " if str(y) not in p['9999']:p['9999'][str(y)]={}\n", " for g in [\"f\",\"m\"]:\n", " if g not in p['9999'][str(y)]:p['9999'][str(y)][g]={}\n", " for i in indice:\n", " p['9999'][str(y)][g][str(i[:2].strip().strip('-'))]=str(float(x.loc[i].loc[gg[g]].loc[' Anul '+str(y)][0]))\n", " p['9999'][str(y)][g][str(85)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*6.0/10)\n", " p['9999'][str(y)][g][str(90)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*3.0/10)\n", " p['9999'][str(y)][g][str(95)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*1.0/10)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p['9998']=p['9999']" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#save updated population file\n", "file('pop.json','w').write(json.dumps(p)) " ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'0': '6632.0',\n", " '1': '6498.0',\n", " '10': '35468.0',\n", " '15': '46996.0',\n", " '2': '6426.0',\n", " '20': '46190.0',\n", " '25': '51603.0',\n", " '3': '6473.0',\n", " '30': '50243.0',\n", " '35': '46590.0',\n", " '4': '6921.0',\n", " '40': '34284.0',\n", " '45': '41425.0',\n", " '5': '32264.0',\n", " '50': '39348.0',\n", " '55': '30272.0',\n", " '60': '27693.0',\n", " '65': '23561.0',\n", " '70': '19324.0',\n", " '75': '13498.0',\n", " '80': '7376.0',\n", " '85': '1777.2',\n", " '90': '888.6',\n", " '95': '296.2'}" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p['9998']['2005']['m']" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'0': u'104877.0',\n", " u'1': u'101698.0',\n", " u'10': u'590487.0',\n", " u'15': u'846418.0',\n", " u'2': u'101017.0',\n", " u'20': u'790489.0',\n", " u'25': u'870167.0',\n", " u'3': u'100385.0',\n", " u'30': u'823108.0',\n", " u'35': u'871298.0',\n", " u'4': u'104074.0',\n", " u'40': u'619479.0',\n", " u'45': u'775247.0',\n", " u'5': u'540506.0',\n", " u'50': u'779300.0',\n", " u'55': u'652751.0',\n", " u'60': u'528157.0',\n", " u'65': u'614468.0',\n", " u'70': u'525710.0',\n", " u'75': u'398790.0',\n", " u'80': u'238385.0',\n", " u'85': u'69058.0',\n", " u'90': u'28777.0',\n", " u'95': u'5685.0'}" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p['642']['2005']['f']" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }