{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2019-03-07T02:56:32.521682Z", "start_time": "2019-03-07T02:56:26.288106Z" } }, "outputs": [], "source": [ "import pandas as pd, numpy as np, json" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "base_url='http://www.cdep.ro'" ] }, { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "nmembers=json.loads(open('data/nmembers.json','r').read())" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [], "source": [ "img=json.loads(open('data/img.json','r').read())" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [], "source": [ "records=json.loads(open('data/records.json','r').read())" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'votid': '318',\n", " 'vot': 'DA',\n", " 'grup': 'neafiliat',\n", " 'name': 'Lificiu Petru',\n", " 'camera': '2'}" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "records[90]" ] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Name': {'full': 'Mihăiţă CALIMENTE', 'simple': 'Calimente Mihaita'},\n", " 'Photo': ['http://www.cdep.ro/parlamentari/l1996/caliment.jpg',\n", " 'http://www.cdep.ro/parlamentari/l2008/Calimente_Mihaita.jpg',\n", " 'http://www.cdep.ro/parlamentari/l2012/Calimente_Mihaita.jpg',\n", " 'http://www.cdep.ro/parlamentari/l2004/CalimenteMihaita.jpg'],\n", " 'UserID': ['5', '58', '50', '60'],\n", " 'Camera': ['DEPUTAT', 'DEPUTAT', 'DEPUTAT', 'DEPUTAT'],\n", " 'Link': ['http://www.cdep.ro/pls/parlam/structura2015.mp?idm=5&leg=1996&cam=2',\n", " 'http://www.cdep.ro/pls/parlam/structura2015.mp?idm=58&leg=2008&cam=2',\n", " 'http://www.cdep.ro/pls/parlam/structura2015.mp?idm=50&leg=2012&cam=2',\n", " 'http://www.cdep.ro/pls/parlam/structura2015.mp?idm=60&leg=2004&cam=2'],\n", " 'Parties': [{'party': 'PNTCD',\n", " 'start': '1996-11-27',\n", " 'end': '2000-02-01',\n", " 'judet': 'ARAD'},\n", " {'party': 'Independent',\n", " 'start': '2000-02-01',\n", " 'end': '2000-11-30',\n", " 'judet': 'ARAD'},\n", " {'party': 'PNL',\n", " 'start': '2008-12-19',\n", " 'end': '2012-12-20',\n", " 'judet': 'ARAD'},\n", " {'party': 'PNL',\n", " 'start': '2012-12-20',\n", " 'end': '2016-09-01',\n", " 'judet': 'ARAD'},\n", " {'party': 'Independent',\n", " 'start': '2016-09-01',\n", " 'end': '2016-12-21',\n", " 'judet': 'ARAD'},\n", " {'party': 'PNL',\n", " 'start': '2004-12-19',\n", " 'end': '2008-12-13',\n", " 'judet': 'ARAD'}],\n", " 'Countries': [{'country': 'Republica Federativă a Braziliei',\n", " 'start': '1997-02-01',\n", " 'end': '2000-11-30'},\n", " {'country': 'Republica Letonia', 'start': '2010-03-01', 'end': '2012-12-20'},\n", " {'country': 'Marele Ducat de Luxemburg',\n", " 'start': '2010-03-01',\n", " 'end': '2012-12-20'},\n", " {'country': 'Republica Italiană',\n", " 'start': '2010-03-01',\n", " 'end': '2012-12-20'},\n", " {'country': 'Marele Ducat de Luxemburg',\n", " 'start': '2012-12-20',\n", " 'end': '2016-12-21'},\n", " {'country': 'Albania', 'start': '2012-12-20', 'end': '2016-12-21'},\n", " {'country': 'UNESCO', 'start': '2007-04-01', 'end': '2008-03-01'},\n", " {'country': 'Marele Ducat de Luxemburg',\n", " 'start': '2004-12-19',\n", " 'end': '2008-12-13'}],\n", " 'Groups': [{'group': 'Comisia pentru cercetarea abuzurilor, corupţiei şi pentru petiţii',\n", " 'start': '1996-11-27',\n", " 'end': '2000-11-30'},\n", " {'group': 'Comisia pentru apărare, ordine publică şi siguranţă naţională',\n", " 'start': '2008-12-19',\n", " 'end': '2012-12-20'},\n", " {'group': 'Comisia pentru apărare, ordine publică şi siguranţă naţională',\n", " 'start': '2012-12-20',\n", " 'end': '2016-12-21'},\n", " {'group': 'Comisia pentru apărare, ordine publică şi siguranţă naţională',\n", " 'start': '2004-12-19',\n", " 'end': '2008-12-13'}],\n", " 'Activity': {'Luari de cuvânt': [{'value': 8,\n", " 'start': '1996-11-27',\n", " 'end': '2000-11-30'},\n", " {'value': 117, 'start': '2008-12-19', 'end': '2012-12-20'},\n", " {'value': 58, 'start': '2012-12-20', 'end': '2016-12-21'},\n", " {'value': 84, 'start': '2004-12-19', 'end': '2008-12-13'}],\n", " 'Sedințe': {'value': 70, 'start': '2004-12-19', 'end': '2008-12-13'},\n", " '- din care declaraţii politice': [{'value': 1,\n", " 'start': '1996-11-27',\n", " 'end': '2000-11-30'},\n", " {'value': 11, 'start': '2008-12-19', 'end': '2012-12-20'},\n", " {'value': 11, 'start': '2012-12-20', 'end': '2016-12-21'},\n", " {'value': 9, 'start': '2004-12-19', 'end': '2008-12-13'}],\n", " 'Propuneri legislative initiate': [{'value': 2,\n", " 'start': '1996-11-27',\n", " 'end': '2000-11-30'},\n", " {'value': 28, 'start': '2008-12-19', 'end': '2012-12-20'},\n", " {'value': 70, 'start': '2012-12-20', 'end': '2016-12-21'},\n", " {'value': 51, 'start': '2004-12-19', 'end': '2008-12-13'}],\n", " 'Întrebari si interpelari': [{'value': 1,\n", " 'start': '1996-11-27',\n", " 'end': '2000-11-30'},\n", " {'value': 7, 'start': '2008-12-19', 'end': '2012-12-20'},\n", " {'value': 8, 'start': '2004-12-19', 'end': '2008-12-13'}],\n", " 'Declaraţii politice depuse în scris': [{'value': 17,\n", " 'start': '2008-12-19',\n", " 'end': '2012-12-20'},\n", " {'value': 5, 'start': '2012-12-20', 'end': '2016-12-21'},\n", " {'value': 24, 'start': '2004-12-19', 'end': '2008-12-13'}],\n", " 'Motiuni': [{'value': 21, 'start': '2008-12-19', 'end': '2012-12-20'},\n", " {'value': 14, 'start': '2012-12-20', 'end': '2016-12-21'}]}}" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nmembers['Calimente Mihaita']" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'-', 'AB', 'DA', 'NU'}" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "{i['vot'] for i in records}" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [], "source": [ "votes=json.loads(open('data/votes.json','r').read())" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'votid': '313',\n", " 'descriere': 'Proiectul de Hotărâre privind atribuirea unor spaţii din Palatul Parlamentului în folosinţa gratuită a Clubului Parlamentarilor Români',\n", " 'time': '06.02.2006 16:47',\n", " 'camera': '2'}]" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "votes[9]" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [], "source": [ "def get_allegiance(allegiance,voteid,outcome,names):\n", " if voteid not in allegiance:\n", " allegiance[voteid]={}\n", " if outcome not in allegiance[voteid]:\n", " allegiance[voteid][outcome]=[]\n", " for name in names:\n", " allegiance[voteid][outcome].append(name)\n", " return allegiance" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [], "source": [ "votes_dict={v[0]['votid']:v[0] for v in votes}\n", "#votes_dict={v['votid']:v for v in votes}" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'votid': '388',\n", " 'descriere': 'Pl-x 585/2005',\n", " 'time': '14.02.2006 12:50',\n", " 'camera': '2'}" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "votes_dict['388']" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [ "def senate(camera):\n", " if camera=='1':return 'Senatori'\n", " else: return 'Deputați'" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [], "source": [ "import unidecode" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [], "source": [ "def name_normalizer(name):\n", " return unidecode.unidecode(name)" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3382156" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(records)" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [], "source": [ "eu_allegiance={}\n", "eu_vt={}\n", "eu_joint_allegiance={}\n", "eu_joint_vt={}\n", "records_parsed=set()" ] }, { "cell_type": "code", "execution_count": 169, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "10000\n", "20000\n", "30000\n", "40000\n", "50000\n", "60000\n", "70000\n", "80000\n", "90000\n", "100000\n", "110000\n", "120000\n", "130000\n", "140000\n", "150000\n", "160000\n", "170000\n", "180000\n", "190000\n", "200000\n", "210000\n", "220000\n", "230000\n", "240000\n", "250000\n", "260000\n", "270000\n", "280000\n", "290000\n", "300000\n", "310000\n", "320000\n", "330000\n", "340000\n", "350000\n", "360000\n", "370000\n", "380000\n", "390000\n", "400000\n", "410000\n", "420000\n", "430000\n", "440000\n", "450000\n", "460000\n", "470000\n", "480000\n", "490000\n", "500000\n", "510000\n", "520000\n", "530000\n", "540000\n", "550000\n", "560000\n", "570000\n", "580000\n", "590000\n", "600000\n", "610000\n", "620000\n", "630000\n", "640000\n", "650000\n", "660000\n", "670000\n", "680000\n", "690000\n", "700000\n", "710000\n", "720000\n", "730000\n", "740000\n", "750000\n", "760000\n", "770000\n", "780000\n", "790000\n", "800000\n", "810000\n", "820000\n", "830000\n", "840000\n", "850000\n", "860000\n", "870000\n", "880000\n", "890000\n", "900000\n", "910000\n", "920000\n", "930000\n", "940000\n", "950000\n", "960000\n", "970000\n", "980000\n", "990000\n", "1000000\n", "1010000\n", "1020000\n", "1030000\n", "1040000\n", "1050000\n", "1060000\n", "1070000\n", "1080000\n", "1090000\n", "1100000\n", "1110000\n", "1120000\n", "1130000\n", "1140000\n", "1150000\n", "1160000\n", "1170000\n", "1180000\n", "1190000\n", "1200000\n", "1210000\n", "1220000\n", "1230000\n", "1240000\n", "1250000\n", "1260000\n", "1270000\n", "1280000\n", "1290000\n", "1300000\n", "1310000\n", "1320000\n", "1330000\n", "1340000\n", "1350000\n", "1360000\n", "1370000\n", "1380000\n", "1390000\n", "1400000\n", "1410000\n", "1420000\n", "1430000\n", "1440000\n", "1450000\n", "1460000\n", "1470000\n", "1480000\n", "1490000\n", "1500000\n", "1510000\n", "1520000\n", "1530000\n", "1540000\n", "1550000\n", "1560000\n", "1570000\n", "1580000\n", "1590000\n", "1600000\n", "1610000\n", "1620000\n", "1630000\n", "1640000\n", "1650000\n", "1660000\n", "1670000\n", "1680000\n", "1690000\n", "1700000\n", "1710000\n", "1720000\n", "1730000\n", "1740000\n", "1750000\n", "1760000\n", "1770000\n", "1780000\n", "1790000\n", "1800000\n", "1810000\n", "1820000\n", "1830000\n", "1840000\n", "1850000\n", "1860000\n", "1870000\n", "1880000\n", "1890000\n", "1900000\n", "1910000\n", "1920000\n", "1930000\n", "1940000\n", "1950000\n", "1960000\n", "1970000\n", "1980000\n", "1990000\n", "2000000\n", "2010000\n", "2020000\n", "2030000\n", "2040000\n", "2050000\n", "2060000\n", "2070000\n", "2080000\n", "2090000\n", "2100000\n", "2110000\n", "2120000\n", "2130000\n", "2140000\n", "2150000\n", "2160000\n", "2170000\n", "2180000\n", "2190000\n", "2200000\n", "2210000\n", "2220000\n", "2230000\n", "2240000\n", "2250000\n", "2260000\n", "2270000\n", "2280000\n", "2290000\n", "2300000\n", "2310000\n", "2320000\n", "2330000\n", "2340000\n", "2350000\n", "2360000\n", "2370000\n", "2380000\n", "2390000\n", "2400000\n", "2410000\n", "2420000\n", "2430000\n", "2440000\n", "2450000\n", "2460000\n", "2470000\n", "2480000\n", "2490000\n", "2500000\n", "2510000\n", "2520000\n", "2530000\n", "2540000\n", "2550000\n", "2560000\n", "2570000\n", "2580000\n", "2590000\n", "2600000\n", "2610000\n", "2620000\n", "2630000\n", "2640000\n", "2650000\n", "2660000\n", "2670000\n", "2680000\n", "2690000\n", "2700000\n", "2710000\n", "2720000\n", "2730000\n", "2740000\n", "2750000\n", "2760000\n", "2770000\n", "2780000\n", "2790000\n", "2800000\n", "2810000\n", "2820000\n", "2830000\n", "2840000\n", "2850000\n", "2860000\n", "2870000\n", "2880000\n", "2890000\n", "2900000\n", "2910000\n", "2920000\n", "2930000\n", "2940000\n", "2950000\n", "2960000\n", "2970000\n", "2980000\n", "2990000\n", "3000000\n", "3010000\n", "3020000\n", "3030000\n", "3040000\n", "3050000\n", "3060000\n", "3070000\n", "3080000\n", "3090000\n", "3100000\n", "3110000\n", "3120000\n", "3130000\n", "3140000\n", "3150000\n", "3160000\n", "3170000\n", "3180000\n", "3190000\n", "3200000\n", "3210000\n", "3220000\n", "3230000\n", "3240000\n", "3250000\n", "3260000\n", "3270000\n", "3280000\n", "3290000\n", "3300000\n", "3310000\n", "3320000\n", "3330000\n", "3340000\n", "3350000\n", "3360000\n", "3370000\n", "3380000\n" ] } ], "source": [ "for i in range(len(records[:])):\n", " if (i%10000==0):\n", " print(i)\n", " if i not in records_parsed:\n", " j=records[i]\n", " vote=votes_dict[j['votid']]\n", " ts=pd.to_datetime('-'.join(vote['time'].split(' ')[0].split('.')[::-1]))\n", " year=int(vote['time'].split(' ')[0][-4:])\n", " camera=senate(vote['camera'])\n", " if camera not in eu_vt:eu_vt[camera]={}\n", " if year not in eu_vt[camera]:eu_vt[camera][year]=[]\n", " if camera not in eu_allegiance:eu_allegiance[camera]={}\n", " if year not in eu_allegiance[camera]:eu_allegiance[camera][year]={'Name':{},'Group':{},'Party':{},'Country':{}} \n", " if year not in eu_joint_vt:eu_joint_vt[year]=[]\n", " if year not in eu_joint_allegiance:eu_joint_allegiance[year]={'Name':{},'Group':{},'Party':{},'Country':{}} \n", " if vote['descriere'] not in [\"Modification de l'ordre du jour\"]:\n", " #name=' '.join(j['name'].split(' ')[::-1])\n", " name=name_normalizer(j['name'])\n", " if name not in nmembers:\n", " print(name)\n", " else:\n", " member=nmembers[name]\n", " dummy={}\n", " outcome=j['vot']\n", " dummy['Vote']=j['votid']\n", " dummy['Name']=[member['Name']['full']]\n", " dummy['Outcome']=outcome\n", " dummy['Year']=year\n", " dummy['Group']=[]\n", " dummy['Party']=[]\n", " dummy['Country']=[]\n", " for k in member['Parties']:\n", " if pd.to_datetime(k['start'])