# %% [markdown]
# # Getting the Judges
#
# [Source](http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de) of the judges.
# - [Abteilung IV](http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de)
# - [Abteilung V](http://www.bvger.ch/gericht/richter/00563/00581/index.html?lang=de)

# %%
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

plt.style.use('ggplot')
import dateutil.parser
import re
import time

# %%
# Configuration: everything a reader might need to adjust lives here.
BASE_URL = 'http://www.bvger.ch'
URL_ABTEILUNG_IV = 'http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de'
URL_ABTEILUNG_V = 'http://www.bvger.ch/gericht/richter/00563/00581/index.html?lang=de'
JUDGE_ENTRY_CLASS = 'contentFlex flexUnterseite'   # one <div> per judge on the overview page
JUDGE_DETAIL_CLASS = 'webText flexTinymceDiv'      # <div> holding the text of a judge's own page
REQUEST_TIMEOUT = 30                               # seconds; without it a stalled server hangs the run

# (suffix of the detail text, normalised party label); the first match wins.
# The three-character pairs are carried over unchanged from the original chain
# of str.replace() calls.  The wordings they were cut from are not visible in
# this notebook (presumably the German/French endings of the party sentence).
# The two four-character rules must stay in front: without them 'CVP.' and
# 'PDC.' (French name of the CVP) fall through to 'VP.' / 'DC.' and would be
# reported as SVP.
PARTY_SUFFIX_RULES = (
    ('CVP.', 'CVP'),
    ('PDC.', 'CVP'),
    ('los', 'parteilos'),
    ('que', 'parteilos'),
    ('07.', 'parteilos'),
    ('ux.', 'FDP'),
    ('PLR', 'FDP'),
    ('üne', 'Grüne'),
    ('rts', 'Grüne'),
    ('UDC', 'SVP'),
    ('DC.', 'SVP'),
    ('VP.', 'SVP'),
    ('ale', 'GLP'),
)


# %%
def fetch_soup(url):
    """Download ``url`` and return the page parsed with ``html.parser``.

    Raises ``requests.HTTPError`` for a 4xx/5xx answer so that an error page
    is never parsed as if it were judge data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'  # Fix Encoding
    return BeautifulSoup(response.text, 'html.parser')


def normalize_party(detail_text):
    """Derive the party label from the end of a judge's detail text.

    Surrounding whitespace is removed first, so a trailing newline no longer
    shifts the inspected tail.  If no rule in ``PARTY_SUFFIX_RULES`` matches,
    the last three characters are returned with 'PS' rewritten to 'SP' and
    whitespace stripped (e.g. ' SP' -> 'SP', 'SVP' -> 'SVP').
    """
    text = detail_text.strip()
    for suffix, party in PARTY_SUFFIX_RULES:
        if text.endswith(suffix):
            return party
    return text[-3:].replace('PS', 'SP').strip()


def extract_judges(html_elements):
    """Process a list of html elements containing data about the judges.

    Parameters
    ----------
    html_elements : iterable of bs4 elements
        One element per judge; the first ``<a>`` inside it carries the name
        as link text and the relative URL of the judge's own page as ``href``.

    Returns
    -------
    list of dict
        One ``{'Name': ..., 'Partei': ...}`` record per element, in input order.

    Notes
    -----
    Performs one HTTP request per judge.  An element without a link, or a
    detail page without the expected ``<div>``, raises ``AttributeError``.
    """
    judges = []
    for element in html_elements:
        link = element.find('a')

        # The link text may carry a parenthesised addition; drop it.
        name = re.sub(r'\(.+\)', '', link.text).strip()

        detail_soup = fetch_soup(BASE_URL + link.get('href'))
        detail_text = detail_soup.find('div', {'class': JUDGE_DETAIL_CLASS}).text

        judges.append({'Name': name, 'Partei': normalize_party(detail_text)})
    return judges


# %%
judges_IV_soup = fetch_soup(URL_ABTEILUNG_IV)
judges_IV = judges_IV_soup.find_all('div', {'class': JUDGE_ENTRY_CLASS})

# %%
judges_IV_list = extract_judges(judges_IV)

# %%
judges_V_soup = fetch_soup(URL_ABTEILUNG_V)
judges_V = judges_V_soup.find_all('div', {'class': JUDGE_ENTRY_CLASS})

# %%
judges_V_list = extract_judges(judges_V)

# %%
# Explicit columns keep the frame well-formed even if no judge was found.
judges_list = pd.DataFrame(judges_IV_list + judges_V_list, columns=['Name', 'Partei'])

# %% [markdown]
# # Reorganising the Names to merge later on

# %%
def split_name(name):
    """Split a "Lastname Firstname" string into ``(last_name, first_name)``.

    The last token is taken as the first name and everything before it as the
    last name.  A trailing middle initial ("Wenger David R.") is kept with
    the first name, giving ``('Wenger', 'David R.')``.

    NOTE(review): two given names cannot be told apart from a double surname.
    In the stored run "Thurnheer Simon Mathias" was split as last name
    "Thurnheer Simon" / first name "Mathias" -- verify such rows manually.
    """
    parts = name.split(' ')
    last_token = parts[-1]
    ends_with_initial = (
        len(last_token) == 2 and last_token[0].isalpha() and last_token[1] == '.'
    )
    if len(parts) >= 3 and ends_with_initial:
        return ' '.join(parts[:-2]), ' '.join(parts[-2:])
    # Normal case: last element is the first name, everything else the last name.
    return ' '.join(parts[:-1]), last_token


# %%
name_pairs = [split_name(name) for name in judges_list['Name']]
last_names = [last for last, _ in name_pairs]
first_names = [first for _, first in name_pairs]

# %%
# Flat column labels: assigning a nested list ([[...]]) to df.columns builds a
# MultiIndex on current pandas and breaks the string concatenation below.
df = judges_list.assign(Nachname=last_names, Vorname=first_names)
df = df[['Name', 'Partei', 'Nachname', 'Vorname']]

# The column is called 'Nachname Vorname' but holds "<Vorname> <Nachname>";
# the label is kept because the CSV written below uses it.
df['Nachname Vorname'] = df['Vorname'] + ' ' + df['Nachname']

# %%
df.to_csv('richter_partei.csv', index=False)
# %%
# Show the judges table built in the cells above (27 rows in the stored run).
df

# %%
# Number of judges per party.  The axis and column names are set explicitly:
# what value_counts().reset_index() calls them differs between pandas
# versions, and the cells below expect 'index' (party) and 'Partei' (count).
df_partei_count = (
    df['Partei']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='Partei')
)

# %%
# Total number of judges; read by per() below.
total = df_partei_count['Partei'].sum()


# %%
def per(x):
    """Return ``x`` as a percentage of the module-level ``total``."""
    percentage = x / total * 100
    return percentage


# %%
# Rounded to whole percent; each row is rounded on its own, so the column is
# not guaranteed to add up to exactly 100.
df_partei_count['percentage'] = df_partei_count['Partei'].apply(per).round()

# %%
# Stored result of the original run (count, rounded share in percent):
#   SP 6 (22), SVP 6 (22), Grüne 4 (15), parteilos 4 (15),
#   FDP 3 (11), GLP 2 (7), CVP 1 (4), BDP 1 (4)
df_partei_count

# Originally authored with a Python 3 kernel (language_info version 3.5.2).