\n",
"\n",
"\n",
"
\n",
"
... Antrag der FDP gestimmt, ... dass Deutschland eine Abschaffung der Sommerzeit wünscht ...
\n",
"
\n",
"
Von: \n",
" \n",
" Abcdefg Hijklmn\n",
" \n",
"
\n",
"
\n",
"\n",
"\n",
"
\n",
"
\n",
" Antwort von Ulrich Kelber (SPD)\n",
" 26. März. 2018 - 14:48
\n",
" Dauer bis zur Antwort: 1 Tag 6 Stunden\n",
" \n",
"
\n",
"
Sehr geehrter Herr Hijklmn,
\n",
"
vielen Dank für Ihre Anfrage zur Sommerzeit.
Ich denke, ...
\n",
"
Mit freundlichem Gruß
Ulrich Kelber
\n",
"
\n",
"
\n",
"\n",
" \n",
" \n",
"\n",
"'''\n",
"\n",
"print('Original HTML contains', 'a' if encrypted_name in html_text else 'no', 'reference to the encrypted name.')\n",
"for a, answer_text in enumerate(extract_answers_as_text(html_text)):\n",
" print('Answer {}:'.format(a+1))\n",
" for line in answer_text.split('\\n'):\n",
" print(line)\n",
" print('Extracted answer contains', 'a' if encrypted_name in answer_text else 'no', 'reference to the encrypted name.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create corpus of all answers of all deputies of the Bundestag"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or upate deputy files (JSON) and question files (URL)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"716 of 716. 73 files successfully created. 0 files failed. Latest: gyde-jensen_fdp "
]
}
],
"source": [
"success = []\n",
"failure = []\n",
"\n",
"for d, deputy in enumerate(deputies):\n",
"\n",
" deputy_prefix = deputy_file_name_part(deputy)\n",
" deputy_file = corpus_dir / (deputy_prefix + '.json')\n",
"\n",
" try:\n",
" if update_only_missing_deputies and deputy_file.exists(): continue\n",
"\n",
" deputy_url = deputy_api_url(deputies_url, deputy['meta']['username'])\n",
" deputy_json = requests.get(deputy_url, proxies=proxies).json() # Request to abgeordnetenwatch.de!\n",
" deputy_file.write_text(json.dumps(deputy_json))\n",
" success.append(deputy_file.name)\n",
" \n",
" questions = deputy_json['profile']['questions']\n",
" for q, question in enumerate(reversed(questions)): # Oldest question first\n",
"\n",
" question_infix, question_suffix = question_file_name_parts(q, question)\n",
" url_filename = '_'.join([deputy_prefix, question_infix, question_suffix]) + '.url'\n",
" url_file = corpus_dir / url_filename\n",
" url_file.write_text(question['url'])\n",
" success.append(url_file.name) \n",
" \n",
" except Exception as exception:\n",
" failure.append((deputy_file.name, exception))\n",
"\n",
" finally:\n",
" print('\\r{} of {}. {} files successfully created. {} files failed. Latest: {:30.30}'.format(\n",
" d+1, len(deputies), len(success), len(failure), deputy_file.stem), end='')"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No exception while updating the deputies and questions :-)\n",
"\n",
"73 files created or updated:\n",
"frauke-petry_die-blauen.json, frauke-petry_die-blauen_Q0001_2017-07-20_verbraucherschutz.url, frauke-petry_die-blauen_Q0002_2017-07-31_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0003_2017-08-27_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0004_2017-08-29_bildung-und-forschung.url, frauke-petry_die-blauen_Q0005_2017-09-14_familie.url, frauke-petry_die-blauen_Q0006_2017-09-15_umwelt.url, frauke-petry_die-blauen_Q0007_2017-09-15_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0008_2017-09-17_inneres-und-justiz.url, frauke-petry_die-blauen_Q0009_2017-09-20_finanzen.url, frauke-petry_die-blauen_Q0010_2017-10-12_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0011_2017-10-27_finanzen.url, frauke-petry_die-blauen_Q0012_2017-11-16_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0013_2017-12-06_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0014_2018-03-19_gesundheit.url, frauke-petry_die-blauen_Q0015_2018-05-30_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0016_2018-07-06_wirtschaft.url, frauke-petry_die-blauen_Q0017_2018-11-29_demokratie-und-bürgerrechte.url, frauke-petry_die-blauen_Q0018_2018-12-14_demokratie-und-bürgerrechte.url, marco-bulow_parteilos.json, marco-bulow_parteilos_Q0001_2017-07-25_finanzen.url, marco-bulow_parteilos_Q0002_2017-07-30_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0003_2017-08-03_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0004_2017-09-07_wirtschaft.url, marco-bulow_parteilos_Q0005_2017-09-14_familie.url, marco-bulow_parteilos_Q0006_2017-09-20_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0007_2017-10-20_arbeit.url, marco-bulow_parteilos_Q0008_2017-10-26_bildung-und-forschung.url, marco-bulow_parteilos_Q0009_2017-11-24_frauen.url, marco-bulow_parteilos_Q0010_2017-11-29_umwelt.url, marco-bulow_parteilos_Q0011_2018-01-04_integration.url, marco-bulow_parteilos_Q0012_2018-01-19_soziales.url, 
marco-bulow_parteilos_Q0013_2018-02-08_inneres-und-justiz.url, marco-bulow_parteilos_Q0014_2018-02-12_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0015_2018-02-21_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0016_2018-02-23_internationales.url, marco-bulow_parteilos_Q0017_2018-03-07_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0018_2018-03-27_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0019_2018-03-31_internationales.url, marco-bulow_parteilos_Q0020_2018-04-05_gesundheit.url, marco-bulow_parteilos_Q0021_2018-04-11_internationales.url, marco-bulow_parteilos_Q0022_2018-04-14_internationales.url, marco-bulow_parteilos_Q0023_2018-04-15_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0024_2018-05-30_bildung-und-forschung.url, marco-bulow_parteilos_Q0025_2018-06-15_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0026_2018-06-19_umwelt.url, marco-bulow_parteilos_Q0027_2018-07-01_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0028_2018-07-15_soziales.url, marco-bulow_parteilos_Q0029_2018-07-15_soziales.url, marco-bulow_parteilos_Q0030_2018-07-29_kultur.url, marco-bulow_parteilos_Q0031_2018-08-23_gesundheit.url, marco-bulow_parteilos_Q0032_2018-09-20_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0033_2018-09-24_wirtschaft.url, marco-bulow_parteilos_Q0034_2018-09-30_internationales.url, marco-bulow_parteilos_Q0035_2018-09-30_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0036_2018-10-15_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0037_2018-11-06_gesundheit.url, marco-bulow_parteilos_Q0038_2018-11-27_demokratie-und-bürgerrechte.url, marco-bulow_parteilos_Q0039_2018-12-15_gesundheit.url, marco-bulow_parteilos_Q0040_2018-12-29_frauen.url, mario-mieruch_parteilos.json, mario-mieruch_parteilos_Q0001_2017-08-22_kultur.url, mario-mieruch_parteilos_Q0002_2017-08-24_soziales.url, mario-mieruch_parteilos_Q0003_2017-08-25_soziales.url, 
mario-mieruch_parteilos_Q0004_2017-08-28_soziales.url, mario-mieruch_parteilos_Q0005_2017-09-21_inneres-und-justiz.url, mario-mieruch_parteilos_Q0006_2017-10-18_internationales.url, mario-mieruch_parteilos_Q0007_2018-01-31_demokratie-und-bürgerrechte.url, mario-mieruch_parteilos_Q0008_2018-03-19_gesundheit.url, mario-mieruch_parteilos_Q0009_2018-08-13_inneres-und-justiz.url, uwe-kamann_parteilos.json, uwe-kamann_parteilos_Q0001_2017-09-21_inneres-und-justiz.url, uwe-kamann_parteilos_Q0002_2018-12-15_gesundheit.url\n"
]
}
],
"source": [
"for deputy_filename, exception in failure:\n",
" print('Exception while processing deputy {}:'.format(deputy_filename))\n",
" print(exception)\n",
" print()\n",
"\n",
"if not(failure):\n",
" print('No exception while updating the deputies and questions :-)')\n",
" print()\n",
"\n",
"print('{} files created or updated:'.format(len(success)))\n",
"print(', '.join(success))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or upate answer files (TXT)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"Deputy 1 of 716. Question 1 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 2 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 3 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 4 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 5 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 6 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 7 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 8 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 9 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 10 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 11 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 12 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 13 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 14 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 15 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 16 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 17 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 18 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 19 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 1 of 716. Question 20 of 20. 0 files created. 0 files failed. Latest: alexander-graf-lambsdorff_fdp \r",
"Deputy 2 of 716. Question 1 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 2 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 3 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 4 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 5 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 6 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 7 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 8 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 9 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 10 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 11 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 12 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 13 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 14 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 15 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 16 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 17 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 18 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 19 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 20 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 21 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 22 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 23 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 24 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 25 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 26 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 27 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 28 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 29 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 30 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 31 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 32 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 33 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 34 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 35 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 36 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 37 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 38 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 39 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 40 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 41 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 42 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 43 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 44 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 45 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 46 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 47 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 48 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 49 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 50 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 51 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 52 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 53 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 54 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 55 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 56 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 57 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 58 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 59 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 60 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 61 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 2 of 716. Question 62 of 62. 0 files created. 0 files failed. Latest: martin-schulz-1_spd \r",
"Deputy 3 of 716. Question 1 of 9. 0 files created. 0 files failed. Latest: michael-theurer_fdp \r",
"Deputy 3 of 716. Question 2 of 9. 0 files created. 0 files failed. Latest: michael-theurer_fdp \r",
"Deputy 3 of 716. Question 3 of 9. 0 files created. 0 files failed. Latest: michael-theurer_fdp \r",
"Deputy 3 of 716. Question 4 of 9. 0 files created. 0 files failed. Latest: michael-theurer_fdp "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Deputy 716 of 716. Question 1 of 1. 48 files created. 0 files failed. Latest: gyde-jensen_fdp "
]
}
],
"source": [
"success = []\n",
"failure = []\n",
"\n",
"for d, deputy in enumerate(deputies):\n",
"\n",
" deputy_prefix = deputy_file_name_part(deputy)\n",
" deputy_file = corpus_dir / (deputy_prefix + '.json')\n",
" \n",
" questions = json.loads(deputy_file.read_text())['profile']['questions']\n",
" \n",
" for q, question in enumerate(reversed(questions)): # Oldest question first\n",
"\n",
" a = -1\n",
" try:\n",
" question_infix, question_suffix = question_file_name_parts(q, question)\n",
" answer_files = []\n",
"\n",
" answers = question['answers']\n",
" for a, answer in enumerate(answers):\n",
" answer_infix = answer_file_name_part(a, answer)\n",
" answer_filename = '_'.join([deputy_prefix, question_infix, answer_infix, question_suffix]) + '.txt'\n",
" answer_files.append(corpus_dir / answer_filename)\n",
" \n",
" # Even if there is just one new answer, we need to fetch the whole page again\n",
" if update_only_missing_answers and all(file.exists() for file in answer_files): continue\n",
" \n",
" question_page = requests.get(question['url'], proxies=proxies).text # Request to abgeordnetenwatch.de!\n",
"\n",
" answer_texts = extract_answers_as_text(question_page)\n",
" \n",
" for file, text in zip(answer_files, answer_texts):\n",
" file.write_text(text)\n",
" success.append(file.name)\n",
" \n",
" except Exception as exception:\n",
" failure.append((deputy_prefix, q, a, exception))\n",
"\n",
" finally:\n",
" print('\\rDeputy {} of {}. Question {} of {}. {} files created. {} files failed. Latest: {:30.30}'.format(\n",
" d+1, len(deputies), q+1, len(questions), len(success), len(failure), deputy_prefix), end='')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No exception while updating the answers :-)\n",
"\n",
"48 files created or updated:\n",
"marco-bulow_parteilos_Q0001_2017-07-25_A01_2017-08-29_finanzen.txt, marco-bulow_parteilos_Q0002_2017-07-30_A01_2017-08-14_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0003_2017-08-03_A01_2017-08-29_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0004_2017-09-07_A01_2017-09-21_wirtschaft.txt, marco-bulow_parteilos_Q0005_2017-09-14_A01_2017-09-21_familie.txt, marco-bulow_parteilos_Q0006_2017-09-20_A01_2017-09-22_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0007_2017-10-20_A01_2017-11-07_arbeit.txt, marco-bulow_parteilos_Q0008_2017-10-26_A01_2017-11-21_bildung-und-forschung.txt, marco-bulow_parteilos_Q0009_2017-11-24_A01_2017-12-12_frauen.txt, marco-bulow_parteilos_Q0010_2017-11-29_A01_2017-12-11_umwelt.txt, marco-bulow_parteilos_Q0011_2018-01-04_A01_2018-01-17_integration.txt, marco-bulow_parteilos_Q0012_2018-01-19_A01_2018-01-31_soziales.txt, marco-bulow_parteilos_Q0013_2018-02-08_A01_2018-03-05_inneres-und-justiz.txt, marco-bulow_parteilos_Q0014_2018-02-12_A01_2018-03-05_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0015_2018-02-21_A01_2018-03-16_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0016_2018-02-23_A01_2018-03-05_internationales.txt, marco-bulow_parteilos_Q0017_2018-03-07_A01_2018-03-15_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0018_2018-03-27_A01_2018-04-25_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0019_2018-03-31_A01_2018-04-26_internationales.txt, marco-bulow_parteilos_Q0020_2018-04-05_A01_2018-04-17_gesundheit.txt, marco-bulow_parteilos_Q0021_2018-04-11_A01_2018-04-18_internationales.txt, marco-bulow_parteilos_Q0022_2018-04-14_A01_2018-05-02_internationales.txt, marco-bulow_parteilos_Q0023_2018-04-15_A01_2018-05-17_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0024_2018-05-30_A01_2018-07-10_bildung-und-forschung.txt, marco-bulow_parteilos_Q0025_2018-06-15_A01_2018-06-20_demokratie-und-bürgerrechte.txt, 
marco-bulow_parteilos_Q0026_2018-06-19_A01_2018-07-10_umwelt.txt, marco-bulow_parteilos_Q0027_2018-07-01_A01_2018-07-10_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0028_2018-07-15_A01_2018-08-07_soziales.txt, marco-bulow_parteilos_Q0029_2018-07-15_A01_2018-07-23_soziales.txt, marco-bulow_parteilos_Q0030_2018-07-29_A01_2018-09-12_kultur.txt, marco-bulow_parteilos_Q0031_2018-08-23_A01_2018-09-12_gesundheit.txt, marco-bulow_parteilos_Q0032_2018-09-20_A01_2018-10-02_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0033_2018-09-24_A01_2018-09-26_wirtschaft.txt, marco-bulow_parteilos_Q0034_2018-09-30_A01_2018-10-10_internationales.txt, marco-bulow_parteilos_Q0035_2018-09-30_A01_2018-10-10_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0036_2018-10-15_A01_2018-11-13_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0037_2018-11-06_A01_2018-12-18_gesundheit.txt, marco-bulow_parteilos_Q0038_2018-11-27_A01_2018-12-18_demokratie-und-bürgerrechte.txt, marco-bulow_parteilos_Q0039_2018-12-15_A01_2018-12-18_gesundheit.txt, mario-mieruch_parteilos_Q0001_2017-08-22_A01_2017-08-23_kultur.txt, mario-mieruch_parteilos_Q0002_2017-08-24_A01_2017-08-24_soziales.txt, mario-mieruch_parteilos_Q0003_2017-08-25_A01_2017-08-25_soziales.txt, mario-mieruch_parteilos_Q0004_2017-08-28_A01_2017-08-30_soziales.txt, mario-mieruch_parteilos_Q0005_2017-09-21_A01_2017-09-21_inneres-und-justiz.txt, mario-mieruch_parteilos_Q0006_2017-10-18_A01_2018-01-16_internationales.txt, mario-mieruch_parteilos_Q0007_2018-01-31_A01_2018-02-02_demokratie-und-bürgerrechte.txt, mario-mieruch_parteilos_Q0008_2018-03-19_A01_2018-03-20_gesundheit.txt, mario-mieruch_parteilos_Q0009_2018-08-13_A01_2018-08-13_inneres-und-justiz.txt\n"
]
}
],
"source": [
"for deputy_prefix, q, a, exception in failure:\n",
" print('Exception while processing answer {} for question {} for deputy {}:'.format(a+1, q+1, deputy_prefix))\n",
" print(exception)\n",
" print()\n",
"\n",
"if not(failure):\n",
" print('No exception while updating the answers :-)')\n",
" print()\n",
" \n",
"print('{} files created or updated:'.format(len(success)))\n",
"print(', '.join(success)) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Review of the corpus: counts, answers without questions, questions without answers"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" number of deputy files: 718 (was 714)\n",
" number of questions: 10212 (was 10143)\n",
" number of answers: 7722 (was 7674)\n",
" number of questions with multiple answers: 50 (was 50)\n"
]
}
],
"source": [
"for statistic in statistics:\n",
" value = len(list(corpus_dir.glob(statistic['pattern'])))\n",
" statistic['after'] = value\n",
" print('{:>42}: {:7} (was {:7})'.format(statistic['name'], statistic['after'], statistic['before']))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"alexander-graf-lambsdorff_fdp: 10/20, martin-schulz-1_spd: 0/62, michael-theurer_fdp: 8/9, fabio-de-masi_die-linke: 19/17, sarah-ryglewski_spd: 8/13, anke-domscheit-berg_die-linke: 17/18, beatrix-von-storch_afd: 8/23, konstantin-kuhle_fdp: 2/5, johannes-schraps_spd: 5/6, armin-paul-hampel_afd: 0/7, petr-bystron_afd: 0/12, waldemar-herdt_afd: 3/7, manfred-todtenhausen_fdp: 5/7, norbert-muller-4_die-linke: 12/12, alexander-krauss_cdu: 20/23, dr-juergen-martens_fdp: 4/4, alexander-gauland_afd: 0/25, steffen-kotre_afd: 1/1, frauke-petry_die-blauen: 0/18, lars-herrmann_afd: 2/3, christoph-neumann_afd: 0/4, siegbert-droese_afd: 0/3, detlev-spangenberg_afd: 4/5, torsten-herbst_fdp: 0/0, thomas-kemmerich_fdp: 1/2, stephan-brandner_afd: 132/130, christoph-de-vries_cdu: 19/16, christoph-plos_cdu: 35/36, dr-bernd-baumann_afd: 0/8, kay-gottschalk_afd: 1/7, zaklin-nastic_die-linke: 7/10, katja-suding_fdp: 14/15, dr-wieland-schinnenburg_fdp: 6/6, frank-magnitz_afd: 2/4, dr-kirsten-kappert-gonther_die-grünen: 21/23, dr-jens-zimmermann_spd: 13/14, yvonne-magwas_cdu: 12/15, dr-diether-dehm_die-linke: 2/2, johannes-steiniger_cdu: 10/10, nina-warken_cdu: 0/0, artur-auernhammer_csu: 9/10, dr-silke-launert_csu: 13/17, peter-altmaier_cdu: 34/74, alexander-ulrich_die-linke: 7/8, kai-whittaker_cdu: 6/6, johann-wadephul_cdu: 7/9, dr-konstantin-von-notz_die-grünen: 33/33, carsten-trager_spd: 12/13, dr-petra-sitte_die-linke: 4/4, detlef-seif_cdu: 3/3, johannes-selle_cdu: 11/12, reinhold-sendker_cdu: 2/2, patrick-sensburg_cdu: 7/7, thomas-silberhorn_csu: 11/11, uwe-schummer_cdu: 20/20, mahmut-ozdemir_spd: 17/19, frithjof-schmidt_die-grünen: 1/1, patrick-schnieder_cdu: 9/9, dr-rolf-mutzenich_spd: 26/26, andrea-nahles_spd: 350/361, bettina-muller_spd: 8/9, marlene-mortler_csu: 12/39, jan-metzler_cdu: 0/8, hilde-mattheis_spd: 9/12, dr-gesine-lotzsch_die-linke: 8/11, katja-leikert_cdu: 11/11, markus-kurth_die-grünen: 7/9, sylvia-kotting-uhl_die-grünen: 13/13, jan-korte_die-linke: 13/13, 
volkmar-klein_cdu: 14/15, sven-christian-kindler_die-grünen: 11/15, oliver-kaczmarek_spd: 5/6, heike-hansel_die-linke: 3/9, dr-stephan-harbarth_cdu: 6/13, jurgen-hardt_cdu: 12/12, thomas-hitschler_spd: 4/5, christian-hirte_cdu: 3/4, ulla-jelpke_die-linke: 16/16, andreas-jung_cdu: 0/10, ursula-groden-kranich_cdu: 14/17, hermann-grohe_cdu: 23/25, klaus-dieter-grohler_cdu: 8/23, hans-joachim-fuchtel_cdu: 1/6, agnieszka-brugger_die-grünen: 5/11, christine-buchholz_die-linke: 19/21, dr-karl-heinz-brunner_spd: 7/8, michael-brand_cdu: 22/31, matthias-birkwald_die-linke: 19/21, ulrike-bahr_spd: 17/17, soren-bartol_spd: 15/17, dr-matthias-bartke_spd: 38/39, katrin-goring-eckardt_die-grünen: 124/130, thomas-lutze_die-linke: 0/1, dr-sascha-raabe_spd: 15/18, martina-renner_die-linke: 11/11, prof-dr-matthias-zimmer_cdu: 25/25, sabine-zimmermann_die-linke: 7/11, markus-tressel_die-grünen: 1/2, pia-zimmermann_die-linke: 3/6, stefan-zierke_spd: 12/12, dagmar-ziegler_spd: 2/4, emmi-zeulner_csu: 4/9, hubertus-zdebel_die-linke: 3/3, gulistan-yuksel_spd: 2/2, dirk-wiese_spd: 3/3, klaus-peter-willsch_cdu: 9/9, elisabeth-winkelmeier-becker_cdu: 20/21, oliver-wittke_cdu: 3/3, anja-weisgerber_csu: 16/17, sabine-weiss_cdu: 5/6, peter-weis_cdu: 15/16, ingo-wellenreuther_cdu: 19/21, marian-wendt_cdu: 3/7, bernd-westphal_spd: 9/9, gabi-weber_spd: 7/11, kai-wegner_cdu: 15/15, albert-weiler_cdu: 0/8, harald-weinberg_die-linke: 8/10, marcus-weinberg_cdu: 41/41, marco-wanderwitz_cdu: 8/8, dr-sahra-wagenknecht_die-linke: 13/42, beate-walter-rosenheimer_die-grünen: 4/4, dirk-vopel_spd: 3/4, christian-von-stetten_cdu: 0/7, ursula-von-der-leyen_cdu: 14/34, hans-georg-von-der-marwitz_cdu: 0/8, matern-von-marschall_cdu: 21/21, oswin-veith_cdu: 12/12, julia-verlinden_die-grünen: 8/10, volkmar-vogel_cdu: 9/9, kathrin-vogler_die-linke: 2/3, ute-vogt_spd: 34/34, arnold-vaatz_cdu: 9/11, dr-volker-ullrich_csu: 8/11, jurgen-trittin_die-grünen: 13/17, antje-tillmann_cdu: 25/26, michael-thews_spd: 14/14, 
michael-stubgen_cdu: 5/6, kerstin-tack_spd: 25/26, dr-kirsten-tackmann_die-linke: 3/3, peter-tauber_cdu: 32/37, claudia-tausend_spd: 18/24, karin-strenz_cdu: 14/17, max-straubinger_csu: 3/4, stephan-stracke_csu: 14/19, gero-storjohann_cdu: 8/9, dieter-stier_cdu: 1/3, albert-stegemann_cdu: 5/8, peter-stein_cdu: 4/8, sebastian-steineke_cdu: 5/5, kersten-steinke_die-linke: 0/0, sonja-steffen_spd: 9/10, wolfgang-stefinger_csu: 22/22, dr-frank-steffel_cdu: 35/35, martina-stamm-fibich_spd: 11/11, tino-sorge_cdu: 14/17, jens-spahn_cdu: 88/98, rainer-spiering_spd: 6/7, svenja-stadler_spd: 29/31, armin-schuster_cdu: 23/33, frank-schwabe_spd: 5/7, stefan-schwartze_spd: 11/14, andreas-schwarz_spd: 10/10, rita-schwarzeluhr-sutter_spd: 9/11, ewald-schurer_spd: 5/6, kordula-schulz-asche_die-grünen: 8/8, klaus-peter-schulze_cdu: 6/7, swen-schulz_spd: 18/18, ursula-schulte_spd: 6/6, nadine-schon_cdu: 10/10, ulla-schmidt_spd: 14/15, carsten-schneider_spd: 3/27, anita-schafer_cdu: 0/7, axel-schafer_spd: 2/3, wolfgang-schauble_cdu: 0/28, ulle-schauws_die-grünen: 1/2, dr-nina-scheer_spd: 13/19, andreas-scheuer_csu: 0/48, gerhard-schick_die-grünen: 20/20, marianne-schieder_spd: 5/7, udo-schiefner_spd: 3/4, jana-schimke_cdu: 16/20, tankred-schipanski_cdu: 11/10, johann-saathoff_spd: 13/14, manuel-sarrazin_die-grünen: 10/9, christian-schmidt-2_csu: 55/86, dagmar-schmidt_spd: 11/13, dr-ernst-dieter-rossmann_spd: 9/9, tabea-rosner_die-grünen: 15/16, claudia-roth_die-grünen: 0/21, michael-roth_spd: 11/12, norbert-rottgen_cdu: 12/15, erwin-ruddel_cdu: 19/20, corinna-ruffer_die-grünen: 3/4, albert-rupprecht_csu: 1/5, susann-ruthrich_spd: 1/3, bernd-rutzel_spd: 6/7, johannes-roring_cdu: 2/4, dr-martin-rosemann_spd: 12/14, rene-rospel_spd: 5/7, eckhardt-rehberg_cdu: 6/6, dr-carola-reimann_spd: 8/8, lothar-riebsamen_cdu: 9/13, josef-rief_cdu: 3/6, andreas-rimkus_spd: 1/3, sonke-rix_spd: 7/9, dennis-rohde_spd: 8/10, alois-rainer_csu: 3/3, peter-ramsauer_csu: 1/13, alexander-radwan_csu: 9/9, 
kerstin-radomski_cdu: 2/6, thomas-rachel_cdu: 8/8, martin-rabanus_spd: 5/8, eckhard-pols_cdu: 17/17, sabine-poschmann_spd: 9/9, florian-post_spd: 16/19, achim-post_spd: 4/7, florian-pronold_spd: 4/9, joachim-pfeiffer_cdu: 31/32, detlev-pilger_spd: 9/11, lisa-paus_die-grünen: 22/22, petra-pau_die-linke: 16/20, martin-patzelt_cdu: 13/16, sylvia-pantel_cdu: 13/13, aydan-ozoguz_spd: 17/24, friedrich-ostendorff_die-grünen: 2/2, henning-otte_cdu: 10/11, cem-ozdemir_die-grünen: 0/47, omid-nouripour_die-grünen: 17/18, georg-nuslein_csu: 6/7, wilfried-oellers_cdu: 3/6, thomas-oppermann_spd: 80/89, florian-osner_csu: 3/3, dr-alexander-s-neu_die-linke: 9/10, dr-andreas-nick_cdu: 4/4, dietmar-nietan_spd: 14/16, ulli-nissen_spd: 9/10, michaela-noll_cdu: 10/12, thomas-nord_die-linke: 5/6, stefan-muller_csu: 12/13, carsten-muller_cdu: 10/10, beate-muller-gemmeke_die-grünen: 14/14, michelle-muntefering_spd: 5/9, gerd-muller-2_csu: 14/14, niema-movassat_die-linke: 8/8, dietrich-monstadt_cdu: 12/13, karsten-moring_cdu: 16/17, cornelia-mohring_die-linke: 6/6, susanne-mittag_spd: 13/14, klaus-mindrup_spd: 25/26, irene-mihalic_die-grünen: 5/5, hans-michelbach_csu: 0/7, mathias-middelberg_cdu: 4/4, dr-matthias-miersch_spd: 20/21, stephan-mayer_csu: 6/19, dr-michael-meister_cdu: 5/6, angela-merkel_cdu: 0/111, gisela-manderla_cdu: 5/6, caren-marks_spd: 2/2, katja-mast_spd: 11/13, andreas-mattfeldt_cdu: 4/6, karin-maag_cdu: 8/14, kirsten-luhmann_spd: 22/23, daniela-ludwig_csu: 24/29, dr-jan-marco-luczak_cdu: 40/42, carsten-linnemann_cdu: 11/14, patricia-lips_cdu: 6/6, burkhard-lischka_spd: 24/24, andrea-lindholz_csu: 31/31, dr-tobias-lindner_die-grünen: 6/7, dr-andreas-lenz_csu: 10/12, michael-leutert_die-linke: 3/4, antje-lezius_cdu: 11/11, stefan-liebich_die-linke: 29/29, ralph-lenkert_die-linke: 7/7, steffi-lemke_die-grünen: 4/5, sabine-leidig_die-linke: 2/2, paul-lehrieder_csu: 9/9, monika-lazar_die-grünen: 11/12, caren-lay_die-linke: 8/8, christine-lambrecht_spd: 8/10, 
dr-karl-lamers_cdu: 13/13, andreas-lammel_cdu: 4/8, katharina-landgraf_cdu: 2/2, christian-lange-2_spd: 4/4, ulrich-lange_csu: 8/8, karl-lauterbach_spd: 0/39, renate-kunast_die-grünen: 31/33, roy-kuhne_cdu: 2/6, chris-kuhn_die-grünen: 12/12, "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"stephan-kuhn_die-grünen: 13/13, anette-kramme_spd: 10/11, jutta-krellmann_die-linke: 3/5, gunther-krichbaum_cdu: 14/15, gunter-krings_cdu: 4/6, oliver-krischer_die-grünen: 28/29, rudiger-kruse_cdu: 9/10, jens-koeppen_cdu: 14/14, dr-barbel-kofler_spd: 7/10, daniela-kolbe_spd: 8/9, markus-koob_cdu: 6/11, carsten-korber_cdu: 1/4, axel-knoerig_cdu: 7/8, maria-klein-schmeink_die-grünen: 8/9, lars-klingbeil_spd: 61/69, dr-georg-kippels_cdu: 12/15, katja-kipping_die-linke: 81/83, cansel-kiziltepe_spd: 33/38, arno-klare_spd: 9/10, roderich-kiesewetter_cdu: 61/61, katja-keul_die-grünen: 13/15, anja-karliczek_cdu: 16/17, kerstin-kassner_die-linke: 8/9, gabriele-katzmarek_spd: 19/19, volker-kauder_cdu: 16/25, stefan-kaufmann_cdu: 13/14, uwe-kekeritz_die-grünen: 9/9, ulrich-wolfgang-kelber_spd: 36/37, ralf-kapschack_spd: 9/9, alois-karl_csu: 0/5, johannes-kahrs_spd: 34/34, josip-juratovic_spd: 8/8, thomas-jurk_spd: 4/4, frank-junge_spd: 8/8, erich-irlstorfer_csu: 1/16, dieter-janecek_die-grünen: 9/12, thomas-jarzombek_cdu: 15/18, andrej-hunko_die-linke: 4/5, eva-hogl_spd: 46/47, karl-holmeier_csu: 3/6, hendrik-hoppenstedt_cdu: 0/6, dr-anton-hofreiter_die-grünen: 59/84, alexander-hoffmann_csu: 9/11, prof-dr-heribert-hirte_cdu: 35/40, marcus-held_spd: 9/10, mark-helfrich_cdu: 2/2, wolfgang-hellmich_spd: 8/8, barbara-hendricks_spd: 26/26, rudolf-henke_cdu: 28/31, michael-hennrich_cdu: 7/10, gustav-herzog_spd: 41/42, ansgar-heveling_cdu: 5/10, gabriele-hiller-ohm_spd: 14/16, dirk-heidenblut_spd: 16/16, matthias-heider_cdu: 23/25, hubertus-heil_spd: 36/67, mechthild-heil_cdu: 9/16, frank-heinrich_cdu: 14/14, gabriela-heinrich_spd: 14/14, mark-hauptmann_cdu: 8/8, matthias-hauer_cdu: 8/8, britta-haselmann_die-grünen: 28/35, sebastian-hartmann_spd: 14/15, metin-hakverdi_spd: 9/10, dr-andre-hahn_die-linke: 3/5, florian-hahn_csu: 21/21, anja-hajduk_die-grünen: 19/20, rita-hagl-kehl_spd: 6/7, christian-haase_cdu: 9/8, bettina-hagedorn_spd: 12/13, dr-gregor-gysi_die-linke: 143/145, 
fritz-guntzler_cdu: 4/4, olav-gutting_cdu: 16/17, astrid-groteluschen_cdu: 3/5, uli-grotsch_spd: 9/10, markus-grubel_cdu: 6/6, manfred-grund_cdu: 4/4, oliver-grundmann_cdu: 8/9, monika-grutters_cdu: 25/26, michael-gros_spd: 8/11, kerstin-griese_spd: 11/12, nicole-gohlke_die-linke: 13/15, martin-gerster_spd: 8/9, eberhard-gienger_cdu: 7/7, kai-gehring_die-grünen: 10/11, michael-gerdes_spd: 3/7, alois-gerig_cdu: 6/8, sigmar-gabriel_spd: 0/49, ingo-gadechens_cdu: 10/13, matthias-gastel_die-grünen: 14/15, thomas-gebhart_cdu: 9/11, hans-peter-friedrich_csu: 1/6, michael-frieser_csu: 14/16, dagmar-freitag_spd: 14/13, dr-edgar-franke_spd: 16/16, ulrich-freese_spd: 2/5, thorsten-frei_cdu: 4/4, dr-maria-flachsbarth_cdu: 14/13, klaus-ernst_die-linke: 7/7, saskia-esken_spd: 4/5, hermann-farber_cdu: 8/10, dr-johannes-fechner_spd: 13/15, uwe-feiler_cdu: 14/14, dr-fritz-felgentreu_spd: 36/38, enak-ferlemann_cdu: 16/15, michael-donth_cdu: 0/12, katja-dorner_die-grünen: 18/19, marie-luise-dott_cdu: 8/8, katharina-droge_die-grünen: 15/15, hansjorg-durz_csu: 19/18, harald-ebner_die-grünen: 7/7, ekin-deligoz_die-grünen: 5/7, dr-karamba-diaby_spd: 7/8, sabine-dittmar_spd: 9/13, alexander-dobrindt_csu: 0/50, kees-de-vries_cdu: 3/4, dr-thomas-de-maiziere_cdu: 30/35, dr-daniela-de-ridder_spd: 10/16, prof-dr-lars-castellucci_spd: 9/9, gitta-connemann_cdu: 13/17, sevim-dagdelen_die-linke: 1/7, bernhard-daldrup_spd: 9/9, marco-bulow_parteilos: 39/40, martin-burkert_spd: 13/12, dr-reinhard-brandl_csu: 7/8, dr-franziska-brantner_die-grünen: 8/10, dr-ralf-brauksiepe_cdu: 4/5, dr-helge-braun_cdu: 13/14, heike-brehmer_cdu: 11/11, ralph-brinkhaus_cdu: 24/28, peter-bleser_cdu: 10/10, heidrun-bluhm_die-linke: 5/5, norbert-brackmann_cdu: 20/20, lothar-binding_spd: 27/27, steffen-bilger_cdu: 9/12, peter-beyer_cdu: 3/3, andre-berghegger_cdu: 0/4, sybille-benning_cdu: 10/11, veronika-bellmann_cdu: 1/5, manfred-behrens_cdu: 5/6, maik-beermann_cdu: 6/6, dr-dietmar-bartsch_die-linke: 32/33, 
barbel-bas_spd: 9/9, doris-barnett_spd: 10/13, norbert-barthle_cdu: 8/8, thomas-bareis_cdu: 4/9, dr-katarina-barley_spd: 43/84, dorothee-bar_csu: 44/57, kerstin-andreae_die-grünen: 29/29, niels-annen_spd: 53/57, ingrid-arndt-brauer_spd: 7/7, heike-baehrens_spd: 13/13, annalena-baerbock_die-grünen: 63/65, stephan-albani_cdu: 8/14, christian-petry_spd: 1/1, wolfgang-strengmann-kuhn_die-grünen: 8/14, ronja-kemmer_cdu: 9/9, angelika-glockner_spd: 1/5, detlef-muller_spd: 8/8, dr-alice-weidel_afd: 0/31, bernd-riexinger_die-linke: 5/8, alexander-kulitz_fdp: 2/7, gokay-akbulut_die-linke: 10/12, alexander-throm_cdu: 7/12, jens-brandenburg_fdp: 3/3, jessica-tatti_die-linke: 8/12, marc-bernhard_afd: 7/11, markus-frohnmaier_afd: 1/4, felix-schreiner_cdu: 1/8, michel-brandt_die-linke: 7/12, benjamin-strasser_fdp: 4/4, thomas-seitz_afd: 1/7, dr-nils-schmid_spd: 17/17, brigitte-freihold_die-linke: 0/0, sandra-weeser_fdp: 3/3, canan-bayram_die-grünen: 21/26, evrim-sommer_die-linke: 0/0, stefan-gelbhaar_die-grünen: 16/19, pascal-meiser_die-linke: 12/16, annette-widmann-mauz-2_cdu: 18/18, bijan-djir-sarai_fdp: 2/4, christine-aschenberg-dugnus_fdp: 4/4, florian-toncar_fdp: 9/9, dr-hermann-otto-solms_fdp: 1/8, erhard-grundl_die-grünen: 13/16, frank-schaffler_fdp: 1/5, hartmut-ebbing_fdp: 18/21, jens-lehmann_cdu: 1/5, marco-buschmann_fdp: 6/6, martin-hohmann_afd: 0/4, michael-link_fdp: 2/5, michael-grosse-bromer-2_cdu: 18/22, otto-fricke_fdp: 3/5, pascal-kober_fdp: 9/10, prof-dr-martin-neumann_fdp: 2/3, stephan-thomae_fdp: 2/9, alexander-muller-2_fdp: 5/5, bettina-stark-watzinger_fdp: 6/12, britta-katharina-dassler_fdp: 3/5, christian-lindner_fdp: 248/270, daniela-wagner_die-grünen: 10/14, marcel-klinge_fdp: 0/0, dr-stefan-ruppert_fdp: 2/6, hagen-reinhold_fdp: 2/2, ingrid-nestle_die-grünen: 9/9, jimmy-schulz_fdp: 10/12, johannes-vogel_fdp: 3/5, judith-skudelny_fdp: 2/7, lisa-badum_die-grünen: 10/10, manuel-hoferlin_fdp: 0/7, matthias-seestern-pauly_fdp: 2/2, oliver-luksic_fdp: 1/1, 
peter-aumer_csu: 12/16, ingrid-lieselotte-remmers_die-linke: 0/1, andreas-wagner-2_die-linke: 11/12, claudia-muller_die-grünen: 2/4, danyal-bayaz_die-grünen: 9/10, dr-bernd-buchholz_fdp: 0/2, dr-marc-jongen-2_afd: 1/5, jorn-konig_afd: 0/4, katrin-helling-plahr_fdp: 7/8, marcus-faber_fdp: 6/7, margit-stumpp_die-grünen: 4/5, mario-mieruch_parteilos: 9/9, martin-e-renner_afd: 0/5, michael-schrodi_spd: 5/6, prof-dr-axel-gehrke_afd: 10/14, renata-alt_fdp: 4/6, stefan-schmidt_die-grünen: 4/4, stephan-protschka_afd: 5/6, thomas-ehrhorn_afd: 2/4, till-mansmann_fdp: 0/5, tobias-matthias-peterka_afd: 1/2, tobias-pfluger_die-linke: 3/10, volker-munz_afd: 1/4, wolfgang-kubicki_fdp: 16/35, daniela-langer_fdp: 9/10, thomas-heilmann_cdu: 28/39, enrico-komning_afd: 5/5, ulrike-schielke-ziesing_afd: 2/6, leif-erik-holm_afd: 0/3, birke-bull_die-linke: 2/4, katrin-budde_spd: 1/3, matthias-hohn_die-linke: 1/4, elisabeth-motschmann-2_cdu: 20/23, christian-sauter_fdp: 0/7, petra-nicolaisen_cdu: 18/18, nikolas-lobel_cdu: 5/15, linda-teuteberg_fdp: 0/9, margarete-bause_die-grünen: 7/10, karsten-klein_fdp: 1/5, katja-hessel_fdp: 2/3, thomas-hacker_fdp: 4/8, christoph-meyer_fdp: 21/21, dr-bettina-hoffmann_die-grünen: 6/8, hans-jurgen-irmer_cdu: 1/9, nicola-beer_fdp: 9/10, timon-gremmels_spd: 16/16, achim-kessler_die-linke: 5/6, christian-durr_fdp: 6/8, dr-gero-hocker_fdp: 2/5, filiz-polat_die-grünen: 7/8, ottmar-von-holtz_die-grünen: 9/10, victor-perli_die-linke: 6/6, jens-beeck_fdp: 6/7, markus-tons_spd: 3/3, karlheinz-busen_fdp: 3/5, bernd-reuther_fdp: 1/2, marie-agnes-strack-zimmermann_fdp: 4/6, katrin-werner_die-linke: 1/2, heiko-maas_spd: 2/56, markus-uhl_cdu: 10/12, astrid-damerow_cdu: 14/15, dr-michael-von-abercron_cdu: 7/9, luise-amtsberg-2_die-grünen: 3/13, christoph-matschie-2_spd: 6/12, uwe-schmidt-3_spd: 12/14, dr-gottfried-curio_afd: 0/2, daniel-fost_fdp: 1/9, nicole-bauer_fdp: 3/6, ulrich-lechte_fdp: 28/29, dietmar-friedhoff_afd: 2/5, jens-kestner_afd: 0/3, rene-springer_afd: 
8/12, matthias-buttner_afd: 1/3, frank-pasemann_afd: 1/4, andreas-mrosek_afd: 0/3, rudiger-lucassen_afd: 12/13, roland-hartwig_afd: 5/6, jorg-schneider_afd: 6/7, udo-hemmelgarn_afd: 2/3, karsten-hilse_afd: 0/5, tino-chrupalla_afd: 0/3, jens-maier_afd: 9/19, "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"heiko-hessenkemper_afd: 0/1, ulrich-oehme_afd: 1/5, jan-nolte_afd: 11/12, albrecht-glaser_afd: 8/18, uwe-schulz-2_afd: 3/6, mariana-harder-kuhnel_afd: 0/5, jurgen-pohl_afd: 2/6, robby-schlund_afd: 12/13, andreas-bleck_afd: 6/7, nicole-hochst_afd: 5/11, sebastian-munzenmaier_afd: 0/9, heiko-wildberg_afd: 5/7, johannes-huber_afd: 6/14, wolfgang-wiehle_afd: 4/9, gerold-otten_afd: 3/8, martin-hebner_afd: 0/4, hansjorg-muller_afd: 12/16, peter-boehringer_afd: 4/10, paul-podolay_afd: 4/5, martin-sichert_afd: 30/30, rainer-kraft_afd: 5/7, peter-felser_afd: 8/10, dirk-spaniel_afd: 3/6, lothar-maier-2_afd: 3/7, jurgen-braun_afd: 1/4, martin-hess_afd: 1/5, melanie-bernstein_cdu: 15/17, claudia-schmidtke_cdu: 3/5, philipp-amthor_cdu: 19/32, silvia-breher_cdu: 24/24, dietlind-tiemann_cdu: 4/7, eckhard-gnodtke_cdu: 2/3, sepp-muller_cdu: 7/8, christoph-bernstiel_cdu: 3/3, torsten-schweiger_cdu: 3/3, carsten-brodesser_cdu: 12/17, hermann-josef-tebroke_cdu: 5/11, stefan-rouenhoff_cdu: 5/7, marc-henrichmann_cdu: 5/5, kerstin-vieregge_cdu: 6/6, paul-ziemiak_cdu: 8/8, hans-jurgen-thies_cdu: 8/7, ingmar-jung_cdu: 5/10, norbert-altenkamp_cdu: 5/9, bettina-wiesmann_cdu: 0/7, stefan-sauer_cdu: 4/8, bjorn-simon_cdu: 20/21, astrid-mannes_cdu: 17/18, josef-oster_cdu: 0/16, andreas-steier_cdu: 0/0, torbjorn-kartes_cdu: 4/8, marc-biadacz_cdu: 11/11, axel-eduard-fischer_cdu: 7/8, axel-muller_cdu: 2/5, katrin-staffler_csu: 9/11, bernhard-loos_csu: 6/11, michael-kuffer_csu: 0/17, stephan-pilsinger_csu: 0/16, michael-kiesling_csu: 18/20, thomas-erndl_csu: 1/4, sebastian-brehm_csu: 3/8, sven-lehmann_die-grünen: 10/14, manuela-rottmann_die-grünen: 4/9, anna-christmann_die-grünen: 8/9, gerhard-zickenheiner_die-grünen: 9/11, lorenz-gosta-beutin-2_die-linke: 2/4, amira-mohamed-ali_die-linke: 7/9, friedrich-straetmanns_die-linke: 12/11, sylvia-gabelmann_die-linke: 2/3, soren-pellmann_die-linke: 9/11, jorg-cezanne_die-linke: 2/6, eva-schreiber_die-linke: 9/11, simone-barrientos_die-linke: 6/11, 
susanne-ferschl_die-linke: 13/13, doris-achelwilm_die-linke: 1/3, ulla-ihnen_fdp: 2/3, grigorios-aggelidis_fdp: 0/1, frank-sitta_fdp: 2/2, katharina-kloke_fdp: 8/8, markus-herbrand_fdp: 1/2, reinhard-arnold-houben_fdp: 9/9, nicole-westig_fdp: 3/6, roman-muller-bohm_fdp: 4/4, olaf-in-der-beek_fdp: 2/2, carlo-cronenberg_fdp: 1/1, frank-muller-rosentritt_fdp: 6/6, gerald-ullrich_fdp: 0/1, carina-konrad_fdp: 0/7, mario-brandenburg_fdp: 2/3, thomas-sattelberger_fdp: 11/11, lukas-kohler_fdp: 5/7, andrew-ullmann_fdp: 6/7, christian-jung_fdp: 10/10, christoph-hoffmann_fdp: 9/16, mathias-stein_spd: 12/13, siemtje-moller_spd: 16/16, marja-liisa-vollers_spd: 11/15, falko-mohrs_spd: 5/7, manja-schule_spd: 16/19, claudia-moll_spd: 4/20, helge-lindh_spd: 6/16, elvan-korkmaz_spd: 7/11, wiebke-esdar_spd: 30/30, nezahat-baradari_spd: 0/0, esther-dilcher_spd: 5/5, elisabeth-kaiser_spd: 10/10, leni-breymaier_spd: 11/12, josephine-ortleb_spd: 4/5, fabian-jacobi_afd: 0/6, jochen-haug_afd: 0/9, berengar-elsner-von-gronow_afd: 0/2, bruno-hollnagel_afd: 5/11, uwe-kamann_parteilos: 0/2, yasmin-fahimi_spd: 10/16, stefan-keuter_afd: 5/7, michael-espendiller_afd: 5/10, franziska-gminder_afd: 0/0, corinna-miazga_afd: 4/11, gotz-fromming_afd: 0/1, birgit-malsack-winkemann_afd: 0/1, roman-reusch_afd: 2/4, norbert-kleinwachter_afd: 0/1, joana-cotar_afd: 4/9, wilhelm-von-gottberg_afd: 0/2, harald-weyel_afd: 2/3, uwe-witt_afd: 2/3, christian-wirth_afd: 0/3, verena-hartmann_afd: 2/10, martin-reichardt_afd: 0/1, marcus-buhl_afd: 1/1, anton-friesen_afd: 25/26, gyde-jensen_fdp: 1/1, "
]
}
],
"source": [
"def unique_filename_parts(pattern, name_slice):\n",
"    '''Return the sorted, distinct name parts of the corpus files matching `pattern`.\n",
"\n",
"    The stem of every matching file is split at '_', the fields selected by\n",
"    `name_slice` are re-joined with '_', and each resulting part is reported\n",
"    exactly once, no matter how many files share it.\n",
"\n",
"    Args:\n",
"        pattern: Glob pattern, evaluated relative to `corpus_dir`.\n",
"        name_slice: Slice applied to the '_'-separated fields of the file stem.\n",
"\n",
"    Returns:\n",
"        Sorted list of distinct name parts (empty if no file matches).\n",
"    '''\n",
"    # A set collapses files that share the selected fields, so every part is counted once.\n",
"    # NOTE(review): presumably several answer files can belong to one question - confirm\n",
"    # against the code that writes the corpus files.\n",
"    parts = {'_'.join(f.stem.split('_')[name_slice]) for f in corpus_dir.glob(pattern)}\n",
"    return sorted(parts)\n",
"\n",
"# Print, for every deputy, the number of answered name parts (.txt files)\n",
"# against the number of question name parts (.url files).\n",
"for deputy in deputies:\n",
"\n",
"    deputy_prefix = deputy_file_name_part(deputy)\n",
"\n",
"    # Both file kinds are compared on the first four '_'-separated fields of the stem.\n",
"    questions = unique_filename_parts(deputy_prefix + '*.url', slice(4))\n",
"    answered = unique_filename_parts(deputy_prefix + '*.txt', slice(4))\n",
"\n",
"    # Set lookup keeps the membership test cheap for deputies with many questions.\n",
"    known_questions = set(questions)\n",
"    answer_without_question = [a for a in answered if a not in known_questions]\n",
"\n",
"    print('{}: {}/{},'.format(deputy_prefix, len(answered), len(questions)), end=' ')\n",
"\n",
"    # An answer whose question file is missing is reported on separate lines.\n",
"    if answer_without_question:\n",
"        print()\n",
"        print('Following questions are answered, but the question itself is not known:')\n",
"        print(', '.join(answer_without_question))\n",
"        print()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" © D. Speicher \n",
" Licensed under a \n",
" \n",
" CC BY-NC 4.0\n",
" .\n",
" | \n",
" \n",
" Acknowledgments:\n",
" This material was prepared within the project\n",
" \n",
" P3ML\n",
" \n",
" which is funded by the Ministry of Education and Research of Germany (BMBF)\n",
" under grant number 01/S17064. The authors gratefully acknowledge this support.\n",
" | \n",
"
\n",
"
"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}