{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# General Assumptions and Import Statements\n",
    "The aim of this use case is to analyse whether there is a correlation between economic welfare and migration. The hypothesis is that people move to places where welfare is higher."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "\n",
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# Import datenguidepy package\n",
    "\n",
    "# datenguidepy is imported from the repository checkout: if the kernel was not\n",
    "# started in the \"datenguide-python\" folder, step up one directory first\n",
    "# (assumes the notebook lives in a direct subfolder of the repository).\n",
    "if os.path.basename(os.getcwd()) != \"datenguide-python\":\n",
    "    os.chdir(\"..\")\n",
    "\n",
    "from datenguidepy.query_builder import Query\n",
    "from datenguidepy.query_helper import get_regions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fetch Data for the Bundesländer in Germany with Datenguidepy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "region_id\n",
       "10 Saarland\n",
       "11 Berlin\n",
       "12 Brandenburg\n",
       "13 Mecklenburg-Vorpommern\n",
       "14 Sachsen\n",
       "15 Sachsen-Anhalt\n",
       "16 Thüringen\n",
       "01 Schleswig-Holstein\n",
       "02 Hamburg\n",
       "03 Niedersachsen\n",
       "04 Bremen\n",
       "05 Nordrhein-Westfalen\n",
       "06 Hessen\n",
       "07 Rheinland-Pfalz\n",
       "08 Baden-Württemberg, Land\n",
       "09 Bayern\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First get the region codes to query data from the Bundesländer.\n",
    "\n",
    "region_codes = get_regions().query('level == \"nuts1\"').name\n",
    "region_codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a query for the regions and variables of interest\n",
    "region_query = Query.region(list(region_codes.index))\n",
    "region_query.add_field('BEVSTD')  # population\n",
    "region_query.add_field('BEV085')  # people moving in (administrative districts)\n",
    "region_query.add_field('BEV086')  # people moving out (administrative districts)\n",
    "region_query.add_field('BIP804')  # GDP per capita\n",
    "\n",
    "# Get the results for the query.\n",
    "# NOTE: this sends requests to the Datenguide API. If the response body contains\n",
    "# errors, datenguidepy raises a RuntimeError; for temporary server-side problems\n",
    "# re-running this cell later may help.\n",
    "results = region_query.results()\n",
    "\n",
    "# Preview the first data rows\n",
    "results.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the net migration per 1000 inhabitants:\n",
    "# (people moving in - people moving out) / population * 1000\n",
    "# A new frame is created so that `results` stays untouched and the cell can be re-run safely.\n",
    "migration_df = results.assign(\n",
    "    migration=(results.BEV085 - results.BEV086) / results.BEVSTD * 1000\n",
    ")\n",
    "\n",
    "# Overview of the dataset:\n",
    "migration_df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create a Visualisation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Marker area scale: one unit of marker area represents this many inhabitants\n",
    "INHABITANTS_PER_MARKER_UNIT = 10000\n",
    "\n",
    "# Use the pandas reductions (they skip missing values) instead of the built-in min/max\n",
    "year_min = int(migration_df[\"year\"].min())\n",
    "year_max = int(migration_df[\"year\"].max())\n",
    "pop_min = migration_df[\"BEVSTD\"].min()\n",
    "pop_max = migration_df[\"BEVSTD\"].max()\n",
    "\n",
    "# Determine plot size\n",
    "fig, ax = plt.subplots(figsize=(25, 15))\n",
    "\n",
    "# Make a scatterplot with adjusted color and size mapping\n",
    "points = ax.scatter(\n",
    "    migration_df[\"BIP804\"],\n",
    "    migration_df[\"migration\"],\n",
    "    c=migration_df[\"year\"],\n",
    "    s=migration_df[\"BEVSTD\"] / INHABITANTS_PER_MARKER_UNIT,\n",
    "    cmap=\"GnBu\",\n",
    "    alpha=0.5,\n",
    "    vmin=year_min,\n",
    "    vmax=year_max,\n",
    ")\n",
    "fig.colorbar(points, ax=ax, label=\"Year\", ticks=range(year_min, year_max + 1, 2))\n",
    "\n",
    "# Draw the regression line\n",
    "sns.regplot(x=\"BIP804\", y=\"migration\", data=migration_df, scatter=False, color=\"red\", ax=ax)\n",
    "\n",
    "# Create a legend for the population size: empty scatters serve only as legend handles.\n",
    "for pop in np.linspace(pop_min, pop_max, num=4):\n",
    "    ax.scatter(\n",
    "        [],\n",
    "        [],\n",
    "        c=\"k\",\n",
    "        alpha=0.3,\n",
    "        s=pop / INHABITANTS_PER_MARKER_UNIT,\n",
    "        # One decimal place, so populations below one million are not shown as \"0 Million\"\n",
    "        label=f\"   {pop / 1e6:.1f} Million\",\n",
    "    )\n",
    "ax.legend(\n",
    "    scatterpoints=1,\n",
    "    frameon=False,\n",
    "    labelspacing=3,\n",
    "    title=\"Population\",\n",
    "    title_fontsize=\"larger\",\n",
    "    loc=\"best\",\n",
    "    bbox_to_anchor=(0.5, -0.25, 0.5, 0.5),\n",
    ")\n",
    "\n",
    "# Add title and labels (set after regplot, which labels the axes with the column names)\n",
    "ax.set_title(\"Migration and GDP\", fontweight=\"bold\")\n",
    "ax.set_xlabel(\"Gross Domestic Product per Capita\", fontweight=\"bold\")\n",
    "ax.set_ylabel(\"Net Migration per 1000 Inhabitants\", fontweight=\"bold\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The analysis is based on data for all 16 Bundesländer in Germany from 1995 to 2017. Over the years, GDP and migration increased. In general, the plot shows a positive correlation between (net) migration and GDP."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "datenguide",
   "language": "python",
   "name": "datenguide"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4+"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}