{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Importing the libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pandasdmx import Request" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from bokeh.io import output_notebook, show\n", "from bokeh.plotting import figure\n", "from bokeh.models import ColumnDataSource, LabelSet" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force === true) {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " var el = document.getElementById(\"4a7c2b58-2129-4240-9990-807e61c56583\");\n", " el.textContent = \"BokehJS \" + Bokeh.version + \" successfully loaded.\";\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"4a7c2b58-2129-4240-9990-807e61c56583\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '4a7c2b58-2129-4240-9990-807e61c56583' but no matching script tag was found. \")\n", " return false;\n", " }\n", "\n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.js\"];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " },\n", " \n", " function(Bokeh) {\n", " \n", " document.getElementById(\"4a7c2b58-2129-4240-9990-807e61c56583\").textContent = \"BokehJS is loading...\";\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " var cell = $(document.getElementById(\"4a7c2b58-2129-4240-9990-807e61c56583\")).parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Connecting to the webservice" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "estat = Request('ESTAT')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Downloading dataflow definitions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dflow = estat.dataflow()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Defining functions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data structure" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def dsd_resp(id):\n", " \"\"\"\n", " The function takes a dataflow identifier and requests from the Eurostat SDMX api the dataflow's datastructure, datastructure id and datastructure response.\n", " \"\"\" \n", " # Defining the data set's dataflow based on the data set id.\n", " df = dflow.dataflow[id]\n", " \n", " # Printing the table's name\n", " print(\"The table's name:\")\n", " print(dflow.write().dataflow.loc[id][0])\n", " \n", " # Acquiring the dataflow's datastructure id\n", " dsd_id = df.structure.id\n", " \n", " # Requesting for the dataflow's datastructure\n", " dsd_resp = estat.get(resource_type = 'datastructure', resource_id = dsd_id)\n", " \n", " # Requesting the dataflow's datastructure\n", " dsd = dsd_resp.datastructure[dsd_id]\n", " \n", " return dsd_resp, dsd_id, dsd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Datastructure attributes" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def dsd_att(dsd):\n", " \"\"\"The function takes a table datastructure definition and prints its measure, dimension and attribute lists.\"\"\"\n", " # Measures\n", " print(\"Measures:\")\n", " print(dsd.measures.aslist())\n", " \n", " # Dimensions\n", " print(\"\\nDimensions:\")\n", " print(dsd.dimensions.aslist())\n", " \n", " # Attributes\n", " print(\"\\nAttributes:\")\n", " print(dsd.attributes.aslist())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Selecting the tables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Listing the tables from the 'High-tech industry and knowledge-intensive services (**htec**)' database" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name
dataflow
htec_cis3Innovation in high-tech sectors in SMEs (CIS3)...
htec_cis4Innovation in high-tech sectors (CIS 2004), EU...
htec_cis5Innovation in high-tech sectors (CIS 2006), EU...
htec_cis6Innovation in high-tech sectors (CIS 2008, CIS...
htec_eco_entEnterprises in high-tech sectors by NACE Rev.1...
htec_eco_ent2Enterprises in high-tech sectors by NACE Rev.2...
htec_eco_sbsEconomic data in high-tech sectors by NACE Rev...
htec_eco_sbs2Economic data in high-tech sectors by NACE Rev...
htec_emp_natEmployment in technology and knowledge-intensi...
htec_emp_nat2Employment in technology and knowledge-intensi...
htec_emp_niscedEmployment in technology and knowledge-intensi...
htec_emp_nisced2Employment in technology and knowledge-intensi...
htec_emp_niscoEmployment in technology and knowledge-intensi...
htec_emp_nisco2Employment in technology and knowledge-intensi...
htec_emp_regEmployment in technology and knowledge-intensi...
htec_emp_reg2Employment in technology and knowledge-intensi...
htec_emp_riscedEmployment in technology and knowledge-intensi...
htec_emp_risced2Employment in technology and knowledge-intensi...
htec_emp_riscoEmployment in technology and knowledge-intensi...
htec_emp_risco2Employment in technology and knowledge-intensi...
htec_emp_sbsEmployment statistics on high-tech industries ...
htec_emp_sbs2Employment statistics on high-tech industries ...
htec_kia_empAnnual data on employment in knowledge-intensi...
htec_kia_emp2Annual data on employment in knowledge-intensi...
htec_si_exp4High-tech exports - Exports of high technology...
htec_sti_expBusiness enterprise R&D expenditure in high-te...
htec_sti_exp2Business enterprise R&D expenditure in high-te...
htec_sti_persBusiness enterprise R&D personnel in high-tech...
htec_sti_pers2Business enterprise R&D personnel in high-tech...
htec_trd_group4High-tech trade by high-tech group of products...
htec_trd_tot4Total high-tech trade in million euro and as a...
htec_vci_stage1Venture capital investment by aggregated stage...
htec_vci_stage2Venture capital investment by detailed stage o...
\n", "
" ], "text/plain": [ " name\n", "dataflow \n", "htec_cis3 Innovation in high-tech sectors in SMEs (CIS3)...\n", "htec_cis4 Innovation in high-tech sectors (CIS 2004), EU...\n", "htec_cis5 Innovation in high-tech sectors (CIS 2006), EU...\n", "htec_cis6 Innovation in high-tech sectors (CIS 2008, CIS...\n", "htec_eco_ent Enterprises in high-tech sectors by NACE Rev.1...\n", "htec_eco_ent2 Enterprises in high-tech sectors by NACE Rev.2...\n", "htec_eco_sbs Economic data in high-tech sectors by NACE Rev...\n", "htec_eco_sbs2 Economic data in high-tech sectors by NACE Rev...\n", "htec_emp_nat Employment in technology and knowledge-intensi...\n", "htec_emp_nat2 Employment in technology and knowledge-intensi...\n", "htec_emp_nisced Employment in technology and knowledge-intensi...\n", "htec_emp_nisced2 Employment in technology and knowledge-intensi...\n", "htec_emp_nisco Employment in technology and knowledge-intensi...\n", "htec_emp_nisco2 Employment in technology and knowledge-intensi...\n", "htec_emp_reg Employment in technology and knowledge-intensi...\n", "htec_emp_reg2 Employment in technology and knowledge-intensi...\n", "htec_emp_risced Employment in technology and knowledge-intensi...\n", "htec_emp_risced2 Employment in technology and knowledge-intensi...\n", "htec_emp_risco Employment in technology and knowledge-intensi...\n", "htec_emp_risco2 Employment in technology and knowledge-intensi...\n", "htec_emp_sbs Employment statistics on high-tech industries ...\n", "htec_emp_sbs2 Employment statistics on high-tech industries ...\n", "htec_kia_emp Annual data on employment in knowledge-intensi...\n", "htec_kia_emp2 Annual data on employment in knowledge-intensi...\n", "htec_si_exp4 High-tech exports - Exports of high technology...\n", "htec_sti_exp Business enterprise R&D expenditure in high-te...\n", "htec_sti_exp2 Business enterprise R&D expenditure in high-te...\n", "htec_sti_pers Business enterprise R&D personnel in high-tech...\n", "htec_sti_pers2 Business enterprise R&D personnel in high-tech...\n", "htec_trd_group4 High-tech trade by high-tech group of products...\n", "htec_trd_tot4 Total high-tech trade in million euro and as a...\n", "htec_vci_stage1 Venture capital investment by aggregated stage...\n", "htec_vci_stage2 Venture capital investment by detailed stage o..." ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dflow.write().dataflow[dflow.write().dataflow.index.str.startswith('htec') == True]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The tables I am going to analyze" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "id1 = 'htec_sti_exp2'" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Business enterprise R&D expenditure in high-tech sectors - NACE Rev. 2'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dflow.write().dataflow.loc[id1][0]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "id2 = 'htec_vci_stage2'" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "'Venture capital investment by detailed stage of development (from 2007, source: EVCA)'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dflow.write().dataflow.loc[id2][0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Business R&D expenditure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Getting the table's dataflow definition and datastructure" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The table's name:\n", "Business enterprise R&D expenditure in high-tech sectors - NACE Rev. 2\n" ] } ], "source": [ "dsd_resp1, dsd_id1, dsd1 = dsd_resp(id1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Datastructure measures, dimensions, attributes and codelist" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Measures:\n", "[PrimaryMeasure | OBS_VALUE]\n", "\n", "Dimensions:\n", "[Dimension | FREQ, Dimension | UNIT, Dimension | NACE_R2, Dimension | GEO, TimeDimension | TIME_PERIOD]\n", "\n", "Attributes:\n", "[DataAttribute | OBS_FLAG, DataAttribute | OBS_STATUS]\n" ] } ], "source": [ "dsd_att(dsd1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Codelist:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dim_or_attrname
FREQFREQDFREQ
ADAnnual
DDDaily
HDHalf-year
MDMonthly
QDQuarterly
SDSemi-annual
WDWeekly
UNITUNITDUNIT
MIO_EURDMillion euro
NACE_R2NACE_R2DNACE_R2
CDManufacturing
C_HTCDHigh-technology manufacturing
C_HTC_MDMedium high-technology manufacturing
C_LTCDLow-technology manufacturing
C_LTC_MDMedium low-technology manufacturing
G-NDServices of the business economy
TOTALDTotal - all NACE activities
GEOGEODGEO
ATDAustria
BEDBelgium
BGDBulgaria
CHDSwitzerland
CN_X_HKDChina except Hong Kong
CYDCyprus
CZDCzech Republic
DEDGermany (until 1990 former territory of the FRG)
DKDDenmark
EEDEstonia
ELDGreece
.........
NLDNetherlands
NODNorway
PLDPoland
PTDPortugal
RODRomania
RSDSerbia
RUDRussia
SEDSweden
SIDSlovenia
SKDSlovakia
TRDTurkey
UKDUnited Kingdom
USDUnited States
OBS_FLAGOBS_FLAGAObservation flag.
bAbreak in time series
cAconfidential
dAdefinition differs, see metadata
eAestimated
fAforecast
iAsee metadata (phased out)
nAnot significant
pAprovisional
rArevised
sAEurostat estimate (phased out)
uAlow reliability
zAnot applicable
OBS_STATUSOBS_STATUSAObservation status.
-Anot applicable or real zero or zero by default
0Aless than half of the unit used
naAnot available
\n", "

76 rows × 2 columns

\n", "
" ], "text/plain": [ " dim_or_attr \\\n", "FREQ FREQ D \n", " A D \n", " D D \n", " H D \n", " M D \n", " Q D \n", " S D \n", " W D \n", "UNIT UNIT D \n", " MIO_EUR D \n", "NACE_R2 NACE_R2 D \n", " C D \n", " C_HTC D \n", " C_HTC_M D \n", " C_LTC D \n", " C_LTC_M D \n", " G-N D \n", " TOTAL D \n", "GEO GEO D \n", " AT D \n", " BE D \n", " BG D \n", " CH D \n", " CN_X_HK D \n", " CY D \n", " CZ D \n", " DE D \n", " DK D \n", " EE D \n", " EL D \n", "... ... \n", " NL D \n", " NO D \n", " PL D \n", " PT D \n", " RO D \n", " RS D \n", " RU D \n", " SE D \n", " SI D \n", " SK D \n", " TR D \n", " UK D \n", " US D \n", "OBS_FLAG OBS_FLAG A \n", " b A \n", " c A \n", " d A \n", " e A \n", " f A \n", " i A \n", " n A \n", " p A \n", " r A \n", " s A \n", " u A \n", " z A \n", "OBS_STATUS OBS_STATUS A \n", " - A \n", " 0 A \n", " na A \n", "\n", " name \n", "FREQ FREQ FREQ \n", " A Annual \n", " D Daily \n", " H Half-year \n", " M Monthly \n", " Q Quarterly \n", " S Semi-annual \n", " W Weekly \n", "UNIT UNIT UNIT \n", " MIO_EUR Million euro \n", "NACE_R2 NACE_R2 NACE_R2 \n", " C Manufacturing \n", " C_HTC High-technology manufacturing \n", " C_HTC_M Medium high-technology manufacturing \n", " C_LTC Low-technology manufacturing \n", " C_LTC_M Medium low-technology manufacturing \n", " G-N Services of the business economy \n", " TOTAL Total - all NACE activities \n", "GEO GEO GEO \n", " AT Austria \n", " BE Belgium \n", " BG Bulgaria \n", " CH Switzerland \n", " CN_X_HK China except Hong Kong \n", " CY Cyprus \n", " CZ Czech Republic \n", " DE Germany (until 1990 former territory of the FRG) \n", " DK Denmark \n", " EE Estonia \n", " EL Greece \n", "... ... \n", " NL Netherlands \n", " NO Norway \n", " PL Poland \n", " PT Portugal \n", " RO Romania \n", " RS Serbia \n", " RU Russia \n", " SE Sweden \n", " SI Slovenia \n", " SK Slovakia \n", " TR Turkey \n", " UK United Kingdom \n", " US United States \n", "OBS_FLAG OBS_FLAG Observation flag. \n", " b break in time series \n", " c confidential \n", " d definition differs, see metadata \n", " e estimated \n", " f forecast \n", " i see metadata (phased out) \n", " n not significant \n", " p provisional \n", " r revised \n", " s Eurostat estimate (phased out) \n", " u low reliability \n", " z not applicable \n", "OBS_STATUS OBS_STATUS Observation status. \n", " - not applicable or real zero or zero by default \n", " 0 less than half of the unit used \n", " na not available \n", "\n", "[76 rows x 2 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dsd_resp1.write().codelist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Requesting the data" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dresp1 = estat.get(resource_type = 'data',\n", " resource_id = id1,\n", " params = {'references': None,\n", " 'startPeriod': '2014',\n", " 'endPeriod': '2014'\n", " })" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dat1 = dresp1.data" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TIME_PERIOD'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dat1.dim_at_obs" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "280" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser1 = list(dat1.series)\n", "len(ser1)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "SeriesKey(UNIT='MIO_EUR', NACE_R2='C', GEO='CY', FREQ='A')" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser1[5].key" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AT',\n", " 'BE',\n", " 'BG',\n", " 'CH',\n", " 'CN_X_HK',\n", " 'CY',\n", " 'CZ',\n", " 'DE',\n", " 'DK',\n", " 'EE',\n", " 'EL',\n", " 'ES',\n", " 'EU28',\n", " 'FI',\n", " 'FR',\n", " 'HR',\n", " 'HU',\n", " 'IE',\n", " 'IS',\n", " 'IT',\n", " 'JP',\n", " 'KR',\n", " 'LT',\n", " 'LU',\n", " 'LV',\n", " 'ME',\n", " 'MT',\n", " 'NL',\n", " 'NO',\n", " 'PL',\n", " 'PT',\n", " 'RO',\n", " 'RS',\n", " 'RU',\n", " 'SE',\n", " 'SI',\n", " 'SK',\n", " 'TR',\n", " 'UK',\n", " 'US'}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(s.key.GEO for s in dat1.series)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Writing the data into DataFrame" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NACE_R2CC_HTCC_HTC_MC_LTCC_LTC_MG-NTOTAL
GEO
CY5.04.00.00.01.04.015.0
CZ976.0107.0609.037.0223.0688.01730.0
DE49482.011541.033187.01343.03401.07061.056997.0
DK2814.01526.0956.0236.096.01985.04865.0
ES3096.0748.01519.0426.0403.03212.06784.0
FI3126.01873.0892.0190.0171.01133.04410.0
HR85.049.028.06.03.073.0164.0
HU526.0232.0201.044.049.0435.01022.0
IT8736.01822.05029.0966.0919.03163.012344.0
LT50.015.017.012.05.064.0116.0
MT10.05.03.01.01.023.033.0
NL4389.0942.02455.0463.0529.02627.07433.0
PL821.0104.0399.0145.0173.0891.01800.0
SI455.0204.0165.022.065.0224.0689.0
TR1562.0166.01064.0117.0216.01404.03014.0
UK9656.01745.06080.0689.01143.014296.024730.0
\n", "
" ], "text/plain": [ "NACE_R2 C C_HTC C_HTC_M C_LTC C_LTC_M G-N TOTAL\n", "GEO \n", "CY 5.0 4.0 0.0 0.0 1.0 4.0 15.0\n", "CZ 976.0 107.0 609.0 37.0 223.0 688.0 1730.0\n", "DE 49482.0 11541.0 33187.0 1343.0 3401.0 7061.0 56997.0\n", "DK 2814.0 1526.0 956.0 236.0 96.0 1985.0 4865.0\n", "ES 3096.0 748.0 1519.0 426.0 403.0 3212.0 6784.0\n", "FI 3126.0 1873.0 892.0 190.0 171.0 1133.0 4410.0\n", "HR 85.0 49.0 28.0 6.0 3.0 73.0 164.0\n", "HU 526.0 232.0 201.0 44.0 49.0 435.0 1022.0\n", "IT 8736.0 1822.0 5029.0 966.0 919.0 3163.0 12344.0\n", "LT 50.0 15.0 17.0 12.0 5.0 64.0 116.0\n", "MT 10.0 5.0 3.0 1.0 1.0 23.0 33.0\n", "NL 4389.0 942.0 2455.0 463.0 529.0 2627.0 7433.0\n", "PL 821.0 104.0 399.0 145.0 173.0 891.0 1800.0\n", "SI 455.0 204.0 165.0 22.0 65.0 224.0 689.0\n", "TR 1562.0 166.0 1064.0 117.0 216.0 1404.0 3014.0\n", "UK 9656.0 1745.0 6080.0 689.0 1143.0 14296.0 24730.0" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iter1 = (s for s in dat1.series if (s.key.GEO in ['CN_X_HK', 'EU28', 'US']) == False)\n", "tab1 = dresp1.write(iter1)\n", "tab1 = tab1.stack('GEO')\n", "tab1.columns = tab1.columns.droplevel(['FREQ','UNIT']) # Dropping 'FREQ' and 'UNIT' levels\n", "tab1 = tab1.loc['2014']\n", "tab1.index = tab1.index.droplevel(0) # Dropping the 'TIME_PERIOD' axis\n", "tab1.dropna(axis = 0, how = 'any', inplace = True)\n", "tab1" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['CY',\n", " 'CZ',\n", " 'DE',\n", " 'DK',\n", " 'ES',\n", " 'FI',\n", " 'HR',\n", " 'HU',\n", " 'IT',\n", " 'LT',\n", " 'MT',\n", " 'NL',\n", " 'PL',\n", " 'SI',\n", " 'TR',\n", " 'UK']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(tab1.index)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "source = ColumnDataSource(data = dict(names = list(tab1.index),\n", " manuf = tab1.loc[:,'C'],\n", " serv = tab1.loc[:,'G-N']))\n", "\n", "p = figure(plot_width = 600, plot_height = 600)\n", "p.scatter(x = 'manuf' , y = 'serv', size = 10, color = \"purple\", alpha = 0.6, source = source)\n", "p.xaxis.axis_label = 'Manufacturing'\n", "p.yaxis.axis_label = 'Business Services'\n", "\n", "labels = LabelSet(x = 'manuf', y = 'serv', text = 'names', source = source, render_mode='canvas', level='glyph')\n", "\n", "\n", "p.add_layout(labels)\n", "show(p)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }