{ "cells": [ { "cell_type": "markdown", "id": "e93d157f-29f0-4191-80c1-147920f90212", "metadata": { "tags": [] }, "source": [ "## Court Caseload Extract" ] }, { "cell_type": "code", "execution_count": 8, "id": "265a68e5-bf04-468a-ae79-a58d0a79f6bf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pydbtools in /opt/conda/lib/python3.9/site-packages (5.6.4)\n", "Requirement already satisfied: sql-metadata<3.0.0,>=2.3.0 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (2.15.0)\n", "Requirement already satisfied: awswrangler>=2.12.0 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (3.11.0)\n", "Requirement already satisfied: sqlparse>=0.5.0 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (0.5.3)\n", "Requirement already satisfied: pyarrow>=14.0.0 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (18.1.0)\n", "Requirement already satisfied: Jinja2>=3.1.0 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (3.1.6)\n", "Requirement already satisfied: boto3>=1.7.4 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (1.24.82)\n", "Requirement already satisfied: arrow-pd-parser>=1.3.9 in /opt/conda/lib/python3.9/site-packages (from pydbtools) (2.2.0)\n", "Requirement already satisfied: mojap-metadata[arrow]<2.0.0,>=1.10.0 in /opt/conda/lib/python3.9/site-packages (from arrow-pd-parser>=1.3.9->pydbtools) (1.15.3)\n", "Requirement already satisfied: pandas>=1.2 in /opt/conda/lib/python3.9/site-packages (from arrow-pd-parser>=1.3.9->pydbtools) (1.3.3)\n", "Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /opt/conda/lib/python3.9/site-packages (from arrow-pd-parser>=1.3.9->pydbtools) (5.2.1)\n", "Collecting numpy<2.1.0,>=1.26\n", " Using cached numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)\n", "Requirement already satisfied: typing-extensions<5.0.0,>=4.4.0 in /opt/conda/lib/python3.9/site-packages (from awswrangler>=2.12.0->pydbtools) (4.12.2)\n", "Requirement already satisfied: botocore<2.0.0,>=1.23.32 in /opt/conda/lib/python3.9/site-packages (from awswrangler>=2.12.0->pydbtools) (1.27.82)\n", "Requirement already satisfied: packaging<25.0,>=21.1 in /opt/conda/lib/python3.9/site-packages (from awswrangler>=2.12.0->pydbtools) (24.2)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.9/site-packages (from boto3>=1.7.4->pydbtools) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.9/site-packages (from boto3>=1.7.4->pydbtools) (0.6.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.9/site-packages (from botocore<2.0.0,>=1.23.32->awswrangler>=2.12.0->pydbtools) (1.26.6)\n", "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.9/site-packages (from botocore<2.0.0,>=1.23.32->awswrangler>=2.12.0->pydbtools) (2.8.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.9/site-packages (from Jinja2>=3.1.0->pydbtools) (2.0.1)\n", "Requirement already satisfied: jsonschema>=3.0.0 in /opt/conda/lib/python3.9/site-packages (from mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (3.2.0)\n", "Requirement already satisfied: dataengineeringutils3>=1.4.0 in /opt/conda/lib/python3.9/site-packages (from mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (1.4.3)\n", "Requirement already satisfied: parameterized==0.7.* in /opt/conda/lib/python3.9/site-packages (from mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (0.7.5)\n", "Requirement already satisfied: PyYAML<7.0,>=6.0 in /opt/conda/lib/python3.9/site-packages (from mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (6.0.2)\n", "Requirement already satisfied: Deprecated<2.0.0,>=1.2.12 in /opt/conda/lib/python3.9/site-packages (from dataengineeringutils3>=1.4.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (1.2.18)\n", "Requirement already satisfied: wrapt<2,>=1.10 in /opt/conda/lib/python3.9/site-packages (from Deprecated<2.0.0,>=1.2.12->dataengineeringutils3>=1.4.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (1.17.2)\n", "Requirement already satisfied: attrs>=17.4.0 in /opt/conda/lib/python3.9/site-packages (from jsonschema>=3.0.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (21.2.0)\n", "Requirement already satisfied: six>=1.11.0 in /opt/conda/lib/python3.9/site-packages (from jsonschema>=3.0.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (1.16.0)\n", "Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.9/site-packages (from jsonschema>=3.0.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (0.17.3)\n", "Requirement already satisfied: setuptools in /opt/conda/lib/python3.9/site-packages (from jsonschema>=3.0.0->mojap-metadata[arrow]<2.0.0,>=1.10.0->arrow-pd-parser>=1.3.9->pydbtools) (58.0.4)\n", "Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.9/site-packages (from pandas>=1.2->arrow-pd-parser>=1.3.9->pydbtools) (2021.1)\n", "Installing collected packages: numpy\n", " Attempting uninstall: numpy\n", " Found existing installation: numpy 1.24.3\n", " Uninstalling numpy-1.24.3:\n", " Successfully uninstalled numpy-1.24.3\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "scipy 1.7.1 requires numpy<1.23.0,>=1.16.5, but you have numpy 2.0.2 which is incompatible.\n", "numba 0.54.0 requires numpy<1.21,>=1.17, but you have numpy 2.0.2 which is incompatible.\u001b[0m\n", "Successfully installed numpy-2.0.2\n", "Collecting numpy==1.24.3\n", " Using cached numpy-1.24.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n", "Installing collected packages: numpy\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "scipy 1.7.1 requires numpy<1.23.0,>=1.16.5, but you have numpy 1.24.3 which is incompatible.\n", "numba 0.54.0 requires numpy<1.21,>=1.17, but you have numpy 1.24.3 which is incompatible.\n", "awswrangler 3.11.0 requires numpy<2.1.0,>=1.26; python_version < \"3.10\", but you have numpy 1.24.3 which is incompatible.\u001b[0m\n", "Successfully installed numpy-1.24.3\n", "Looking in links: orce-reinstall\n", "Requirement already satisfied: pybind11>=2.12 in /opt/conda/lib/python3.9/site-packages (2.13.6)\n", "Requirement already satisfied: openpyxl in /opt/conda/lib/python3.9/site-packages (3.1.5)\n", "Requirement already satisfied: et-xmlfile in /opt/conda/lib/python3.9/site-packages (from openpyxl) (2.0.0)\n" ] } ], "source": [ "!pip install pydbtools\n", "!pip install numpy==1.24.3 --user --force-reinstall\n", "!pip install \"pybind11>=2.12\" -force-reinstall" ] }, { "cell_type": "code", "execution_count": 2, "id": "e80c4e8b-d28d-489d-959c-643b43322fe3", "metadata": {}, "outputs": [], "source": [ "import pydbtools as pydb\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "88e715e0-7e70-4743-9e17-cbdd862e6e2b", "metadata": {}, "source": [ "## Quarterly Figures" ] }, { "cell_type": "code", "execution_count": 3, "id": "8ec8ae05-de0b-4b64-bef2-a657bb1cc53e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idyearquarterall_cases_receiptsall_cases_disposalsall_cases_openstotal_all_trial_receiptstotal_all_trial_disposalstotal_all_trial_openstriable_either_way_receipts...indictable_only_openscommitted_for_sentence_receiptscommitted_for_sentence_disposalscommitted_for_sentence_opensappeals_receiptsappeals_disposalsappeals_opensunknown_receiptsunknown_disposalsunknown_opens
02f8d1c1e93f31d0765194a95718f891f2025Q4<NA><NA><NA><NA><NA><NA><NA>...<NA><NA><NA><NA><NA><NA><NA><NA><NA><NA>
1755b7c10d16531b8a315aad797aa801c2025Q3<NA>1<NA><NA><NA><NA><NA>...<NA><NA>1<NA><NA><NA><NA><NA><NA><NA>
\n", "

2 rows × 24 columns

\n", "
" ], "text/plain": [ " id year quarter all_cases_receipts \\\n", "0 2f8d1c1e93f31d0765194a95718f891f 2025 Q4 \n", "1 755b7c10d16531b8a315aad797aa801c 2025 Q3 \n", "\n", " all_cases_disposals all_cases_opens total_all_trial_receipts \\\n", "0 \n", "1 1 \n", "\n", " total_all_trial_disposals total_all_trial_opens \\\n", "0 \n", "1 \n", "\n", " triable_either_way_receipts ... indictable_only_opens \\\n", "0 ... \n", "1 ... \n", "\n", " committed_for_sentence_receipts committed_for_sentence_disposals \\\n", "0 \n", "1 1 \n", "\n", " committed_for_sentence_opens appeals_receipts appeals_disposals \\\n", "0 \n", "1 \n", "\n", " appeals_opens unknown_receipts unknown_disposals unknown_opens \n", "0 \n", "1 \n", "\n", "[2 rows x 24 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Query to get the dataset from athena\n", "q_query = \"Select * from courts_caseload_rpt_dev_dbt.rpt_crown_court_caseload_quarterly_summary\"\n", "dfq = pydb.read_sql_query(q_query)\n", "\n", "#Get the first two rows of the dataset to explore\n", "dfq.head(2)" ] }, { "cell_type": "markdown", "id": "29b947b2-7132-47c1-a6b8-43d249e38114", "metadata": {}, "source": [ "## Rename Columns To Conform with Output specification " ] }, { "cell_type": "markdown", "id": "1f47a290-0416-4535-ada7-60570d353a26", "metadata": {}, "source": [ "Get Column Names from dbt config" ] }, { "cell_type": "code", "execution_count": 4, "id": "afa8e8b2-82a7-4d8f-a1e6-f8d724673aa8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.9/site-packages (6.0.2)\n" ] } ], "source": [ "!pip install pyyaml\n", "import yaml" ] }, { "cell_type": "code", "execution_count": 5, "id": "12b6f05c-c291-4e71-8ebb-286b7bf9f9a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'version': 2, 'models': [{'name': 'courts_caseload_rpt__rpt_crown_court_caseload_quarterly_summary', 'description': 'This model represents the staging component of the reporting layer, utilizing data from the OneCrown dimensional model', 'columns': [{'name': 'id', 'description': 'Unique Identifier for each row', 'tests': ['unique', 'not_null']}, {'name': 'year', 'description': 'Year', 'tests': ['not_null']}, {'name': 'quarter', 'description': 'Quarter', 'tests': ['not_null']}, {'name': 'all_cases_receipts', 'description': 'All cases:receipts'}, {'name': 'all_cases_disposals', 'description': 'All cases:disposals'}, {'name': 'all_cases_opens', 'description': 'All cases:open'}, {'name': 'total_all_trial_receipts', 'description': 'All trials:receipts'}, {'name': 'total_all_trial_disposals', 'description': 'All trials:disposals'}, {'name': 'total_all_trial_opens', 'description': 'All trials:open'}, {'name': 'triable_either_way_receipts', 'description': 'Triable-either-way trials:receipts'}, {'name': 'triable_either_way_disposals', 'description': 'Triable-either-way trials:disposals'}, {'name': 'triable_either_way_opens', 'description': 'Triable-either-way trials:open'}, {'name': 'indictable_only_receipts', 'description': 'Indictable only trials:receipts'}, {'name': 'indictable_only_disposals', 'description': 'Indictable only trials:disposals'}, {'name': 'indictable_only_opens', 'description': 'Indictable only trials:open'}, {'name': 'committed_for_sentence_receipts', 'description': 'Committed for sentence:receipts'}, {'name': 'committed_for_sentence_disposals', 'description': 'Committed for sentence:disposals'}, {'name': 'committed_for_sentence_opens', 'description': 'Committed for sentence:open'}, {'name': 'appeals_receipts', 'description': 'Appeals:receipts'}, {'name': 'appeals_disposals', 'description': 'Appeals:disposals'}, {'name': 'appeals_opens', 'description': 'Appeals:open'}, {'name': 'unknown_receipts', 'description': 'Unknown:receipts'}, {'name': 'unknown_disposals', 'description': 'Unknown:disposals'}, {'name': 'unknown_opens', 'description': 'Unknown:open'}]}]}\n" ] } ], "source": [ "#Import DBT config file\n", "\n", "file_path = r'courts_caseload_rpt__rpt_crown_court_caseload_properties.yml'\n", "with open(file_path, 'r') as file:\n", " data = yaml.safe_load(file)\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 6, "id": "36073436-23e3-429b-9f16-92c285ef9277", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Year', 'Quarter', 'All cases:receipts', 'All cases:disposals', 'All cases:open', 'All trials:receipts', 'All trials:disposals', 'All trials:open', 'Triable-either-way trials:receipts', 'Triable-either-way trials:disposals', 'Triable-either-way trials:open', 'Indictable only trials:receipts', 'Indictable only trials:disposals', 'Indictable only trials:open', 'Committed for sentence:receipts', 'Committed for sentence:disposals', 'Committed for sentence:open', 'Appeals:receipts', 'Appeals:disposals', 'Appeals:open', 'Unknown:receipts', 'Unknown:disposals', 'Unknown:open']\n" ] } ], "source": [ "#Get Output Column Names from .yml description tags\n", "\n", "updated_column_name = [] \n", "for model in data['models']:\n", " if 'description' in model:\n", " updated_column_name.append(model['description'])\n", " if 'columns' in model:\n", " for column in model['columns']:\n", " if 'description' in column:\n", " updated_column_name.append(column['description'])\n", "del updated_column_name[:2]\n", "print(updated_column_name) " ] }, { "cell_type": "code", "execution_count": 7, "id": "9b71d00f-9e67-4363-a904-3a58e1b1abdd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "23\n" ] } ], "source": [ "#Counts to check column (items) in list\n", "\n", "count = len(updated_column_name)\n", "print(count)" ] }, { "cell_type": "code", "execution_count": 8, "id": "f347223f-7c54-483e-bae7-4193fe96eefd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Year', 'Quarter', 'All cases:receipts', 'All cases:disposals', 'All cases:open', 'All trials:receipts', 'All trials:disposals', 'All trials:open', 'Triable-either-way trials:receipts', 'Triable-either-way trials:disposals', 'Triable-either-way trials:open', 'Indictable only trials:receipts', 'Indictable only trials:disposals', 'Indictable only trials:open', 'Committed for sentence:receipts', 'Committed for sentence:disposals', 'Committed for sentence:open', 'Appeals:receipts', 'Appeals:disposals', 'Appeals:open', 'Unknown:receipts', 'Unknown:disposals', 'Unknown:open']\n" ] } ], "source": [ "#Map Dataframe columns to updated columns from list\n", "\n", "dfq = dfq.drop(columns='id')\n", "dfq.columns = updated_column_name\n", "column_list = dfq.columns.to_list()\n", "print(column_list)" ] }, { "cell_type": "markdown", "id": "60869dca-fc04-40d9-af91-f1ed1f87efe2", "metadata": {}, "source": [ "# Build GPTables for Quarterly figures" ] }, { "cell_type": "code", "execution_count": 9, "id": "faf1614c-f73e-4802-8dfb-6f4692905958", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting gptables\n", " Using cached gptables-1.2.0-py3-none-any.whl (72 kB)\n", "Requirement already satisfied: xlrd>=1.2.0 in /opt/conda/lib/python3.9/site-packages (from gptables) (2.0.1)\n", "Requirement already satisfied: pyyaml>=3.12 in /opt/conda/lib/python3.9/site-packages (from gptables) (6.0.2)\n", "Requirement already satisfied: pandas>=0.25.3 in /opt/conda/lib/python3.9/site-packages (from gptables) (1.3.3)\n", "Collecting XlsxWriter>=1.2.6\n", " Using cached XlsxWriter-3.2.2-py3-none-any.whl (165 kB)\n", "Requirement already satisfied: numpy>=1.17.3 in ./.local/lib/python3.9/site-packages (from pandas>=0.25.3->gptables) (1.24.3)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.9/site-packages (from pandas>=0.25.3->gptables) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.9/site-packages (from pandas>=0.25.3->gptables) (2021.1)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas>=0.25.3->gptables) (1.16.0)\n", "Installing collected packages: XlsxWriter, gptables\n", "Successfully installed XlsxWriter-3.2.2 gptables-1.2.0\n" ] } ], "source": [ "!pip install gptables\n", "\n", "import gptables as gpt" ] }, { "cell_type": "code", "execution_count": 11, "id": "4e1aea61-aa4b-491d-849f-e1ca6e10beac", "metadata": {}, "outputs": [], "source": [ "# Define Metadata for the quarterly tables \n", "\n", "metadata = {\n", " \"title\": \"Table C1: Receipts, disposals and open criminal cases in the Crown Court in England and Wales, annually 2016 - 2023, quarterly Q1 2016 - Q3 2024 [note 13][note 14][note 15][note 16][note 118] (“One Crown”)\",\n", " \"source\": \"Source: XHIBIT system and Common Platform, HMCTS\",\n", "}\n", "\n", "#Create a GPTable object\n", "\n", "q_table = gpt.GPTable(\n", " table=dfq,\n", " index_columns={1:0},\n", " title=metadata[\"title\"],\n", " table_name=\"Crown_Court_Cases\",\n", " ## subtitle=metadata[\"subtitle\"],\n", " source=metadata[\"source\"],\n", ")\n", "\n", "q_data = q_table.table\n", "\n", "#Convert GPTable to Dataframe \n", "dfql = pd.DataFrame(q_data)\n", "\n" ] }, { "cell_type": "markdown", "id": "1f875374-7bcb-445d-8592-9baeb3d5381d", "metadata": {}, "source": [ "# Yearly Figures" ] }, { "cell_type": "code", "execution_count": 12, "id": "da473a89-ade2-44de-b4e5-781a4a5ac13a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idyearall_cases_receiptsall_cases_disposalsall_cases_openstotal_all_trial_receiptstotal_all_trial_disposalstotal_all_trial_openstriable_either_way_receiptstriable_either_way_disposals...indictable_only_openscommitted_for_sentence_receiptscommitted_for_sentence_disposalscommitted_for_sentence_opensappeals_receiptsappeals_disposalsappeals_opensunknown_receiptsunknown_disposalsunknown_opens
0312351bff07989769097660a563950652025149281445575261<NA><NA><NA>62415594...2187152025301115995717542589236
107811dc6c422334ce36a09ff5cd6fe712024121571113936174941<NA><NA><NA>4926743936...42049433994250546094541956277586232023
\n", "

2 rows × 23 columns

\n", "
" ], "text/plain": [ " id year all_cases_receipts \\\n", "0 312351bff07989769097660a56395065 2025 14928 \n", "1 07811dc6c422334ce36a09ff5cd6fe71 2024 121571 \n", "\n", " all_cases_disposals all_cases_opens total_all_trial_receipts \\\n", "0 14455 75261 \n", "1 113936 174941 \n", "\n", " total_all_trial_disposals total_all_trial_opens \\\n", "0 \n", "1 \n", "\n", " triable_either_way_receipts triable_either_way_disposals ... \\\n", "0 6241 5594 ... \n", "1 49267 43936 ... \n", "\n", " indictable_only_opens committed_for_sentence_receipts \\\n", "0 21871 5202 \n", "1 42049 43399 \n", "\n", " committed_for_sentence_disposals committed_for_sentence_opens \\\n", "0 5301 11599 \n", "1 42505 46094 \n", "\n", " appeals_receipts appeals_disposals appeals_opens unknown_receipts \\\n", "0 571 754 2589 2 \n", "1 5419 5627 7586 23 \n", "\n", " unknown_disposals unknown_opens \n", "0 3 6 \n", "1 20 23 \n", "\n", "[2 rows x 23 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Query to get the dataset from athena\n", "\n", "y_query = \"Select * from courts_caseload_rpt_dev_dbt.rpt_crown_court_caseload_yearly_summary\"\n", "dfy = pydb.read_sql_query(y_query)\n", "\n", "#Get the first two rows of the dataset to explore\n", "\n", "dfy.head(2)" ] }, { "cell_type": "markdown", "id": "063efbed-8cdd-4629-b456-15c407c15b4c", "metadata": {}, "source": [ "# Rename To Conform with Output specification" ] }, { "cell_type": "code", "execution_count": 13, "id": "ff5ad3e2-716a-4ea3-8b00-8c6f0c165c80", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'version': 2, 'models': [{'name': 'courts_caseload_rpt__rpt_crown_court_caseload_yearly_summary', 'description': 'This model represents the base component of the reporting layer, utilizing data from the OneCrown dimensional model to create a yearly summary of the caseload report', 'columns': [{'name': 'id', 'description': 'Unique Identifier for each row', 'tests': ['unique', 'not_null']}, {'name': 'year', 'description': 'Year', 'tests': ['not_null']}, {'name': 'all_cases_receipts', 'description': 'All cases:receipts'}, {'name': 'all_cases_disposals', 'description': 'All cases:disposals'}, {'name': 'all_cases_opens', 'description': 'All cases:open'}, {'name': 'total_all_trial_receipts', 'description': 'All trials:receipts'}, {'name': 'total_all_trial_disposals', 'description': 'All trials:disposals'}, {'name': 'total_all_trial_opens', 'description': 'All trials:open'}, {'name': 'triable_either_way_receipts', 'description': 'Triable-either-way trials:receipts'}, {'name': 'triable_either_way_disposals', 'description': 'Triable-either-way trials:disposals'}, {'name': 'triable_either_way_opens', 'description': 'Triable-either-way trials:open'}, {'name': 'indictable_only_receipts', 'description': 'Indictable only trials:receipts'}, {'name': 'indictable_only_disposals', 'description': 'Indictable only trials:disposals'}, {'name': 'indictable_only_opens', 'description': 'Indictable only trials:open'}, {'name': 'committed_for_sentence_receipts', 'description': 'Committed for sentence:receipts'}, {'name': 'committed_for_sentence_disposals', 'description': 'Committed for sentence:disposals'}, {'name': 'committed_for_sentence_opens', 'description': 'Committed for sentence:open'}, {'name': 'appeals_receipts', 'description': 'Appeals:receipts'}, {'name': 'appeals_disposals', 'description': 'Appeals:disposals'}, {'name': 'appeals_opens', 'description': 'Appeals:open'}, {'name': 'unknown_receipts', 'description': 'Unknown:receipts'}, {'name': 'unknown_disposals', 'description': 'Unknown:disposals'}, {'name': 'unknown_opens', 'description': 'Unknown:open'}]}]}\n" ] } ], "source": [ "#Import DBT config file\n", "\n", "file_path_y = r'courts_caseload_rpt__rpt_crown_court_caseload_Y_properties.yml'\n", "with open(file_path_y, 'r') as file:\n", " data_y = yaml.safe_load(file)\n", "print(data_y)" ] }, { "cell_type": "code", "execution_count": 14, "id": "a9ba46ae-3ea1-40f3-836c-7538f24765e8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Year', 'All cases:receipts', 'All cases:disposals', 'All cases:open', 'All trials:receipts', 'All trials:disposals', 'All trials:open', 'Triable-either-way trials:receipts', 'Triable-either-way trials:disposals', 'Triable-either-way trials:open', 'Indictable only trials:receipts', 'Indictable only trials:disposals', 'Indictable only trials:open', 'Committed for sentence:receipts', 'Committed for sentence:disposals', 'Committed for sentence:open', 'Appeals:receipts', 'Appeals:disposals', 'Appeals:open', 'Unknown:receipts', 'Unknown:disposals', 'Unknown:open']\n" ] } ], "source": [ "#Get Output Column Names from .yml description tags\n", "\n", "updated_column_name_y = [] \n", "for model in data_y['models']:\n", " if 'description' in model:\n", " updated_column_name_y.append(model['description'])\n", " if 'columns' in model:\n", " for column in model['columns']:\n", " if 'description' in column:\n", " updated_column_name_y.append(column['description'])\n", "del updated_column_name_y[:2]\n", "print(updated_column_name_y) " ] }, { "cell_type": "code", "execution_count": 15, "id": "917770b3-8bfd-4b6c-89d9-0f69a993fa89", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "22\n" ] } ], "source": [ "#Counts to check column (items) in list\n", "\n", "count = len(updated_column_name_y)\n", "print(count)" ] }, { "cell_type": "code", "execution_count": 16, "id": "4265883a-a7f1-4418-b1a5-4cd68ec001cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Year', 'All cases:receipts', 'All cases:disposals', 'All cases:open', 'All trials:receipts', 'All trials:disposals', 'All trials:open', 'Triable-either-way trials:receipts', 'Triable-either-way trials:disposals', 'Triable-either-way trials:open', 'Indictable only trials:receipts', 'Indictable only trials:disposals', 'Indictable only trials:open', 'Committed for sentence:receipts', 'Committed for sentence:disposals', 'Committed for sentence:open', 'Appeals:receipts', 'Appeals:disposals', 'Appeals:open', 'Unknown:receipts', 'Unknown:disposals', 'Unknown:open']\n" ] } ], "source": [ "#Map Dataframe columns to updated columns from list\n", "\n", "dfy = dfy.drop(columns='id')\n", "dfy.columns = updated_column_name_y\n", "column_list_y = dfy.columns.to_list()\n", "print(column_list_y)" ] }, { "cell_type": "markdown", "id": "1b6954db-94d9-44e8-9c8a-e250369ab7ee", "metadata": {}, "source": [ "# Build GPtable for yearly figures" ] }, { "cell_type": "code", "execution_count": 49, "id": "a210bc20-3074-42ec-aa30-d2e55265d6bf", "metadata": {}, "outputs": [], "source": [ "# Define Metadata for the quarterly tables \n", "\n", "metadata = {\n", " \"title\": \"Table C1: Receipts, disposals and open criminal cases in the Crown Court in England and Wales, annually 2016 - 2023, quarterly Q1 2016 - Q3 2024 [note 13][note 14][note 15][note 16][note 118] (“One Crown”)\",\n", " \"subtitle1\": \"This worksheet contains one table.\",\n", " \"subtitle2\": \"This table contains notes, which can be found in the Notes worksheet.\",\n", " \"source\": \"Source: XHIBIT system and Common Platform, HMCTS\",\n", "}\n", "\n", "#Create a GPTable object\n", "\n", "y_table = gpt.GPTable(\n", " table=dfy,\n", " index_columns={1:0},\n", " title=metadata[\"title\"],\n", " table_name=\"Crown_Court_Cases\",\n", " ## subtitle=metadata[\"subtitle\"],\n", " source=metadata[\"source\"],\n", ")\n", "y_data = y_table.table\n", "\n", "#Convert GPTable to Dataframe\n", "dfyl = pd.DataFrame(y_data)" ] }, { "cell_type": "markdown", "id": "d25bb406-84fb-42a1-863f-a33253891349", "metadata": {}, "source": [ "# Export Quarterly and yearly Figures to Excel Document" ] }, { "cell_type": "code", "execution_count": 50, "id": "e1661aae-f81e-46e1-81e3-e49b30ae4cbe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: openpyxl in /opt/conda/lib/python3.9/site-packages (3.1.5)\n", "Requirement already satisfied: et-xmlfile in /opt/conda/lib/python3.9/site-packages (from openpyxl) (2.0.0)\n", "Requirement already satisfied: XlsxWriter in /opt/conda/lib/python3.9/site-packages (3.2.2)\n" ] } ], "source": [ "!pip install openpyxl\n", "!pip install XlsxWriter\n", "\n", "import openpyxl\n", "import xlsxwriter\n", "from openpyxl import load_workbook\n", "from openpyxl.utils import get_column_letter\n", "from openpyxl.styles import Font, Alignment" ] }, { "cell_type": "code", "execution_count": 51, "id": "ae0864fb-77cd-4cbf-b23d-37aaad7dcc07", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "outputs": [], "source": [ "# Save the table as an Excel File \n", "\n", "output_filename = \"GPTable_Output.xlsx\"\n", "with pd.ExcelWriter(output_filename, engine=\"openpyxl\") as writer:\n", " dfyl.to_excel(writer, sheet_name=\"Yearly_Crown_Court_Cases\", startrow=5, index=False)\n", " dfql.to_excel(writer, sheet_name=\"Quarterly_Crown_Court_Cases\", startrow=5, index=False)\n", "\n", "#Edit Yearly Figures WorkBook\n", "\n", "#Load Workbook\n", "wb = load_workbook(output_filename)\n", "\n", "def insert_metadata(sheet_name):\n", " ws = wb[sheet_name]\n", " ws[\"A1\"] = metadata[\"title\"] # Title in first row\n", " ws[\"A2\"] = metadata[\"subtitle1\"] # subtitle1 in second row\n", " ws[\"A3\"] = metadata[\"subtitle2\"] # subtitle2 in third row\n", " ws[\"A4\"] = metadata[\"source\"] # Source in fourth row\n", " \n", " ws[\"A1\"].font = Font(bold=True) # Make title bold\n", " \n", " # Apply wrap text to row 4 (column headers)\n", " for col in range(1, ws.max_column + 1): # Loop through all columns\n", " ws.cell(row=6, column=col).alignment = Alignment(wrap_text=True)\n", "\n", "# Apply metadata to both sheets\n", "insert_metadata(\"Yearly_Crown_Court_Cases\")\n", "insert_metadata(\"Quarterly_Crown_Court_Cases\")\n", "\n", "# Save the updated file\n", "wb.save(output_filename)\n" ] }, { "cell_type": "markdown", "id": "8d728068-51cc-44e5-a2be-534481cdde50", "metadata": {}, "source": [ "Download Link " ] }, { "cell_type": "code", "execution_count": 52, "id": "3a67df52-416a-48c3-ad43-c76c11753935", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "Click here to download GPTable_Output.xlsx" ], "text/plain": [ "" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import HTML\n", "\n", "def create_download_link(filename):\n", " return HTML(f'Click here to download {filename}')\n", "\n", "create_download_link(output_filename)" ] }, { "cell_type": "markdown", "id": "84afcbed-d821-4f6a-af82-4e345c90300e", "metadata": {}, "source": [ "Copy Workbook To S3 Location" ] }, { "cell_type": "code", "execution_count": 53, "id": "5a597f63-fd49-41e7-bc1d-dacfcc55f3de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: boto3 in /opt/conda/lib/python3.9/site-packages (1.24.82)\n", "Requirement already satisfied: botocore<1.28.0,>=1.27.82 in /opt/conda/lib/python3.9/site-packages (from boto3) (1.27.82)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.9/site-packages (from boto3) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.9/site-packages (from boto3) (0.6.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.9/site-packages (from botocore<1.28.0,>=1.27.82->boto3) (1.26.6)\n", "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.9/site-packages (from botocore<1.28.0,>=1.27.82->boto3) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.9/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.28.0,>=1.27.82->boto3) (1.16.0)\n" ] } ], "source": [ "!pip install boto3" ] }, { "cell_type": "code", "execution_count": 56, "id": "7cfdd59a-a323-4d2a-9afe-5e19dbce4dd9", "metadata": { "tags": [] }, "outputs": [], "source": [ "import boto3" ] }, { "cell_type": "code", "execution_count": 58, "id": "16fbe504-223b-4266-9071-a8f4b926aa6c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AWS response code for uploading file is 200\n" ] } ], "source": [ "# Set up AWS session with credentials\n", "#session = boto3.Session(\n", "# aws_access_key_id=\"\",\n", "# aws_secret_access_key=\"\",\n", "# region_name=\"\"\n", "#)\n", "\n", "# Create an S3 client using the session\n", "s3_client = boto3.client(\"s3\")\n", "\n", "# Prepare the file and the bucket name\n", "bucket_name = \"alpha-fotest\" # Enclosed in quotes\n", "output_filename = \"GPTable_Output.xlsx\" # The file you're uploading\n", "data = open(output_filename, \"rb\") # Open the file in binary mode\n", "\n", "response = s3_client.put_object(Bucket=bucket_name , Body=data, Key=output_filename)\n", "print(f\"AWS response code for uploading file is {(response['ResponseMetadata']['HTTPStatusCode'])}\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }