{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Download NTSB Aviation Accidents Database " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A machine-readable copy of the database is [published online](https://www.ntsb.gov/_layouts/ntsb.aviation/index.aspx) in Microsoft Access format. Let's download it." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pypyodbc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Configure the data directories." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "%store -r" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove the zip file if it already exists." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "zip_path = os.path.join(input_dir, \"avall.zip\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "!rm -f $zip_path" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Download the file." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2018-10-17 18:43:43-- https://app.ntsb.gov/avdata/Access/avall.zip\n", "Resolving app.ntsb.gov (app.ntsb.gov)... 199.173.155.8\n", "Connecting to app.ntsb.gov (app.ntsb.gov)|199.173.155.8|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 242648102 (231M) [application/x-zip-compressed]\n", "Saving to: ‘/home/palewire/Code/helicopter-accident-analysis/src/../input/avall.zip’\n", "\n", "/home/palewire/Code 100%[===================>] 231.41M 1.07MB/s in 2m 43s \n", "\n", "2018-10-17 18:46:28 (1.42 MB/s) - ‘/home/palewire/Code/helicopter-accident-analysis/src/../input/avall.zip’ saved [242648102/242648102]\n", "\n" ] } ], "source": [ "!wget -O $input_dir/avall.zip -nc https://app.ntsb.gov/avdata/Access/avall.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Unzip the file." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: /home/palewire/Code/helicopter-accident-analysis/src/../input/avall.zip\n" ] } ], "source": [ "!unzip -n $input_dir/\\*.zip -d $input_dir/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the [mdbtools](https://github.com/brianb/mdbtools) command-line tool to get a list of all the tables in the database" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "mdb_path = os.path.join(input_dir, 'avall.mdb')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "tables = !mdb-tables $mdb_path" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Split them into a Python list" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "tables = tables[0].split()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extract the tables from the Microsoft Access database and write them out as CSVs." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "def export_table(name):\n", " print(f\"Exporting {name}\")\n", " export_path = os.path.join(input_dir, f\"{name.lower()}.csv\")\n", " !mdb-export $mdb_path $name > $export_path" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Exporting aircraft\n", "Exporting Country\n", "Exporting ct_seqevt\n", "Exporting dt_aircraft\n", "Exporting dt_events\n", "Exporting dt_Flight_Crew\n", "Exporting eADMSPUB_DataDictionary\n", "Exporting engines\n", "Exporting events\n", "Exporting Events_Sequence\n", "Exporting Findings\n", "Exporting Flight_Crew\n", "Exporting flight_time\n", "Exporting injury\n", "Exporting narratives\n", "Exporting NTSB_Admin\n", "Exporting Occurrences\n", "Exporting seq_of_events\n", "Exporting states\n", "Exporting ct_iaids\n" ] } ], "source": [ "for t in tables: export_table(t)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }