{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Press \"Format notebook\" or in the menu \"Edit -> Apply ... Formatter\"\n",
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "\n",
    "headers = {\n",
    "    'Referer': 'https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236&DB_Short_Name=On-Time',\n",
    "    'Origin': 'https://www.transtats.bts.gov',\n",
    "    'Content-Type': 'application/x-www-form-urlencoded',\n",
    "}\n",
    "\n",
    "params = (\n",
    "    ('Table_ID', '236'),\n",
    "    ('Has_Group', '3'),\n",
    "    ('Is_Zipped', '0'),\n",
    ")\n",
    "\n",
    "# NOTE(review): headers, params and data are prepared here but no request is\n",
    "# issued anywhere in this notebook; presumably data is the request body -- confirm.\n",
    "with open('modern-1-url.txt', encoding='utf-8') as f:\n",
    "    data = f.read().strip()\n",
    "\n",
    "# exist_ok=True keeps this idempotent when the cell is re-run.\n",
    "os.makedirs('data', exist_ok=True)\n",
    "\n",
    "\n",
    "def read(fp):\n",
    "    '''\n",
    "    Load a CSV with pandas and return a cleaned DataFrame.\n",
    "\n",
    "    Steps, in order: lower-case the column names, drop the 'unnamed: 36'\n",
    "    column, run extract_city_name, pass the four time columns through\n",
    "    time_to_datetime, parse fl_date with pd.to_datetime, and convert dest,\n",
    "    origin, tail_num, unique_carrier and cancellation_code to categoricals.\n",
    "\n",
    "    NOTE(review): time_to_datetime is not defined in this notebook, so it\n",
    "    must already exist in the kernel namespace or this raises NameError.\n",
    "\n",
    "    fp: anything pd.read_csv accepts as its first argument.\n",
    "    '''\n",
    "    return (\n",
    "        pd.read_csv(fp)\n",
    "        .rename(columns=str.lower)\n",
    "        .drop('unnamed: 36', axis=1)\n",
    "        .pipe(extract_city_name)\n",
    "        .pipe(time_to_datetime, ['dep_time', 'arr_time', 'crs_arr_time', 'crs_dep_time'])\n",
    "        .assign(\n",
    "            fl_date=lambda x: pd.to_datetime(x['fl_date']),\n",
    "            dest=lambda x: pd.Categorical(x['dest']),\n",
    "            origin=lambda x: pd.Categorical(x['origin']),\n",
    "            tail_num=lambda x: pd.Categorical(x['tail_num']),\n",
    "            unique_carrier=lambda x: pd.Categorical(x['unique_carrier']),\n",
    "            cancellation_code=lambda x: pd.Categorical(x['cancellation_code']),\n",
    "        )\n",
    "    )\n",
    "\n",
    "\n",
    "def extract_city_name(df: pd.DataFrame) -> pd.DataFrame:\n",
    "    '''\n",
    "    Chicago, IL -> Chicago for origin_city_name and dest_city_name\n",
    "\n",
    "    Works on a copy, so the frame passed in is not modified.\n",
    "    '''\n",
    "    cols = ['origin_city_name', 'dest_city_name']\n",
    "    # Raw string so the regex escape reaches the regex engine verbatim.\n",
    "    city = df[cols].apply(lambda x: x.str.extract(r\"(.*), \\w{2}\", expand=False))\n",
    "    df = df.copy()\n",
    "    df[cols] = city\n",
    "    return df\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}