{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "hide_input": true
   },
   "source": [
    "# FSEconomy: most profitable assignments\n",
    "\n",
    "*Tip: Menu \"Cell -> Rul all\" to see actual contents of this notebook*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": true
   },
   "outputs": [],
   "source": [
    "import re\n",
    "import os.path as path\n",
    "\n",
    "import colorlover as cl\n",
    "import numpy as np\n",
    "import ipywidgets as widgets\n",
    "import pandas as pd\n",
    "from IPython.display import display, Markdown\n",
    "\n",
    "import plotly.offline as offline\n",
    "import plotly.graph_objs as go\n",
    "import plotly.tools as tools\n",
    "\n",
    "offline.init_notebook_mode(connected=True)\n",
    "\n",
    "def make_data_path(dataset_name, ext='csv'):\n",
    "    dataset_name = re.sub(r'[^a-zA-Z0-9_-]', '_', dataset_name)\n",
    "    dataset_name = re.sub(r'_+', '_', dataset_name)\n",
    "    dataset_name = re.sub(r'(^_+|_+$)', '', dataset_name)\n",
    "    f = lambda ex: path.join('.', 'data', dataset_name + '.' + ex).lower()\n",
    "    if ext is None:\n",
    "        for ext in ['csv', 'csv.gz']:\n",
    "            if path.isfile(f(ext)):\n",
    "                return f(ext)\n",
    "        \n",
    "        ext = 'csv'\n",
    "    return f(ext)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Passenger fare: distance, number of people within a ticket\n",
    "\n",
    "$Pay(dis, ppl) = \\left[\\left(\\frac{1}{ppl}\\right)^{0.3} \\cdot 500 \\cdot \\left(\\frac{arctan(dis/25)}{arctan(1)}\\right)\\right] \\cdot ppl \\cdot dis$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def pay_per_mile_per_unit(distance, units):\n",
    "    maxfare = 1000\n",
    "    crit_distance = 25\n",
    "    return (\n",
    "        ((1/units)**0.3)\n",
    "        * (maxfare / 2)\n",
    "        * (np.arctan(distance/crit_distance) / np.arctan(1))\n",
    "    ) / distance\n",
    "\n",
    "\n",
    "distances = np.linspace(1, 400, 100)\n",
    "\n",
    "\n",
    "@widgets.interact(units=widgets.IntSlider(value=1, min=1, max=27, description='Passengers'))\n",
    "def plot_fare(units):\n",
    "    fig = tools.make_subplots(\n",
    "        rows=1, cols=2,\n",
    "        subplot_titles=('Pay per mile per passenger', 'Pay per passenger'),\n",
    "        print_grid=False)\n",
    "\n",
    "    fig.append_trace(go.Scatter(\n",
    "        x=distances,\n",
    "        y=pay_per_mile_per_unit(distances, units) * distances,\n",
    "        line=dict(\n",
    "            shape='spline',\n",
    "        ),\n",
    "        name='pay/p',\n",
    "    ), row=1, col=1)\n",
    "\n",
    "    fig.append_trace(go.Scatter(\n",
    "        x=distances,\n",
    "        y=pay_per_mile_per_unit(distances, units),\n",
    "        line=dict(\n",
    "            shape='spline',\n",
    "            simplify=False,\n",
    "        ),\n",
    "        name='pay/mn/p',\n",
    "    ), row=1, col=2)\n",
    "\n",
    "    fig['layout']['xaxis1'].update(title='Distance (nm)', rangemode='tozero')\n",
    "    fig['layout']['yaxis1'].update(title='Pay (v$)', range=[0, 1000])\n",
    "    fig['layout']['xaxis2'].update(title='Distance (nm)', rangemode='tozero')\n",
    "    fig['layout']['yaxis2'].update(title='Pay (v$)', range=[0, 30])\n",
    "    fig['layout'].update(title='{} passenger(s) per assignment'.format(units))\n",
    "\n",
    "    offline.iplot(fig)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Preliminary conclusions:\n",
    "\n",
    " 1. One ticket per person! Group discounts for 3 persons is ~30%, we don't want that.\n",
    " 2. Ticket price levels out beyond 100 nm. Flhing further than 150nm doesn't bring noticably more money."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Flight time estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Load and per-filter all data\n",
    "\n",
    "\n",
    "def _flight_logs(model):\n",
    "    return make_data_path('Flight logs - {}'.format(model), ext=None)\n",
    "\n",
    "\n",
    "def _fliter_outliers_stddev(data, label_or_series, width=5):\n",
    "    series = data[label_or_series] if type(\n",
    "        label_or_series) == str else label_or_series\n",
    "    return data[np.abs(series - series.mean()) < width*series.std()]\n",
    "\n",
    "\n",
    "# Load available models\n",
    "models = pd.read_csv(make_data_path('aircraft models'))\n",
    "models = models[models['MakeModel'].map(\n",
    "    lambda mm: path.isfile(_flight_logs(mm)))]\n",
    "\n",
    "all_flight_logs = {}\n",
    "\n",
    "for model in models['MakeModel']:\n",
    "    display(Markdown('### Loading ' + model))\n",
    "    # Load flight logs for selected model and fix data types\n",
    "    flight_logs = pd.read_csv(_flight_logs(model), dtype={})\n",
    "    flight_logs['FlightTime'] = pd.to_timedelta(flight_logs['FlightTime'])\n",
    "    flight_logs['Time'] = pd.to_datetime(flight_logs['Time'])\n",
    "    print('Initially loaded flight records:', len(flight_logs))\n",
    "\n",
    "    # Filter out obvious garbage\n",
    "    flight_logs = flight_logs[flight_logs.Distance > 0]\n",
    "    flight_logs = flight_logs[flight_logs.FlightTime > np.timedelta64(0, 'm')]\n",
    "    print('After garbage filtering:', len(flight_logs))\n",
    "\n",
    "    # Derived columns\n",
    "    flight_logs['FlightTimePerDistance'] = flight_logs['FlightTime'] / \\\n",
    "        flight_logs['Distance']\n",
    "\n",
    "    # Filter out outliers\n",
    "    flight_logs = _fliter_outliers_stddev(flight_logs, 'Distance')\n",
    "    flight_logs = _fliter_outliers_stddev(flight_logs, 'FlightTime')\n",
    "    flight_logs = _fliter_outliers_stddev(\n",
    "        flight_logs, flight_logs['FlightTime'] / flight_logs['Distance'], width=6)\n",
    "    print('After outliers filtering:', len(flight_logs))\n",
    "\n",
    "    all_flight_logs[model] = flight_logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def select_model_widget(desc='Model:'):\n",
    "    return widgets.Dropdown(options=models['MakeModel'], value='Cessna 172 Skyhawk', description=desc)\n",
    "\n",
    "\n",
    "def _hist_color_scale(colors):\n",
    "    return [[float(i)/float(len(colors)-1), colors[i]] for i in range(len(colors))]\n",
    "\n",
    "\n",
    "def _plots(data, x, y, func=None, time_units=np.timedelta64(1, 'm'), sample=400):\n",
    "    colorscale = _hist_color_scale(cl.scales['4']['seq']['BuPu'])\n",
    "    data_sampled = data.sample(n=sample, random_state=1)\n",
    "\n",
    "    traces = []\n",
    "    traces.append(go.Scatter(\n",
    "        x=data_sampled[x],\n",
    "        y=data_sampled[y] / time_units,\n",
    "        mode='markers',\n",
    "        opacity=0.3,\n",
    "        marker=dict(color='purple', size=3),\n",
    "        showlegend=False,\n",
    "    ))\n",
    "    traces.append(go.Histogram2d(\n",
    "        x=data[x],\n",
    "        y=data[y] / time_units,\n",
    "        histnorm=\"probability\",\n",
    "        showscale=False,\n",
    "        colorscale=colorscale,\n",
    "    ))\n",
    "    if func is not None:\n",
    "        func_x = np.linspace(0, data[x].max(), 100)\n",
    "        func_y = func(func_x)\n",
    "        traces.append(go.Scatter(\n",
    "            x=func_x,\n",
    "            y=func_y,\n",
    "            opacity=0.5,\n",
    "            line=dict(color='black', width=2),\n",
    "            showlegend=False,\n",
    "        ))\n",
    "    return traces\n",
    "\n",
    "\n",
    "def _fit(data, x, y, deg=3, time_units=np.timedelta64(1, 'm')):\n",
    "    return np.poly1d(np.polyfit(x=data[x], y=data[y]/time_units, deg=deg))\n",
    "\n",
    "\n",
    "layout = go.Layout(\n",
    "    xaxis=dict(\n",
    "        rangemode='tozero',\n",
    "    ),\n",
    "    yaxis=dict(\n",
    "        rangemode='tozero',\n",
    "    ),\n",
    ")\n",
    "\n",
    "\n",
    "def fit_flight_time(model):\n",
    "    return _fit(all_flight_logs[model], 'Distance', 'FlightTime')\n",
    "\n",
    "\n",
    "def fit_flight_time_per_distance(model):\n",
    "    return _fit(all_flight_logs[model], 'Distance', 'FlightTimePerDistance')\n",
    "\n",
    "\n",
    "@widgets.interact(model=select_model_widget())\n",
    "def analyze_flight_time(model):\n",
    "    flight_logs = all_flight_logs[model]\n",
    "    flight_time_per_distance = fit_flight_time_per_distance(model)\n",
    "    flight_time = fit_flight_time(model)\n",
    "\n",
    "    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)\n",
    "    [fig.append_trace(t, 1, 1) for t in _plots(\n",
    "        flight_logs, x='Distance', y='FlightTime', func=flight_time)]\n",
    "    [fig.append_trace(t, 1, 2) for t in _plots(\n",
    "        flight_logs, x='Distance', y='FlightTimePerDistance', func=flight_time_per_distance)]\n",
    "\n",
    "    fig['layout']['xaxis1'].update(\n",
    "        title='Distance (nm)', rangemode='nonnegative')\n",
    "    fig['layout']['yaxis1'].update(\n",
    "        title='Flight Time (min)', rangemode='nonnegative')\n",
    "    fig['layout']['xaxis2'].update(\n",
    "        title='Distance (nm)', rangemode='nonnegative')\n",
    "    fig['layout']['yaxis2'].update(\n",
    "        title='Flight Time per Distance (min/nm)', rangemode='nonnegative')\n",
    "    fig['layout'].update(title=model)\n",
    "    offline.iplot(fig)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Conclusions:\n",
    "    \n",
    " * Vast majority of flights is under 200 nm for GA planes.\n",
    " * Flight time per mile is higher for shorter flights, which is expected.\n",
    " * Curious that it's not very visible on the normal graph."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Flight profit per hour\n",
    "\n",
    "Let's take assignment pay from the first section and flight time estimation from the second and estimate pay her hour for different aircraft and flight distances. Here we make following assumptions:\n",
    "\n",
    " 1. All assignments are single-person. Having more people per assignment only reduces pay, so we won't be doing this (in ideal world, at least).\n",
    " 2. We are able to fill selected airplane to its full capacity. Making this happen in real world is a whole another can of worms, but we're considering an ideal case for the time being.\n",
    " 3. There is some time overhead for setting up the filght. For experienced FSE people it would be lower, for noobs like myself - higher.\n",
    "\n",
    "Then we simply plot following formula:\n",
    "$pay\\_per\\_hour(distance) = \\frac{pay\\_per\\_distance\\_per\\_passenger(distance, 1) \\cdot distance \\cdot plane\\_capacity}{flight\\_time(distance)}$\n",
    "\n",
    "We use three different ways to estimate flight time (to cross-check ourselves):\n",
    "\n",
    " 1. Cruise speed: ideal case when we perform all the flight at the specified cruise speed.\n",
    " 2. Statistical: average flight time for the plane for distances +/- 5nm.\n",
    " 3. Polynomial approximation: perform polynomial regression of flight time by distance data we have and use that polynomial to calculate flight time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hide_input": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def pay_per_hour(model, distance, loading_time_mins=10, mode='regression'):\n",
    "    distance = pd.Series(distance)\n",
    "    capacity = models[models['MakeModel'] == model]['Seats'].iloc[0]\n",
    "    capacity = min(capacity, 27*2)\n",
    "    total_pay = pay_per_mile_per_unit(\n",
    "        distance=distance, units=1) * capacity * distance\n",
    "    if mode == 'regression':\n",
    "        flight_time_hours = fit_flight_time(model)(distance) / 60\n",
    "    elif mode == 'regression_normalized':\n",
    "        flight_time_hours = (fit_flight_time_per_distance(\n",
    "            model)(distance) * distance) / 60\n",
    "    elif mode == 'stat':\n",
    "        flight_logs = all_flight_logs[model]\n",
    "        flight_time_hours = pd.Series([flight_logs[np.abs(flight_logs.Distance - d) < 5].FlightTime.mean() / np.timedelta64(1, 'h')\n",
    "                                       for d in distance])\n",
    "    elif mode == 'cruise':\n",
    "        cruise = models[models['MakeModel'] == model]['CruiseSpeed'].iloc[0]\n",
    "        flight_time_hours = distance / cruise\n",
    "    else:\n",
    "        raise ValueError('Unknown flight time estimation mode: ' + mode)\n",
    "    flight_time_hours += loading_time_mins / 60\n",
    "    return total_pay / flight_time_hours\n",
    "\n",
    "\n",
    "@widgets.interact(\n",
    "    model_l=select_model_widget(desc='Model (left):'),\n",
    "    model_r=select_model_widget(desc='Model (right):'),\n",
    "    loading_time=widgets.IntSlider(desc='Loading time (mins):', min=0, max=60, value=15, continuous_update=False))\n",
    "def plot_pay_per_hour(model_l, model_r, loading_time):\n",
    "    def plot_one(model, name):\n",
    "        traces = []\n",
    "        for mode in ['regression', 'stat', 'cruise']:\n",
    "            x = np.linspace(0, 300, 100)\n",
    "            y = pay_per_hour(model, x, mode=mode,\n",
    "                             loading_time_mins=loading_time)\n",
    "            traces.append(go.Scatter(\n",
    "                x=x,\n",
    "                y=y,\n",
    "                name='{} ({})'.format(mode, name),\n",
    "            ))\n",
    "        return traces\n",
    "\n",
    "    fig = tools.make_subplots(\n",
    "        rows=1, cols=2,\n",
    "        print_grid=False,\n",
    "        shared_yaxes=True,\n",
    "        subplot_titles=(model_l, model_r),\n",
    "    )\n",
    "    [fig.append_trace(t, row=1, col=1) for t in plot_one(model_l, 'left')]\n",
    "    [fig.append_trace(t, row=1, col=2) for t in plot_one(model_r, 'right')]\n",
    "\n",
    "    fig['layout']['xaxis1'].update(title='Distance (nm)')\n",
    "    fig['layout']['yaxis1'].update(title='Pay per hour (v$/h)')\n",
    "    fig['layout']['xaxis2'].update(title='Distance (nm)')\n",
    "    fig['layout']['yaxis2'].update(title='Pay per hour (v$/h)')\n",
    "    \n",
    "    offline.iplot(fig)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Conclusions:\n",
    "    \n",
    " 1. Bigger planes are much more profitable, assuming we fill them fully (what a surprise!)\n",
    " 2. Most profitable flight distance is 30-50 nm, depending on a plane and setup time. Peak for bigger and faster aircraft is slightly shifted to the right, but not as much as I expected.\n",
    " 3. Our polymonial approximation does a very good job! Machine Learning!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}