{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# OmniSci Charting Example with Altair\n", "\n", "Let's see if we can replicate [this](https://omnisci.github.io/mapd-charting/example/example1.html) OmniSci charting example in Python with Altair, Vega Lite, and Vega:\n", "\n", "![](https://cloud.githubusercontent.com/assets/2932405/25641647/1acce1f2-2f4a-11e7-87d4-a4e80cb262f5.gif)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, import `altair`, `ibis`, and `ibis_vega_transform` and connect to the database:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "import ibis_vega_transform\n", "\n", "import warnings\n", "try:\n", " from ibis.backends import omniscidb as ibis_omniscidb\n", "except ImportError as msg:\n", " warnings.warn(str(msg))\n", " from ibis import omniscidb as ibis_omniscidb\n", "\n", "conn = ibis_omniscidb.connect(\n", " host='metis.mapd.com', user='demouser', password='HyperInteractive',\n", " port=443, database='mapd', protocol= 'https'\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "conn.list_tables()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can access the flights table:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t = conn.table(\"flights_donotmodify\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It has 7 million rows:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t.head().execute()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## States\n", "\n", "Now let's make the top left graph, the flights by state, letting you select one:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "states = alt.selection_multi(fields=['origin_state'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ibis_vega_transform.enable_debug()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "flights_by_state = alt.Chart(\n", " t,\n", " title=\"Total Number of Flights by State\"\n", ").mark_bar().encode(\n", " x='count()',\n", " y=alt.Y(\n", " 'origin_state',\n", " sort=alt.Sort(encoding='x', order='descending')\n", " ),\n", " color=alt.condition(states, alt.ColorValue(\"steelblue\"), alt.ColorValue(\"grey\"))\n", ").add_selection(\n", " states\n", ")\n", "flights_by_state" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's add the little text there with the count as well." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ibis_vega_transform.disable_debug()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "flights_by_state_text = alt.Chart(t).mark_text(dx=20).encode(\n", " x='count()',\n", " y=alt.Y('origin_state', sort=alt.Sort(encoding='x', order='descending')),\n", " text='count()'\n", ")\n", "flights_by_state_text" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can combine them together now:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def create_flights_by_state(map_fn=lambda c: c):\n", " return map_fn(flights_by_state_text) + map_fn(flights_by_state)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "create_flights_by_state()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Delay chart" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We do the same now for the delay chart:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "airlines = alt.selection_multi(fields=['carrier_name'])\n", "\n", "def create_carrier_delay(before_aggregate_fn=lambda c: c):\n", " carrier_delay = before_aggregate_fn(alt.Chart(\n", " t,\n", " title=\"Carrier Departure Delay by Arrival Delay (Minutes)\"\n", " )).transform_aggregate(\n", " depdelay='mean(depdelay)',\n", " arrdelay='mean(arrdelay)',\n", " groupby=[\"carrier_name\"]\n", " ).mark_point(filled=True, size=200).encode(\n", " x='depdelay',\n", " y='arrdelay',\n", " color=alt.condition(airlines, alt.ColorValue(\"steelblue\"), alt.ColorValue(\"grey\")),\n", " tooltip=['carrier_name', 'depdelay', 'arrdelay']\n", " ).add_selection(airlines)\n", "\n", " carrier_delay_text = before_aggregate_fn(alt.Chart(\n", " t\n", " )).transform_aggregate(\n", " depdelay='mean(depdelay)',\n", " arrdelay='mean(arrdelay)',\n", " groupby=[\"carrier_name\"]\n", " ).mark_text().encode(\n", " x='depdelay',\n", " y='arrdelay',\n", " text='carrier_name',\n", " )\n", " return carrier_delay + carrier_delay_text\n", "\n", "create_carrier_delay()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## States + Delays" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can combine these two charts, by adding filters by each others selections:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "(\n", " create_flights_by_state(lambda c: c.transform_filter(airlines)) |\n", " create_carrier_delay(lambda c: c.transform_filter(states))\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ibis_vega_transform.disable_debug()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Departure Time" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can add the final part, the depature time chart:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dates = alt.selection_interval(\n", " fields=['dep_timestamp'],\n", " encodings=['x'],\n", ")\n", "\n", "\n", "def create_time(map_fn=lambda c:c):\n", " return map_fn(\n", " alt.Chart(\n", " t,\n", " title='Number of Flights by Departure Time'\n", " )\n", " ).transform_filter(\n", " 'datum.dep_timestamp != null'\n", " ).mark_line().encode(\n", " alt.X(\n", " 'yearmonthdate(dep_timestamp):T',\n", " ),\n", " alt.Y(\n", " 'count():Q',\n", " scale=alt.Scale(zero=False)\n", " )\n", " ).add_selection(\n", " dates\n", " )\n", "create_time()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combined" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And add them all together!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "HEIGHT = 800\n", "WIDTH = 1000\n", "\n", "(\n", " (\n", " create_flights_by_state(\n", " lambda c: c.transform_filter(\n", " {\"and\": [airlines, dates]}\n", " ).properties(\n", " height= 2 * HEIGHT / 3,\n", " width=WIDTH / 2\n", " )\n", " ) | \n", " create_carrier_delay(\n", " lambda c: c.transform_filter(\n", " {\"and\": [dates, states]}\n", " )\n", " ).properties(\n", " height=2 * HEIGHT / 3,\n", " width=WIDTH / 2\n", " )\n", " ) & (\n", " create_time(\n", " lambda c: c.transform_filter(\n", " {\"and\": [airlines, states]}\n", " ).properties(\n", " height=HEIGHT / 3,\n", " width=WIDTH + 50\n", " )\n", " )\n", " )\n", ").configure_axis(\n", " grid=False\n", ").configure_view(\n", " strokeOpacity=0\n", ").configure(\n", " autosize={\n", " 'resize': True\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }