{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MapD Charting Example with Altair\n", "\n", "Let's see if we can replicate [this](https://omnisci.github.io/mapd-charting/example/example1.html) MapD charting example in Python with Altair, Vega Lite, and Vega:\n", "\n", "![](https://cloud.githubusercontent.com/assets/2932405/25641647/1acce1f2-2f4a-11e7-87d4-a4e80cb262f5.gif)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RendererRegistry.enable('ibis')" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import altair as alt\n", "import jupyterlab_omnisci.vega_ibis\n", "import ibis\n", "\n", "alt.data_transformers.enable('ibis')\n", "alt.renderers.enable('ibis')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "conn = ibis.mapd.connect(\n", " host='metis.mapd.com', user='mapd', password='HyperInteractive',\n", " port=443, database='mapd', protocol= 'https'\n", ")\n", "t = conn.table(\"flights_donotmodify\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Table rows: 7009728\n", "\n", "Column Type Non-null #\n", "------ ---- ----------\n", "flight_year int16 7009728 \n", "flight_month int16 7009728 \n", "flight_dayofmonth int16 7009728 \n", "flight_dayofweek int16 7009728 \n", "deptime int16 6873482 \n", "crsdeptime int16 7009728 \n", "arrtime int16 6858079 \n", "crsarrtime int16 7009728 \n", "uniquecarrier String(nullable=True) 7009728 \n", "flightnum int16 7009728 \n", "tailnum String(nullable=True) 6926363 \n", "actualelapsedtime int16 6855029 \n", "crselapsedtime int16 7008884 \n", "airtime int16 6855029 \n", "arrdelay int16 6855029 \n", "depdelay int16 6873482 \n", "origin String(nullable=True) 7009728 \n", "dest String(nullable=True) 7009728 \n", "distance int16 7009728 \n", "taxiin int16 6858079 \n", "taxiout int16 6872670 \n", "cancelled int16 7009728 \n", "cancellationcode String(nullable=True) 137434 \n", "diverted int16 7009728 \n", "carrierdelay int16 1524735 \n", "weatherdelay int16 1524735 \n", "nasdelay int16 1524735 \n", "securitydelay int16 1524735 \n", "lateaircraftdelay int16 1524735 \n", "dep_timestamp Timestamp(timezone=None, nullable=True) 6873482 \n", "arr_timestamp Timestamp(timezone=None, nullable=True) 6858079 \n", "carrier_name String(nullable=True) 7009728 \n", "plane_type String(nullable=True) 6199619 \n", "plane_manufacturer String(nullable=True) 6199619 \n", "plane_issue_date date 6199385 \n", "plane_model String(nullable=True) 6199619 \n", "plane_status String(nullable=True) 6199619 \n", "plane_aircraft_type String(nullable=True) 6199619 \n", "plane_engine_type String(nullable=True) 6199619 \n", "plane_year int16 5996236 \n", "origin_name String(nullable=True) 7009728 \n", "origin_city String(nullable=True) 7009728 \n", "origin_state String(nullable=True) 7009728 \n", "origin_country String(nullable=True) 7009728 \n", "origin_lat float32 7009728 \n", "origin_lon float32 7009728 \n", "dest_name String(nullable=True) 7009728 \n", "dest_city String(nullable=True) 7009728 \n", "dest_state String(nullable=True) 7009728 \n", "dest_country String(nullable=True) 7009728 \n", "dest_lat float32 7009728 \n", "dest_lon float32 7009728 \n", "origin_merc_x float32 7009728 \n", "origin_merc_y float32 7009728 \n", "dest_merc_x float32 7009728 \n", "dest_merc_y float32 7009728 \n" ] } ], "source": [ "t.info()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/miniconda3/envs/jupyterlab-omnisci/lib/python3.7/site-packages/altair/utils/core.py:90: UserWarning: I don't know how to infer vegalite type from 'empty'. Defaulting to nominal.\n", " \"Defaulting to nominal.\".format(typ))\n" ] }, { "data": { "application/vnd.vega.ibis.v5+json": { "$schema": "https://vega.github.io/schema/vega-lite/v3.2.1.json", "config": { "mark": { "tooltip": null }, "view": { "height": 300, "width": 400 } }, "hconcat": [ { "data": { "name": "ibis:6655977658344790925" }, "encoding": { "color": { "condition": { "selection": "selector001", "value": "steelblue" }, "value": "grey" }, "x": { "aggregate": "count", "type": "quantitative" }, "y": { "field": "origin_state", "sort": { "encoding": "x", "order": "descending" }, "type": "nominal" } }, "height": 400, "mark": "bar", "selection": { "selector001": { "fields": [ "origin_state" ], "type": "multi" } }, "title": "Total Number of Flights by State", "transform": [ { "filter": { "selection": "selector002" } } ], "width": 600 }, { "data": { "name": "ibis:5661337957732429958" }, "encoding": { "color": { "condition": { "selection": "selector002", "value": "steelblue" }, "value": "grey" }, "tooltip": [ { "field": "carrier_name", "type": "nominal" }, { "field": "depdelay", "type": "quantitative" }, { "field": "arrdelay", "type": "quantitative" } ], "x": { "field": "depdelay", "type": "quantitative" }, "y": { "field": "arrdelay", "type": "quantitative" } }, "height": 400, "mark": { "filled": true, "size": 200, "type": "point" }, "selection": { "selector002": { "fields": [ "carrier_name" ], "type": "multi" } }, "title": "Carrier Departure Delay by Arrival Delay (Minutes)", "transform": [ { "filter": { "selection": "selector001" } }, { "aggregate": [ { "as": "depdelay", "field": "depdelay", "op": "mean" }, { "as": "arrdelay", "field": "arrdelay", "op": "mean" } ], "groupby": [ "carrier_name" ] } ], "width": 600 } ] }, "text/plain": [ "HConcatChart({\n", " hconcat: [Chart({\n", " data: Empty DataFrame\n", " Columns: [origin_state, carrier_name]\n", " Index: [],\n", " encoding: FacetedEncoding({\n", " color: ColorValue({\n", " condition: ColorValue({\n", " selection: 'selector001',\n", " value: 'steelblue'\n", " }),\n", " value: 'grey'\n", " }),\n", " x: X({\n", " shorthand: 'count()'\n", " }),\n", " y: Y({\n", " shorthand: 'origin_state',\n", " sort: Sort({\n", " encoding: 'x',\n", " order: 'descending'\n", " })\n", " })\n", " }),\n", " height: 400,\n", " mark: 'bar',\n", " selection: {'selector001': SelectionDef({\n", " fields: ['origin_state'],\n", " type: 'multi'\n", " })},\n", " title: 'Total Number of Flights by State',\n", " transform: [FilterTransform({\n", " filter: {'selection': 'selector002'}\n", " })],\n", " width: 600\n", " }), Chart({\n", " data: Empty DataFrame\n", " Columns: [depdelay, arrdelay, carrier_name, origin_state]\n", " Index: [],\n", " encoding: FacetedEncoding({\n", " color: ColorValue({\n", " condition: ColorValue({\n", " selection: 'selector002',\n", " value: 'steelblue'\n", " }),\n", " value: 'grey'\n", " }),\n", " tooltip: [Tooltip({\n", " shorthand: 'carrier_name'\n", " }), Tooltip({\n", " shorthand: 'depdelay'\n", " }), Tooltip({\n", " shorthand: 'arrdelay'\n", " })],\n", " x: X({\n", " shorthand: 'depdelay'\n", " }),\n", " y: Y({\n", " shorthand: 'arrdelay'\n", " })\n", " }),\n", " height: 400,\n", " mark: MarkDef({\n", " filled: True,\n", " size: 200,\n", " type: 'point'\n", " }),\n", " selection: {'selector002': SelectionDef({\n", " fields: ['carrier_name'],\n", " type: 'multi'\n", " })},\n", " title: 'Carrier Departure Delay by Arrival Delay (Minutes)',\n", " transform: [FilterTransform({\n", " filter: {'selection': 'selector001'}\n", " }), AggregateTransform({\n", " aggregate: [AggregatedFieldDef({\n", " as: 'depdelay',\n", " field: 'depdelay',\n", " op: 'mean'\n", " }), AggregatedFieldDef({\n", " as: 'arrdelay',\n", " field: 'arrdelay',\n", " op: 'mean'\n", " })],\n", " groupby: ['carrier_name']\n", " })],\n", " width: 600\n", " })]\n", "})" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "states = alt.selection_multi(fields=['origin_state'])\n", "airlines = alt.selection_multi(fields=['carrier_name'])\n", "\n", "flights_by_state = alt.Chart(\n", " t[t.origin_state, t.carrier_name],\n", " title=\"Total Number of Flights by State\"\n", ").transform_filter(\n", " airlines\n", ").mark_bar().encode(\n", " x='count()',\n", " y=alt.Y('origin_state', sort=alt.Sort(encoding='x', order='descending')),\n", " color=alt.condition(states, alt.ColorValue(\"steelblue\"), alt.ColorValue(\"grey\"))\n", ").add_selection(states).properties(\n", " height=400,\n", " width=600\n", ")\n", "\n", "\n", "carrier_delay = alt.Chart(\n", " t[t.depdelay, t.arrdelay, t.carrier_name, t.origin_state],\n", " title=\"Carrier Departure Delay by Arrival Delay (Minutes)\"\n", ").transform_filter(\n", " states\n", ").transform_aggregate(\n", " depdelay='mean(depdelay)',\n", " arrdelay='mean(arrdelay)',\n", " groupby=[\"carrier_name\"]\n", ").mark_point(filled=True, size=200).encode(\n", " x='depdelay',\n", " y='arrdelay',\n", " color=alt.condition(airlines, alt.ColorValue(\"steelblue\"), alt.ColorValue(\"grey\")),\n", " tooltip=['carrier_name', 'depdelay', 'arrdelay']\n", ").add_selection(airlines).properties(\n", " height=400,\n", " width=600\n", ")\n", "\n", "\n", "(flights_by_state | carrier_delay)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# jupyterlab_omnisci.vega_ibis.FALLBACK = False\n", "\n", "# alt.Chart(t[t.dep_timestamp]).mark_line().encode(\n", "# x='monthdate(dep_timestamp):O',\n", "# y='count()'\n", "# )" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }