{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["parameters"]},
   "outputs": [],
   "source": [
    "flex_title = \"Altair plots\"\n",
    "flex_author = \"built using jupyter-flex\"\n",
    "flex_source_link = \"https://github.com/danielfrg/jupyter-flex/blob/master/examples/plots/altair.ipynb\"\n",
    "flex_include_source = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Simple charts"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Simple Scatter Plot with Tooltips"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "# All imports live here; later cells rely on these names (np, pd, alt, data).\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import altair as alt\n",
    "from vega_datasets import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "# Turn off the Vega-Embed actions menu on every rendered chart.\n",
    "alt.renderers.set_embed_options(actions=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "# Seed NumPy's global RNG so the np.random.normal() datasets below are reproducible.\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "source = data.cars()\n",
    "\n",
    "plot = alt.Chart(source).mark_circle(size=60).encode(\n",
    "    x='Horsepower',\n",
    "    y='Miles_per_Gallon',\n",
    "    color='Origin',\n",
    "    tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon']\n",
    ")\n",
    "plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "# 'container' sizing makes the chart fill the element it is embedded in.\n",
    "plot.properties(\n",
    "    width='container',\n",
    "    height='container'\n",
    ").interactive()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## Col 2"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Simple bar chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "source = pd.DataFrame({\n",
    "    'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],\n",
    "    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52]\n",
    "})\n",
    "\n",
    "plot = alt.Chart(source).mark_bar().encode(\n",
    "    x='a',\n",
    "    y='b'\n",
    ")\n",
    "plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "plot.properties(\n",
    "    width='container',\n",
    "    height='container'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Simple Heatmap"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["source"]},
   "outputs": [],
   "source": [
    "# Compute x^2 + y^2 across a 2D grid\n",
    "x, y = np.meshgrid(range(-5, 5), range(-5, 5))\n",
    "z = x ** 2 + y ** 2\n",
    "\n",
    "# Convert this grid to columnar data expected by Altair\n",
    "source = pd.DataFrame({'x': x.ravel(),\n",
    "                       'y': y.ravel(),\n",
    "                       'z': z.ravel()})\n",
    "\n",
    "plot = alt.Chart(source).mark_rect().encode(\n",
    "    x='x:O',\n",
    "    y='y:O',\n",
    "    color='z:Q'\n",
    ")\n",
    "plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "plot.properties(\n",
    "    width='container',\n",
    "    height='container'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Bar Charts"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Bar Chart with Negative Values"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body", "source"]},
   "outputs": [],
   "source": [
    "source = data.us_employment()\n",
    "\n",
    "alt.Chart(source).mark_bar().encode(\n",
    "    x=\"month:T\",\n",
    "    y=\"nonfarm_change:Q\",\n",
    "    color=alt.condition(\n",
    "        alt.datum.nonfarm_change > 0,\n",
    "        alt.value(\"steelblue\"),  # The positive color\n",
    "        alt.value(\"orange\")  # The negative color\n",
    "    )\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Horizontal Bar Chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.wheat()\n",
    "\n",
    "alt.Chart(source).mark_bar().encode(\n",
    "    x='wheat:Q',\n",
    "    y=\"year:O\"\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## Col 2"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Stacked Bar Chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.barley()\n",
    "\n",
    "alt.Chart(source).mark_bar().encode(\n",
    "    x='variety',\n",
    "    y='sum(yield)',\n",
    "    color='site'\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Line and Area Charts"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Filled Step Chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.stocks()\n",
    "\n",
    "alt.Chart(source).mark_area(\n",
    "    color=\"lightblue\",\n",
    "    interpolate='step-after',\n",
    "    line=True\n",
    ").encode(\n",
    "    x='date',\n",
    "    y='price'\n",
    ").transform_filter(alt.datum.symbol == 'GOOG').properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Multi Series Line Chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.stocks()\n",
    "\n",
    "alt.Chart(source).mark_line().encode(\n",
    "    x='date',\n",
    "    y='price',\n",
    "    color='symbol'\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## Col 2"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Cumulative Count Chart"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.movies.url\n",
    "\n",
    "alt.Chart(source).transform_window(\n",
    "    cumulative_count=\"count()\",\n",
    "    sort=[{\"field\": \"IMDB_Rating\"}],\n",
    ").mark_area().encode(\n",
    "    x=\"IMDB_Rating:Q\",\n",
    "    y=\"cumulative_count:Q\"\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Stacked Density Estimates"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.iris()\n",
    "\n",
    "alt.Chart(source).transform_fold(\n",
    "    ['petalWidth',\n",
    "     'petalLength',\n",
    "     'sepalWidth',\n",
    "     'sepalLength'],\n",
    "    as_ = ['Measurement_type', 'value']\n",
    ").transform_density(\n",
    "    density='value',\n",
    "    bandwidth=0.3,\n",
    "    groupby=['Measurement_type'],\n",
    "    extent= [0, 8],\n",
    "    counts = True,\n",
    "    steps=200\n",
    ").mark_area().encode(\n",
    "    alt.X('value:Q'),\n",
    "    alt.Y('density:Q', stack='zero'),\n",
    "    alt.Color('Measurement_type:N')\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Scatter and Maps"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Binned Scatterplot"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.movies.url\n",
    "\n",
    "alt.Chart(source).mark_circle().encode(\n",
    "    alt.X('IMDB_Rating:Q', bin=True),\n",
    "    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),\n",
    "    size='count()'\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Multifeature Scatter Plot"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.iris()\n",
    "\n",
    "alt.Chart(source).mark_circle().encode(\n",
    "    alt.X('sepalLength', scale=alt.Scale(zero=False)),\n",
    "    alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1)),\n",
    "    color='species',\n",
    "    size='petalWidth'\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## Col 2"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Choropleth Map"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "counties = alt.topo_feature(data.us_10m.url, 'counties')\n",
    "source = data.unemployment.url\n",
    "\n",
    "# Look up each county's 'rate' in the unemployment data by matching on 'id'.\n",
    "alt.Chart(counties).mark_geoshape().encode(\n",
    "    color='rate:Q'\n",
    ").transform_lookup(\n",
    "    lookup='id',\n",
    "    from_=alt.LookupData(source, 'id', ['rate'])\n",
    ").project(\n",
    "    type='albersUsa'\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Layered Histogram"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "# Generating Data\n",
    "source = pd.DataFrame({\n",
    "    'Trial A': np.random.normal(0, 0.8, 1000),\n",
    "    'Trial B': np.random.normal(-2, 1, 1000),\n",
    "    'Trial C': np.random.normal(3, 2, 1000)\n",
    "})\n",
    "\n",
    "alt.Chart(source).transform_fold(\n",
    "    ['Trial A', 'Trial B', 'Trial C'],\n",
    "    as_=['Experiment', 'Measurement']\n",
    ").mark_area(\n",
    "    opacity=0.3,\n",
    "    interpolate='step'\n",
    ").encode(\n",
    "    alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),\n",
    "    alt.Y('count()', stack=None),\n",
    "    alt.Color('Experiment:N')\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Scatter Matrix"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.cars()\n",
    "\n",
    "alt.Chart(source).mark_circle().encode(\n",
    "    alt.X(alt.repeat(\"column\"), type='quantitative'),\n",
    "    alt.Y(alt.repeat(\"row\"), type='quantitative'),\n",
    "    color='Origin:N'\n",
    ").repeat(\n",
    "    row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],\n",
    "    column=['Miles_per_Gallon', 'Acceleration', 'Horsepower']\n",
    ").interactive()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Faceted Density Estimates"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.iris()\n",
    "\n",
    "alt.Chart(source).transform_fold(\n",
    "    ['petalWidth',\n",
    "     'petalLength',\n",
    "     'sepalWidth',\n",
    "     'sepalLength'],\n",
    "    as_ = ['Measurement_type', 'value']\n",
    ").transform_density(\n",
    "    density='value',\n",
    "    bandwidth=0.3,\n",
    "    groupby=['Measurement_type'],\n",
    "    extent= [0, 8]\n",
    ").mark_area().encode(\n",
    "    alt.X('value:Q'),\n",
    "    alt.Y('density:Q'),\n",
    "    alt.Row('Measurement_type:N')\n",
    ").properties(width=600, height=180)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# Interactive"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Interactive Crossfilter"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = alt.UrlData(\n",
    "    data.flights_2k.url,\n",
    "    format={'parse': {'date': 'date'}}\n",
    ")\n",
    "\n",
    "# selection_interval() is the dedicated constructor for type='interval' selections\n",
    "brush = alt.selection_interval(encodings=['x'])\n",
    "\n",
    "# Define the base chart, with the common parts of the\n",
    "# background and highlights\n",
    "base = alt.Chart().mark_bar().encode(\n",
    "    x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)),\n",
    "    y='count()'\n",
    ").properties(\n",
    "    width=200,\n",
    "    height=300\n",
    ")\n",
    "\n",
    "# gray background with selection\n",
    "background = base.encode(\n",
    "    color=alt.value('#ddd')\n",
    ").add_selection(brush)\n",
    "\n",
    "# blue highlights on the transformed data\n",
    "highlight = base.transform_filter(brush)\n",
    "\n",
    "# layer the two charts & repeat\n",
    "alt.layer(\n",
    "    background,\n",
    "    highlight,\n",
    "    data=source\n",
    ").transform_calculate(\n",
    "    \"time\",\n",
    "    \"hours(datum.date)\"\n",
    ").repeat(column=[\"distance\", \"delay\", \"time\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Scatter Plot and Histogram with Interval Selection"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "x = np.random.normal(size=100)\n",
    "y = np.random.normal(size=100)\n",
    "\n",
    "m = np.random.normal(15, 1, size=100)\n",
    "\n",
    "source = pd.DataFrame({\"x\": x, \"y\":y, \"m\":m})\n",
    "\n",
    "# interval selection in the scatter plot\n",
    "pts = alt.selection_interval(encodings=[\"x\"])\n",
    "\n",
    "# left panel: scatter plot\n",
    "points = alt.Chart().mark_point(filled=True, color=\"black\").encode(\n",
    "    x='x',\n",
    "    y='y'\n",
    ").transform_filter(\n",
    "    pts\n",
    ").properties(\n",
    "    width=300,\n",
    "    height=300\n",
    ")\n",
    "\n",
    "# right panel: histogram\n",
    "mag = alt.Chart().mark_bar().encode(\n",
    "    x='mbin:N',\n",
    "    y=\"count()\",\n",
    "    color=alt.condition(pts, alt.value(\"black\"), alt.value(\"lightgray\"))\n",
    ").properties(\n",
    "    width=300,\n",
    "    height=300\n",
    ").add_selection(pts)\n",
    "\n",
    "# build the chart:\n",
    "alt.hconcat(\n",
    "    points,\n",
    "    mag,\n",
    "    data=source\n",
    ").transform_bin(\n",
    "    \"mbin\",\n",
    "    field=\"m\",\n",
    "    bin=alt.Bin(maxbins=20)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## Col 2"]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Interactive average"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.seattle_weather()\n",
    "brush = alt.selection_interval(encodings=['x'])\n",
    "\n",
    "bars = alt.Chart().mark_bar().encode(\n",
    "    x='month(date):O',\n",
    "    y='mean(precipitation):Q',\n",
    "    opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)),\n",
    ").add_selection(\n",
    "    brush\n",
    ").properties(width=700, height=300)\n",
    "\n",
    "line = alt.Chart().mark_rule(color='firebrick').encode(\n",
    "    y='mean(precipitation):Q',\n",
    "    size=alt.SizeValue(3)\n",
    ").transform_filter(\n",
    "    brush\n",
    ").properties(width=700, height=300)\n",
    "\n",
    "alt.layer(bars, line, data=source)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["### Interactive Legend"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["body"]},
   "outputs": [],
   "source": [
    "source = data.unemployment_across_industries.url\n",
    "\n",
    "selection = alt.selection_multi(fields=['series'], bind='legend')\n",
    "\n",
    "alt.Chart(source).mark_area().encode(\n",
    "    alt.X('yearmonth(date):T', axis=alt.Axis(domain=False, format='%Y', tickSize=0)),\n",
    "    alt.Y('sum(count):Q', stack='center', axis=None),\n",
    "    alt.Color('series:N', scale=alt.Scale(scheme='category20b')),\n",
    "    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))\n",
    ").properties(\n",
    "    width=\"container\",\n",
    "    height=\"container\"\n",
    ").add_selection(\n",
    "    selection\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}