{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Merging and concatenating data frames\n", "\n", "[Data set download](https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip)\n", "\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "nbsphinx": "hidden", "tags": [] }, "outputs": [], "source": [ "#| code-fold: true\n", "\n", "# Colab setup ------------------\n", "import os, sys, subprocess\n", "if \"google.colab\" in sys.modules:\n", " cmd = \"pip install --upgrade polars iqplot watermark\"\n", " process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n", " stdout, stderr = process.communicate()\n", " data_path = \"https://s3.amazonaws.com/bebi103.caltech.edu/data/\"\n", "else:\n", " data_path = \"../data/\"\n", "# ------------------------------" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", "
\n", " \n", " Loading BokehJS ...\n", "
\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": "'use strict';\n(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n function drop(id) {\n const view = Bokeh.index.get_by_id(id)\n if (view != null) {\n view.model.document.clear()\n Bokeh.index.delete(view)\n }\n }\n\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n\n // Clean up Bokeh references\n if (id != null) {\n drop(id)\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim()\n drop(id)\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if 
((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? 
*/\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded(error = null) {\n const el = document.getElementById(\"f14909ba-af9c-47ba-9feb-d3efb8b5b819\");\n if (el != null) {\n const html = (() => {\n if (typeof root.Bokeh === \"undefined\") {\n if (error == null) {\n return \"BokehJS is loading ...\";\n } else {\n return \"BokehJS failed to load.\";\n }\n } else {\n const prefix = `BokehJS ${root.Bokeh.version}`;\n if (error == null) {\n return `${prefix} successfully loaded.`;\n } else {\n return `${prefix} encountered errors while loading and may not function as expected.`;\n }\n }\n })();\n el.innerHTML = html;\n\n if (error != null) {\n const wrapper = document.createElement(\"div\");\n wrapper.style.overflow = \"auto\";\n wrapper.style.height = \"5em\";\n wrapper.style.resize = \"vertical\";\n const content = document.createElement(\"div\");\n content.style.fontFamily = \"monospace\";\n content.style.whiteSpace = \"pre-wrap\";\n content.style.backgroundColor = \"rgb(255, 221, 221)\";\n content.textContent = error.stack ?? 
error.toString();\n wrapper.append(content);\n el.append(wrapper);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(() => display_loaded(error), 100);\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n 
document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.7.3.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n try {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n\n } catch (error) {display_loaded(error);throw error;\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"f14909ba-af9c-47ba-9feb-d3efb8b5b819\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", "application/vnd.bokehjs_load.v0+json": "" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import polars as pl\n", "\n", "import iqplot\n", "\n", "import bokeh.io\n", "bokeh.io.output_notebook()" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It often happens that experiments consist of multiple data files that need to be brought together into a single data frame to work with in exploratory data analysis and subsequent analyses. Through its concatenation and merging capabilities, Polars provides powerful tools for handling this sort of data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The frog tongue strike data set\n", "\n", "As usual, we will work with a real data set to learn about concatenation and merging of data frames. The data set we will use comes from a fun paper about the adhesive properties of frog tongues. The reference is [Kleinteich and Gorb, Tongue adhesion in the horned frog *Ceratophrys sp.*, *Sci. Rep.*, **4**, 5225, 2014](https://dx.doi.org/10.1038%2Fsrep05225). You might also want to check out a *New York Times* feature on the paper [here](http://www.nytimes.com/2014/08/25/science/a-frog-thats-a-living-breathing-pac-man.html).\n", "\n", "In this paper, the authors investigated various properties of the adhesive characteristics of the tongues of horned frogs when they strike prey. The authors had a striking pad connected to a cantilever to measure forces. They also used high speed cameras to capture the strike and record relevant data.\n", "\n", "To get an idea of the experimental set up, you can check out this movie, kindly sent to me by Thomas Kleinteich. If video does not play in your browser, you may download it [here](kleinteich_frog_strike.mp4).\n", "\n", "
\n", " \n", "\n", " \n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The data files\n", "\n", "I pulled data files from the [Kleinteich and Gorb paper](https://dx.doi.org/10.1038%2Fsrep05225). You can download the data files here: [https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip](https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip).\n", "\n", "There are four files, one for each of the four frogs, labeled with IDs I, II, III, and IV, that were studied. To see the format of the files, we can look at the content of the file for frog I. You can use\n", "\n", " head -n 20 ../data/frog_strikes_I.csv\n", " \n", "from the command line. Here is the content of the first data file.\n", "\n", "```\n", "# These data are from Kleinteich and Gorb, Sci. Rep., 4, 5225, 2014.\n", "# Frog ID: I\n", "# Age: adult\n", "# Snout-vent-length (SVL): 63 mm\n", "# Body weight: 63.1 g\n", "# Species: Ceratophrys cranwelli crossed with Ceratophrys cornuta\n", "date,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)\n", "2013_02_26,3,1205,46,1.95,-785,884,1.27,-0.290,387,70,0.82,3117,-2030\n", "2013_02_26,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695\n", "2013_03_01,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239\n", "2013_03_01,2,1556,41,2.51,-455,1025,0.74,-0.170,330,158,0.52,4718,-1381\n", "2013_03_01,3,493,36,0.80,-974,499,1.57,-0.423,245,216,0.12,2012,-3975\n", "2013_03_01,4,2276,31,3.68,-592,969,0.96,-0.176,341,106,0.69,6676,-1737\n", "2013_03_05,1,556,43,0.90,-512,835,0.83,-0.285,359,110,0.69,1550,-1427\n", "2013_03_05,2,1928,46,3.11,-804,508,1.30,-0.285,246,178,0.28,7832,-3266\n", 
"2013_03_05,3,2641,50,4.27,-690,491,1.12,-0.239,269,224,0.17,9824,-2568\n", "2013_03_05,4,1897,41,3.06,-462,839,0.75,-0.328,266,176,0.34,7122,-1733\n", "2013_03_12,1,1891,40,3.06,-766,1069,1.24,-0.380,408,33,0.92,4638,-1879\n", "2013_03_12,2,1545,48,2.50,-715,649,1.15,-0.298,141,112,0.21,10947,-5064\n", "2013_03_12,3,1307,29,2.11,-613,1845,0.99,-0.768,455,92,0.80,2874,-1348\n", "2013_03_12,4,1692,31,2.73,-677,917,1.09,-0.457,186,129,0.31,9089,-3636\n", "2013_03_12,5,1543,38,2.49,-528,750,0.85,-0.353,153,148,0.03,10095,-3453\n", "2013_03_15,1,1282,31,2.07,-452,785,0.73,-0.253,290,105,0.64,4419,-1557\n", "2013_03_15,2,775,34,1.25,-430,837,0.70,-0.276,257,124,0.52,3019,-1677\n", "2013_03_15,3,2032,60,3.28,-652,486,1.05,-0.257,147,134,0.09,13784,-4425\n", "2013_03_15,4,1240,34,2.00,-692,906,1.12,-0.317,364,260,0.28,3406,-1901\n", "2013_03_15,5,473,40,0.76,-536,1218,0.87,-0.382,259,168,0.35,1830,-2073\n", "```\n", "\n", "The first lines all begin with `#` signs, signifying that they are comments. They do give important information about the frog, though.\n", "\n", "The first line after the comments are the headers, giving the column names for the data frame we will load." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Concatenating data frames\n", "\n", "We would like to have all of the data frames be together in one data frame so we can conveniently do things like make plots comparing the four frogs. Let's read in the data sets and make a list of data frames." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 14)
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
stri64i64i64f64i64i64f64f64i64i64f64i64i64
"2013_02_26"31205461.95-7858841.27-0.29387700.823117-2030
"2013_02_26"42527444.08-9832481.59-0.181101940.0724923-9695
"2013_03_01"11745342.82-8502111.37-0.15783790.0521020-10239
"2013_03_01"21556412.51-45510250.74-0.173301580.524718-1381
"2013_03_01"3493360.8-9744991.57-0.4232452160.122012-3975
" ], "text/plain": [ "shape: (5, 14)\n", "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n", "│ date ┆ trial ┆ impact ┆ impact ┆ … ┆ contact ┆ contact ┆ contact ┆ adhesive │\n", "│ --- ┆ number ┆ force ┆ time (ms) ┆ ┆ area ┆ area with ┆ pressure ┆ strength │\n", "│ str ┆ --- ┆ (mN) ┆ --- ┆ ┆ without ┆ mucus / ┆ (Pa) ┆ (Pa) │\n", "│ ┆ i64 ┆ --- ┆ i64 ┆ ┆ mucus ┆ cont… ┆ --- ┆ --- │\n", "│ ┆ ┆ i64 ┆ ┆ ┆ (mm… ┆ --- ┆ i64 ┆ i64 │\n", "│ ┆ ┆ ┆ ┆ ┆ --- ┆ f64 ┆ ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ i64 ┆ ┆ ┆ │\n", "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n", "│ 2013_02_2 ┆ 3 ┆ 1205 ┆ 46 ┆ … ┆ 70 ┆ 0.82 ┆ 3117 ┆ -2030 │\n", "│ 6 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2013_02_2 ┆ 4 ┆ 2527 ┆ 44 ┆ … ┆ 94 ┆ 0.07 ┆ 24923 ┆ -9695 │\n", "│ 6 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2013_03_0 ┆ 1 ┆ 1745 ┆ 34 ┆ … ┆ 79 ┆ 0.05 ┆ 21020 ┆ -10239 │\n", "│ 1 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2013_03_0 ┆ 2 ┆ 1556 ┆ 41 ┆ … ┆ 158 ┆ 0.52 ┆ 4718 ┆ -1381 │\n", "│ 1 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2013_03_0 ┆ 3 ┆ 493 ┆ 36 ┆ … ┆ 216 ┆ 0.12 ┆ 2012 ┆ -3975 │\n", "│ 1 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# On a local machine, we would do this: fnames = glob.glob('../data/frog_strikes_*.csv')\n", "# But for Colab compatibility, we will do it by hand\n", "fnames = [\n", " os.path.join(data_path, f\"frog_strikes_{frog_id}.csv\")\n", " for frog_id in [\"I\", \"II\", \"III\", \"IV\"]\n", "]\n", "\n", "dfs = [pl.read_csv(f, comment_prefix=\"#\") for f in fnames]\n", "\n", "# Take a look at first data frame\n", "dfs[0].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have successfully loaded in all of the data frames. They all have the same columns (as given by the CSV files). So, we wish to tape the data frames together vertically. 
We can use the `pl.concat()` function to do this.\n", "\n", "Before we do that, though, we might notice a problem. We will not have information to tell us which frog is which. We might therefore like to add a column to each data frame that has the frog ID, and then concatenate them. We can parse the ID of the frog from the file name, as we can see by looking at the file names." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../data/frog_strikes_I.csv',\n", " '../data/frog_strikes_II.csv',\n", " '../data/frog_strikes_III.csv',\n", " '../data/frog_strikes_IV.csv']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fnames" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So, for each data frame/file name pair, we extract the Roman numeral and add a column to the data frame containing the frog ID. To do this, we use a Polars **literal**, accessible with `pl.lit()`, which means that we want to insert a specific value (in this case, `\"I\"`, `\"II\"`, `\"III\"`, or `\"IV\"`) into a data frame as a column." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 15)
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)ID
stri64i64i64f64i64i64f64f64i64i64f64i64i64str
"2013_02_26"31205461.95-7858841.27-0.29387700.823117-2030"I"
"2013_02_26"42527444.08-9832481.59-0.181101940.0724923-9695"I"
"2013_03_01"11745342.82-8502111.37-0.15783790.0521020-10239"I"
"2013_03_01"21556412.51-45510250.74-0.173301580.524718-1381"I"
"2013_03_01"3493360.8-9744991.57-0.4232452160.122012-3975"I"
" ], "text/plain": [ "shape: (5, 15)\n", "┌─────────────┬────────┬─────────────┬────────────┬───┬────────────┬────────────┬────────────┬─────┐\n", "│ date ┆ trial ┆ impact ┆ impact ┆ … ┆ contact ┆ contact ┆ adhesive ┆ ID │\n", "│ --- ┆ number ┆ force (mN) ┆ time (ms) ┆ ┆ area with ┆ pressure ┆ strength ┆ --- │\n", "│ str ┆ --- ┆ --- ┆ --- ┆ ┆ mucus / ┆ (Pa) ┆ (Pa) ┆ str │\n", "│ ┆ i64 ┆ i64 ┆ i64 ┆ ┆ cont… ┆ --- ┆ --- ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ --- ┆ i64 ┆ i64 ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ │\n", "╞═════════════╪════════╪═════════════╪════════════╪═══╪════════════╪════════════╪════════════╪═════╡\n", "│ 2013_02_26 ┆ 3 ┆ 1205 ┆ 46 ┆ … ┆ 0.82 ┆ 3117 ┆ -2030 ┆ I │\n", "│ 2013_02_26 ┆ 4 ┆ 2527 ┆ 44 ┆ … ┆ 0.07 ┆ 24923 ┆ -9695 ┆ I │\n", "│ 2013_03_01 ┆ 1 ┆ 1745 ┆ 34 ┆ … ┆ 0.05 ┆ 21020 ┆ -10239 ┆ I │\n", "│ 2013_03_01 ┆ 2 ┆ 1556 ┆ 41 ┆ … ┆ 0.52 ┆ 4718 ┆ -1381 ┆ I │\n", "│ 2013_03_01 ┆ 3 ┆ 493 ┆ 36 ┆ … ┆ 0.12 ┆ 2012 ┆ -3975 ┆ I │\n", "└─────────────┴────────┴─────────────┴────────────┴───┴────────────┴────────────┴────────────┴─────┘" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for i, f in enumerate(fnames):\n", " frog_id = f[f.rfind('_')+1:f.rfind('.')]\n", " dfs[i] = dfs[i].with_columns(pl.lit(frog_id).alias('ID'))\n", " \n", "# Take a look\n", "dfs[0].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Good! Now all data frames have an `'ID'` column, and we can concatenate. The `pl.concat()` function takes as input a list of data frames to be concatenated and stacks them on top of each other." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of rows: 80 \n", "Unique IDs: ['III', 'I', 'II', 'IV']\n" ] } ], "source": [ "# Concatenate data frames\n", "df = pl.concat(dfs)\n", "\n", "# Make sure we got them all\n", "print(\n", " \"Number of rows:\", len(df), \"\\nUnique IDs:\", df.get_column(\"ID\").unique().to_list()\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating a DataFrame from scratch\n", "\n", "Looking back at the [headers of the original data files](#The-data-files), we see that there is information present in the header that we would like to have in our data frame. For example, it would be nice to know if each strike came from an adult or juvenile. Or what the snout-vent length was. Working toward the goal of including this in our data frame, we will first construct a new data frame containing information about each frog." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data frames from dictionaries\n", "\n", "One way to create this new data frame is to first construct a dictionary with the respective fields. Since these data sets are small, we can look at the files and make the dictionary by hand." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "data_dict = {\n", " \"ID\": [\"I\", \"II\", \"III\", \"IV\"],\n", " \"age\": [\"adult\", \"adult\", \"juvenile\", \"juvenile\"],\n", " \"SVL (mm)\": [63, 70, 28, 31],\n", " \"body weight (g)\": [63.1, 72.7, 12.7, 12.7],\n", " \"species\": [\"cross\", \"cross\", \"cranwelli\", \"cranwelli\"],\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have this dictionary, we can convert it into a `DataFrame` by instantiating a `pl.DataFrame` class with it, using the `data` kwarg." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 5)
IDageSVL (mm)body weight (g)species
strstri64f64str
"I""adult"6363.1"cross"
"II""adult"7072.7"cross"
"III""juvenile"2812.7"cranwelli"
"IV""juvenile"3112.7"cranwelli"
" ], "text/plain": [ "shape: (4, 5)\n", "┌─────┬──────────┬──────────┬─────────────────┬───────────┐\n", "│ ID ┆ age ┆ SVL (mm) ┆ body weight (g) ┆ species │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ i64 ┆ f64 ┆ str │\n", "╞═════╪══════════╪══════════╪═════════════════╪═══════════╡\n", "│ I ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ II ┆ adult ┆ 70 ┆ 72.7 ┆ cross │\n", "│ III ┆ juvenile ┆ 28 ┆ 12.7 ┆ cranwelli │\n", "│ IV ┆ juvenile ┆ 31 ┆ 12.7 ┆ cranwelli │\n", "└─────┴──────────┴──────────┴─────────────────┴───────────┘" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Make it into a DataFrame\n", "df_frog_info = pl.DataFrame(data=data_dict)\n", "\n", "# Take a look\n", "df_frog_info" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Nice!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data frames from numpy arrays\n", "\n", "Sometimes the data sets are not small enough to construct a dictionary by hand. Oftentimes, we have a two-dimensional array of data that we want to make into a `DataFrame`. As an example, let's say we have a Numpy array where the first column is snout vent length and the second is weight." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[63. , 63.1],\n", " [70. , 72.7],\n", " [28. , 12.7],\n", " [31. , 12.7]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = np.array([[63, 70, 28, 31], [63.1, 72.7, 12.7, 12.7]]).transpose()\n", "\n", "# Verify that it's what we think it is\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To make this into a `DataFrame`, we again create `pl.DataFrame` instance, but this time we also specify the `schema` keyword argument to label the columns." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 2)
SVL (mm)weight (g)
f64f64
63.063.1
70.072.7
28.012.7
31.012.7
" ], "text/plain": [ "shape: (4, 2)\n", "┌──────────┬────────────┐\n", "│ SVL (mm) ┆ weight (g) │\n", "│ --- ┆ --- │\n", "│ f64 ┆ f64 │\n", "╞══════════╪════════════╡\n", "│ 63.0 ┆ 63.1 │\n", "│ 70.0 ┆ 72.7 │\n", "│ 28.0 ┆ 12.7 │\n", "│ 31.0 ┆ 12.7 │\n", "└──────────┴────────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_demo = pl.DataFrame(data=data, schema=[\"SVL (mm)\", \"weight (g)\"])\n", "\n", "# Take a look\n", "df_demo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That also works. Generally, any two-dimensional Numpy array can be converted into a `DataFrame` in this way. You just need to supply column names." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Programmatically creating a data frame\n", "\n", "Hand-entering data should be minimized. The information about each frog was hand-entered once by the experimenter. We should not hand enter them again. We therefore should parse the comment lines of input files to get the pertinent information.\n", "\n", "Note, though, that in the case of a single experiment with only four data sets, hand entering might be faster and indeed less error prone than doing it programmatically. We should definitely do it programmatically if we have a large number of data files or will ever do an experiment with the same file format again.\n", "\n", "So, let's programmatically parse the files. We start by writing a function to parse the metadata from a single file. Recall that the comment lines look like this:\n", "\n", "```\n", "# These data are from Kleinteich and Gorb, Sci. 
Rep., 4, 5225, 2014.\n", "# Frog ID: I\n", "# Age: adult\n", "# Snout-vent-length (SVL): 63 mm\n", "# Body weight: 63.1 g\n", "# Species: Ceratophrys cranwelli crossed with Ceratophrys cornuta\n", "```\n", "\n", "(The function below will not work with Colab because `open()` does not work for files specified by a URL.)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def parse_frog_metadata(fname):\n", " with open(fname, 'r') as f:\n", " # Citation line, ignore.\n", " f.readline()\n", " \n", " # Frog ID\n", " line = f.readline()\n", " frog_id = line[line.find(':')+1:].strip()\n", " \n", " # Age\n", " line = f.readline()\n", " age = line[line.find(':')+1:].strip()\n", " \n", " # SVL, assume units given as mm\n", " line = f.readline()\n", " svl = line[line.find(':')+1:line.rfind(' ')].strip()\n", " \n", " # Body weight, assume units given as g\n", " line = f.readline()\n", " body_weight = line[line.find(':')+1:line.rfind(' ')].strip()\n", "\n", " # Species (either cranwelli or cross)\n", " line = f.readline()\n", " species = line[line.find(':')+1:].strip()\n", " if 'cross' in species:\n", " species = 'cross'\n", " else:\n", " species = 'cranwelli'\n", "\n", " return frog_id, age, svl, body_weight, species" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's take it for a spin." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('I', 'adult', '63', '63.1', 'cross')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "parse_frog_metadata(os.path.join(data_path, 'frog_strikes_I.csv'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looks good! Now we can create a list of tuples to use as data for making a data frame." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('I', 'adult', '63', '63.1', 'cross'),\n", " ('II', 'adult', '70', '72.7', 'cross'),\n", " ('III', 'juvenile', '28', '12.7', 'cranwelli'),\n", " ('IV', 'juvenile', '31', '12.7', 'cranwelli')]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [parse_frog_metadata(f) for f in fnames]\n", " \n", "# Take a look\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now input this list of tuples, plus the column names, into `pl.DataFrame()`, and we've got our data frame. We do have to specify that this list of tuples is row-oriented, so Polars knows that each tuple is a row and not a column." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 5)
IDageSVL (mm)body weight (g)species
strstrstrstrstr
"I""adult""63""63.1""cross"
"II""adult""70""72.7""cross"
"III""juvenile""28""12.7""cranwelli"
"IV""juvenile""31""12.7""cranwelli"
" ], "text/plain": [ "shape: (4, 5)\n", "┌─────┬──────────┬──────────┬─────────────────┬───────────┐\n", "│ ID ┆ age ┆ SVL (mm) ┆ body weight (g) ┆ species │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ str ┆ str │\n", "╞═════╪══════════╪══════════╪═════════════════╪═══════════╡\n", "│ I ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ II ┆ adult ┆ 70 ┆ 72.7 ┆ cross │\n", "│ III ┆ juvenile ┆ 28 ┆ 12.7 ┆ cranwelli │\n", "│ IV ┆ juvenile ┆ 31 ┆ 12.7 ┆ cranwelli │\n", "└─────┴──────────┴──────────┴─────────────────┴───────────┘" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_frog_info = pl.DataFrame(\n", " data=data, \n", " schema=[\"ID\", \"age\", \"SVL (mm)\", \"body weight (g)\", \"species\"],\n", " orient='row',\n", ")\n", "\n", "# Take a look\n", "df_frog_info" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Joining DataFrames\n", "\n", "We want to add the information about the frogs into our main data frame, `df`, that we have been working with. Specifically, for each row of the data frame, we also want to include the frog's age, snout-vent length, body weight, and species. So, we want to take the data frame with all of the information about the tongue strikes and combine it with the data frame containing information about each frog. This combining of data frames is a **join operation**. In this case, we join on the `'ID'` column, since the value of the that column in each data frame indicates the frog we are talking about. \n", "\n", "To perform a join operation we use the `df.join()` method (or `df.join_asof()` method for approximate matches). Its default join strategy is an **inner join**, in which the entries of a given row are included in the joined data frame if and only if the entry `'ID'` column of the respective frames match. You can read more about available join strategies in [the documentation](https://docs.pola.rs/user-guide/transformations/joins/)." 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 19)
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)IDageSVL (mm)body weight (g)species
stri64i64i64f64i64i64f64f64i64i64f64i64i64strstrstrstrstr
"2013_02_26"31205461.95-7858841.27-0.29387700.823117-2030"I""adult""63""63.1""cross"
"2013_02_26"42527444.08-9832481.59-0.181101940.0724923-9695"I""adult""63""63.1""cross"
"2013_03_01"11745342.82-8502111.37-0.15783790.0521020-10239"I""adult""63""63.1""cross"
"2013_03_01"21556412.51-45510250.74-0.173301580.524718-1381"I""adult""63""63.1""cross"
"2013_03_01"3493360.8-9744991.57-0.4232452160.122012-3975"I""adult""63""63.1""cross"
" ], "text/plain": [ "shape: (5, 19)\n", "┌────────────┬────────┬──────────────┬──────────────┬───┬───────┬──────────┬─────────────┬─────────┐\n", "│ date ┆ trial ┆ impact force ┆ impact time ┆ … ┆ age ┆ SVL (mm) ┆ body weight ┆ species │\n", "│ --- ┆ number ┆ (mN) ┆ (ms) ┆ ┆ --- ┆ --- ┆ (g) ┆ --- │\n", "│ str ┆ --- ┆ --- ┆ --- ┆ ┆ str ┆ str ┆ --- ┆ str │\n", "│ ┆ i64 ┆ i64 ┆ i64 ┆ ┆ ┆ ┆ str ┆ │\n", "╞════════════╪════════╪══════════════╪══════════════╪═══╪═══════╪══════════╪═════════════╪═════════╡\n", "│ 2013_02_26 ┆ 3 ┆ 1205 ┆ 46 ┆ … ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ 2013_02_26 ┆ 4 ┆ 2527 ┆ 44 ┆ … ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ 2013_03_01 ┆ 1 ┆ 1745 ┆ 34 ┆ … ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ 2013_03_01 ┆ 2 ┆ 1556 ┆ 41 ┆ … ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "│ 2013_03_01 ┆ 3 ┆ 493 ┆ 36 ┆ … ┆ adult ┆ 63 ┆ 63.1 ┆ cross │\n", "└────────────┴────────┴──────────────┴──────────────┴───┴───────┴──────────┴─────────────┴─────────┘" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.join(df_frog_info, on='ID')\n", "\n", "# Take a look\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the entries for the added columns were repeated appropriately, e.g., the body weight column had 63.1 for every row corresponding to frog I. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## At long last, a plot!\n", "\n", "While the purpose of this part of the lesson was to learn how to concatenate and merge data frames, going through all of that wrangling effort would somehow be unsatisfying if we didn't generate a plot. Let's compare the impact force on a per-mass basis for each frog." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": "(function(root) {\n function embed_document(root) {\n const docs_json = {\"1900bdaf-4ac3-41cf-b50b-de8f01e64368\":{\"version\":\"3.7.3\",\"title\":\"Bokeh Application\",\"roots\":[{\"type\":\"object\",\"name\":\"Figure\",\"id\":\"p1004\",\"attributes\":{\"x_range\":{\"type\":\"object\",\"name\":\"DataRange1d\",\"id\":\"p1006\"},\"y_range\":{\"type\":\"object\",\"name\":\"FactorRange\",\"id\":\"p1003\",\"attributes\":{\"factors\":[\"IV\",\"III\",\"II\",\"I\"]}},\"x_scale\":{\"type\":\"object\",\"name\":\"LinearScale\",\"id\":\"p1013\"},\"y_scale\":{\"type\":\"object\",\"name\":\"CategoricalScale\",\"id\":\"p1014\"},\"title\":{\"type\":\"object\",\"name\":\"Title\",\"id\":\"p1011\"},\"renderers\":[{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1048\",\"attributes\":{\"name\":\"hover_glyphs\",\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1039\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1040\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1041\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"AAAAAAEAAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAoAAAALAAAADAAAAA0AAAAOAAAADwAAABAAAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAAABkAAAAaAAAAGwAAABwAAAAdAAAAHgAAAB8AAAAgAAAAIQAAACIAAAAjAAAAJAAAACUAAAAmAAAAJwAAAA==\"},\"shape\":[40],\"dtype\":\"int32\",\"order\":\"little\"}],[\"age\",{\"type\":\"ndarray\",\"array\":[\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"
adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"ID\",{\"type\":\"ndarray\",\"array\":[\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"impact force / body weight\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"MzMzMzMz/z9SuB6F61EQQI/C9ShcjwZAFK5H4XoUBECamZmZmZnpP3E9CtejcA1AzczMzMzM7D/hehSuR+EIQBSuR+F6FBFAexSuR+F6CEB7FK5H4XoIQAAAAAAAAARA4XoUrkfhAEDXo3A9CtcFQOxRuB6F6wNAj8L1KFyPAEAAAAAAAAD0Pz0K16NwPQpAAAAAAAAAAEBSuB6F61HoP1K4HoXrUQ5AuB6F61G49j+kcD0K16PoP9ejcD0K1wFAUrgehetR9D/2KFyPwvUMQNejcD0K1/M/rkfhehSu9z9cj8L1KFwLQGZmZmZmZuY/ZmZmZmZm+j8pXI/C9SjkP1K4HoXrUfw/j8L1KFyP4j89CtejcD0GQFyPwvUoXPM/UrgehetR8D/NzMzMzMzsP0jhehSuR/E/hetRuB6F+z8=\"},\"shape\":[40],\"dtype\":\"float64\",\"order\":\"little\"}],[\"cat\",{\"type\":\"ndarray\",\"array\":[\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"__label\",{\"type\":\"ndarray\",\"array\":[\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}]]}}},\"view\":{\"type\":\"object\",\"name\":\"CDSView\",\"id\":\"p1
049\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\"AllIndices\",\"id\":\"p1050\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1045\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"type\":\"object\",\"name\":\"Jitter\",\"id\":\"p1038\",\"attributes\":{\"width\":0.1,\"distribution\":\"normal\",\"range\":{\"id\":\"p1003\"}}}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1046\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1038\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1047\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body 
weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1038\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}},{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1060\",\"attributes\":{\"name\":\"hover_glyphs\",\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1051\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1052\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1053\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"KAAAACkAAAAqAAAAKwAAACwAAAAtAAAALgAAAC8AAAAwAAAAMQAAADIAAAAzAAAANAAAADUAAAA2AAAANwAAADgAAAA5AAAAOgAAADsAAAA8AAAAPQAAAD4AAAA/AAAAQAAAAEEAAABCAAAAQwAAAEQAAABFAAAARgAAAEcAAABIAAAASQAAAEoAAABLAAAATAAAAE0AAABOAAAATwAAAA==\"},\"shape\":[40],\"dtype\":\"int32\",\"order\":\"little\"}],[\"age\",{\"type\":\"ndarray\",\"array\":[\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"ID\",{\"type\":\"ndarray\",\"array\":[\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\
"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"impact force / body weight\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"w/UoXI/CE0CkcD0K16MKQOF6FK5H4QRA9ihcj8L1GECkcD0K16MTQIXrUbgehRFASOF6FK5HEUCkcD0K16MSQPYoXI/C9RlAmpmZmZmZDUCF61G4HoUNQClcj8L1KBRA9ihcj8L1E0CF61G4HoURQDMzMzMzMxFAuB6F61G4CEDXo3A9CtcJQMP1KFyPwhNAZmZmZmZmFUBxPQrXo3APQHsUrkfhevQ/zczMzMzM8D/sUbgehevRP/YoXI/C9QpAH4XrUbgeBUDD9Shcj8LFP+xRuB6F6w1APQrXo3A9AEBxPQrXo3AVQFyPwvUoXBFAhetRuB6F9z+F61G4HoX3P8P1KFyPwhFAuB6F61G4DkBI4XoUrkcYQAAAAAAAAAhAAAAAAAAAEkAzMzMzMzMVQEjhehSuRxJA16NwPQrXC0A=\"},\"shape\":[40],\"dtype\":\"float64\",\"order\":\"little\"}],[\"cat\",{\"type\":\"ndarray\",\"array\":[\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"__label\",{\"type\":\"ndarray\",\"array\":[\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}]]}}},\"view\":{\"type\":\"object\",\"name\":\"CDSView\",\"id\":\"p1061\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\
"AllIndices\",\"id\":\"p1062\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1057\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1038\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1058\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1038\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1059\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body 
weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1038\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}}],\"toolbar\":{\"type\":\"object\",\"name\":\"Toolbar\",\"id\":\"p1012\",\"attributes\":{\"tools\":[{\"type\":\"object\",\"name\":\"PanTool\",\"id\":\"p1025\"},{\"type\":\"object\",\"name\":\"WheelZoomTool\",\"id\":\"p1026\",\"attributes\":{\"renderers\":\"auto\"}},{\"type\":\"object\",\"name\":\"BoxZoomTool\",\"id\":\"p1027\",\"attributes\":{\"dimensions\":\"both\",\"overlay\":{\"type\":\"object\",\"name\":\"BoxAnnotation\",\"id\":\"p1028\",\"attributes\":{\"syncable\":false,\"line_color\":\"black\",\"line_alpha\":1.0,\"line_width\":2,\"line_dash\":[4,4],\"fill_color\":\"lightgrey\",\"fill_alpha\":0.5,\"level\":\"overlay\",\"visible\":false,\"left\":{\"type\":\"number\",\"value\":\"nan\"},\"right\":{\"type\":\"number\",\"value\":\"nan\"},\"top\":{\"type\":\"number\",\"value\":\"nan\"},\"bottom\":{\"type\":\"number\",\"value\":\"nan\"},\"left_units\":\"canvas\",\"right_units\":\"canvas\",\"top_units\":\"canvas\",\"bottom_units\":\"canvas\",\"handles\":{\"type\":\"object\",\"name\":\"BoxInteractionHandles\",\"id\":\"p1034\",\"attributes\":{\"all\":{\"type\":\"object\",\"name\":\"AreaVisuals\",\"id\":\"p1033\",\"attributes\":{\"fill_color\":\"white\",\"hover_fill_color\":\"lightgray\"}}}}}}}},{\"type\":\"object\",\"name\":\"SaveTool\",\"id\":\"p1035\"},{\"type\":\"object\",\"name\":\"ResetTool\",\"id\":\"p1036\"},{\"type\":\"object\",\"name\":\"HelpTool\",\"id\":\"p1037\"}]}},\"toolbar_location\":\"above\",\"left\":[{\"type\":\"object\",\"name\":\"CategoricalAxis\",\"id\":\"p1020\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"CategoricalTicker\"
,\"id\":\"p1021\"},\"formatter\":{\"type\":\"object\",\"name\":\"CategoricalTickFormatter\",\"id\":\"p1022\"},\"axis_label\":\"frog ID\",\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1023\"}}}],\"right\":[{\"type\":\"object\",\"name\":\"Legend\",\"id\":\"p1063\",\"attributes\":{\"location\":\"center\",\"title\":\"age\",\"click_policy\":\"hide\",\"items\":[{\"type\":\"object\",\"name\":\"LegendItem\",\"id\":\"p1064\",\"attributes\":{\"label\":{\"type\":\"value\",\"value\":\"adult\"},\"renderers\":[{\"id\":\"p1048\"}]}},{\"type\":\"object\",\"name\":\"LegendItem\",\"id\":\"p1065\",\"attributes\":{\"label\":{\"type\":\"value\",\"value\":\"juvenile\"},\"renderers\":[{\"id\":\"p1060\"}]}}]}}],\"below\":[{\"type\":\"object\",\"name\":\"LinearAxis\",\"id\":\"p1015\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"BasicTicker\",\"id\":\"p1016\",\"attributes\":{\"mantissas\":[1,2,5]}},\"formatter\":{\"type\":\"object\",\"name\":\"BasicTickFormatter\",\"id\":\"p1017\"},\"axis_label\":\"impact force / body weight (mN/g)\",\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1018\"}}}],\"center\":[{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1019\",\"attributes\":{\"axis\":{\"id\":\"p1015\"}}},{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1024\",\"attributes\":{\"dimension\":1,\"axis\":{\"id\":\"p1020\"},\"grid_line_color\":null}}],\"frame_width\":375,\"frame_height\":275}}]}};\n const render_items = [{\"docid\":\"1900bdaf-4ac3-41cf-b50b-de8f01e64368\",\"roots\":{\"p1004\":\"a7f8cb67-ccf3-41e2-839d-1632a3f9e93a\"},\"root_ids\":[\"p1004\"]}];\n void root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n }\n if (root.Bokeh !== undefined) {\n embed_document(root);\n } else {\n let attempts = 0;\n const timer = setInterval(function(root) {\n if (root.Bokeh !== undefined) {\n clearInterval(timer);\n embed_document(root);\n } else {\n attempts++;\n if (attempts > 100) {\n clearInterval(timer);\n 
console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n }\n }\n }, 10, root)\n }\n})(window);", "application/vnd.bokehjs_exec.v0+json": "" }, "metadata": { "application/vnd.bokehjs_exec.v0+json": { "id": "p1004" } }, "output_type": "display_data" } ], "source": [ "p = iqplot.strip(\n", " df,\n", " q=\"impact force / body weight\",\n", " cats=\"ID\",\n", " color_column=\"age\",\n", " spread=\"jitter\",\n", " x_axis_label=\"impact force / body weight (mN/g)\",\n", " y_axis_label=\"frog ID\"\n", ")\n", "\n", "bokeh.io.show(p)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Apparently Frog III consistently packs a powerful punch, er.... tongue, for its body weight." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Computing environment" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python implementation: CPython\n", "Python version : 3.13.7\n", "IPython version : 9.5.0\n", "\n", "numpy : 2.2.6\n", "polars : 1.33.1\n", "bokeh : 3.7.3\n", "iqplot : 0.3.7\n", "jupyterlab: 4.4.7\n", "\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -v -p numpy,polars,bokeh,iqplot,jupyterlab" ] } ], "metadata": { "kernelspec": { "display_name": "default", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 4 }