{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Merging and concatenating data frames\n", "\n", "[Data set download](https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip)\n", "\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "nbsphinx": "hidden", "tags": [] }, "outputs": [], "source": [ "# Colab setup ------------------\n", "import os, sys, subprocess\n", "if \"google.colab\" in sys.modules:\n", " cmd = \"pip install --upgrade iqplot watermark\"\n", " process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n", " stdout, stderr = process.communicate()\n", " data_path = \"https://s3.amazonaws.com/bebi103.caltech.edu/data/\"\n", "else:\n", " data_path = \"../data/\"\n", "# ------------------------------" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " const force = true;\n", "\n", " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", "const JS_MIME_TYPE = 'application/javascript';\n", " const HTML_MIME_TYPE = 'text/html';\n", " const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " const CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " const script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " const cell = handle.cell;\n", "\n", " const id = cell.output_area._bokeh_element_id;\n", " const server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id != null && id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd_clean, {\n", " iopub: {\n", " output: function(msg) {\n", " const id = msg.content.text.trim();\n", " if (id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd_destroy);\n", " }\n", " }\n", "\n", " /**\n", " * 
Handle when a new output is added\n", " */\n", " function handleAddOutput(event, handle) {\n", " const output_area = handle.output_area;\n", " const output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " const bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " const script_attrs = bk_div.children[0].attributes;\n", " for (let i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " const toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[toinsert.length - 1]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " 
/* Handle when an output is cleared or removed */\n", " events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* Handle when a new output is added */\n", " events.on('output_added.OutputArea', handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " const events = require('base/js/events');\n", " const OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " const NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " const el = document.getElementById(\"fe0062d8-4ea1-42b8-bc8e-166e0e29d52f\");\n", " if (el != null) {\n", " el.textContent = \"BokehJS is loading...\";\n", " }\n", " if (root.Bokeh !== undefined) {\n", " if (el != null) {\n", " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", " }\n", " } else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks\n", " }\n", " console.debug(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", " if (root._bokeh_is_loading > 0) {\n", " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", " }\n", " }\n", "\n", " function on_error(url) {\n", " console.error(\"failed to load \" + url);\n", " }\n", "\n", " for (let i = 0; i < css_urls.length; i++) {\n", " const url = css_urls[i];\n", " const element = document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error.bind(null, url);\n", " element.rel = \"stylesheet\";\n", " element.type = 
\"text/css\";\n", " element.href = url;\n", " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " }\n", "\n", " for (let i = 0; i < js_urls.length; i++) {\n", " const url = js_urls[i];\n", " const element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error.bind(null, url);\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " };\n", "\n", " function inject_raw_css(css) {\n", " const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.2.1.min.js\"];\n", " const css_urls = [];\n", "\n", " const inline_js = [ function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", "function(Bokeh) {\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " if (root.Bokeh !== undefined || force === true) {\n", " for (let i = 0; i < inline_js.length; i++) {\n", " inline_js[i].call(root, root.Bokeh);\n", " }\n", "if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " const cell = $(document.getElementById(\"fe0062d8-4ea1-42b8-bc8e-166e0e29d52f\")).parents('.cell').data().cell;\n", " 
cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", " }\n", "\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(css_urls, js_urls, function() {\n", " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(window));" ], "application/vnd.bokehjs_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(\"fe0062d8-4ea1-42b8-bc8e-166e0e29d52f\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = 
document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.2.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\nif (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"fe0062d8-4ea1-42b8-bc8e-166e0e29d52f\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import 
pandas as pd\n", "\n", "import iqplot\n", "\n", "import bokeh.io\n", "bokeh.io.output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It often happens that experiments consist of multiple data files that need to be brought together into a single data frame to work with in exploratory data analysis and subsequent analyses. Through its concatenation and merging capabilities, Pandas provides powerful tools for handling this sort of data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The frog tongue strike data set\n", "\n", "As usual, we will work with a real data set to learn about concatenation and merging of data frames. The data set we will use comes from a fun paper about the adhesive properties of frog tongues. The reference is [Kleinteich and Gorb, Tongue adhesion in the horned frog *Ceratophrys sp.*, *Sci. Rep.*, **4**, 5225, 2014](https://dx.doi.org/10.1038%2Fsrep05225). You might also want to check out a *New York Times* feature on the paper [here](http://www.nytimes.com/2014/08/25/science/a-frog-thats-a-living-breathing-pac-man.html).\n", "\n", "In this paper, the authors investigated various properties of the adhesive characteristics of the tongues of horned frogs when they strike prey. The authors had a striking pad connected to a cantilever to measure forces. They also used high-speed cameras to capture the strike and record relevant data.\n", "\n", "To get an idea of the experimental setup, you can check out this movie, kindly sent to me by Thomas Kleinteich. If the video does not play in your browser, you may download it [here](kleinteich_frog_strike.mp4).\n", "\n", "
\n", " \n", "\n", " \n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The data files\n", "\n", "I pulled data files from the [Kleinteich and Gorb paper](https://dx.doi.org/10.1038%2Fsrep05225). You can download the data files here: [https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip](https://s3.amazonaws.com/bebi103.caltech.edu/data/frog_strikes.zip).\n", "\n", "There are four files, one for each of the four frogs that were studied, labeled with IDs I, II, III, and IV. To see the format of the files, we can look at the content of the file for frog I. You can use\n", "\n", " head -n 20 ../data/frog_strikes_I.csv\n", " \n", "from the command line. Here is the content of the first data file.\n", "\n", "```\n", "# These data are from Kleinteich and Gorb, Sci. Rep., 4, 5225, 2014.\n", "# Frog ID: I\n", "# Age: adult\n", "# Snout-vent-length (SVL): 63 mm\n", "# Body weight: 63.1 g\n", "# Species: Ceratophrys cranwelli crossed with Ceratophrys cornuta\n", "date,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)\n", "2013_02_26,3,1205,46,1.95,-785,884,1.27,-0.290,387,70,0.82,3117,-2030\n", "2013_02_26,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695\n", "2013_03_01,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239\n", "2013_03_01,2,1556,41,2.51,-455,1025,0.74,-0.170,330,158,0.52,4718,-1381\n", "2013_03_01,3,493,36,0.80,-974,499,1.57,-0.423,245,216,0.12,2012,-3975\n", "2013_03_01,4,2276,31,3.68,-592,969,0.96,-0.176,341,106,0.69,6676,-1737\n", "2013_03_05,1,556,43,0.90,-512,835,0.83,-0.285,359,110,0.69,1550,-1427\n", "2013_03_05,2,1928,46,3.11,-804,508,1.30,-0.285,246,178,0.28,7832,-3266\n", 
"2013_03_05,3,2641,50,4.27,-690,491,1.12,-0.239,269,224,0.17,9824,-2568\n", "2013_03_05,4,1897,41,3.06,-462,839,0.75,-0.328,266,176,0.34,7122,-1733\n", "2013_03_12,1,1891,40,3.06,-766,1069,1.24,-0.380,408,33,0.92,4638,-1879\n", "2013_03_12,2,1545,48,2.50,-715,649,1.15,-0.298,141,112,0.21,10947,-5064\n", "2013_03_12,3,1307,29,2.11,-613,1845,0.99,-0.768,455,92,0.80,2874,-1348\n", "2013_03_12,4,1692,31,2.73,-677,917,1.09,-0.457,186,129,0.31,9089,-3636\n", "2013_03_12,5,1543,38,2.49,-528,750,0.85,-0.353,153,148,0.03,10095,-3453\n", "2013_03_15,1,1282,31,2.07,-452,785,0.73,-0.253,290,105,0.64,4419,-1557\n", "2013_03_15,2,775,34,1.25,-430,837,0.70,-0.276,257,124,0.52,3019,-1677\n", "2013_03_15,3,2032,60,3.28,-652,486,1.05,-0.257,147,134,0.09,13784,-4425\n", "2013_03_15,4,1240,34,2.00,-692,906,1.12,-0.317,364,260,0.28,3406,-1901\n", "2013_03_15,5,473,40,0.76,-536,1218,0.87,-0.382,259,168,0.35,1830,-2073\n", "```\n", "\n", "The first lines all begin with `#` signs, signifying that they are comments. They do give important information about the frog, though.\n", "\n", "The first line after the comments is the header row, giving the column names for the data frame we will load." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Concatenating data frames\n", "\n", "We would like to combine all of the data frames into a single data frame so we can conveniently do things like make plots comparing the four frogs. Let's read in the data sets and make a list of data frames." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
02013_02_2631205461.95-7858841.27-0.290387700.823117-2030
12013_02_2642527444.08-9832481.59-0.181101940.0724923-9695
22013_03_0111745342.82-8502111.37-0.15783790.0521020-10239
32013_03_0121556412.51-45510250.74-0.1703301580.524718-1381
42013_03_013493360.80-9744991.57-0.4232452160.122012-3975
\n", "
" ], "text/plain": [ " date trial number impact force (mN) impact time (ms) \\\n", "0 2013_02_26 3 1205 46 \n", "1 2013_02_26 4 2527 44 \n", "2 2013_03_01 1 1745 34 \n", "3 2013_03_01 2 1556 41 \n", "4 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "0 1.95 -785 \n", "1 4.08 -983 \n", "2 2.82 -850 \n", "3 2.51 -455 \n", "4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) \n", "0 3117 -2030 \n", "1 24923 -9695 \n", "2 21020 -10239 \n", "3 4718 -1381 \n", "4 2012 -3975 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# On a local machine, we would do this: fnames = glob.glob('../data/frog_strikes_*.csv')\n", "# But for Colab compatibility, we will do it by hand\n", "fnames = [\n", " os.path.join(data_path, f\"frog_strikes_{frog_id}.csv\")\n", " for frog_id in [\"I\", \"II\", \"III\", \"IV\"]\n", "]\n", "\n", "dfs = [pd.read_csv(f, comment=\"#\") for f in fnames]\n", "\n", "# Take a look at first data frame\n", "dfs[0].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have successfully loaded in all of the data frames. They all have the same columns (as given by the CSV files) and they all have the same indexes (range indexes that were applied by default when loading from the CSV files). We do not really care about the indexes. So, we wish to tape the data frames together vertically. 
We can use the `pd.concat()` function to do this.\n", "\n", "Before we do that, though, we might notice a problem. We will not have information to tell us which frog is which. We might therefore like to add a column to each data frame that has the frog ID, and then concatenate them. We can parse the ID of the frog from the file name, as we can see by looking at the file names." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['../data/frog_strikes_I.csv',\n", " '../data/frog_strikes_II.csv',\n", " '../data/frog_strikes_III.csv',\n", " '../data/frog_strikes_IV.csv']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fnames" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So, for each data frame/file name pair, we extract the Roman numeral and add a column to the data frame containing the frog ID." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)ID
02013_02_2631205461.95-7858841.27-0.290387700.823117-2030I
12013_02_2642527444.08-9832481.59-0.181101940.0724923-9695I
22013_03_0111745342.82-8502111.37-0.15783790.0521020-10239I
32013_03_0121556412.51-45510250.74-0.1703301580.524718-1381I
42013_03_013493360.80-9744991.57-0.4232452160.122012-3975I
\n", "
" ], "text/plain": [ " date trial number impact force (mN) impact time (ms) \\\n", "0 2013_02_26 3 1205 46 \n", "1 2013_02_26 4 2527 44 \n", "2 2013_03_01 1 1745 34 \n", "3 2013_03_01 2 1556 41 \n", "4 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "0 1.95 -785 \n", "1 4.08 -983 \n", "2 2.82 -850 \n", "3 2.51 -455 \n", "4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) ID \n", "0 3117 -2030 I \n", "1 24923 -9695 I \n", "2 21020 -10239 I \n", "3 4718 -1381 I \n", "4 2012 -3975 I " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for i, f in enumerate(fnames):\n", " frog_id = f[f.rfind('_')+1:f.rfind('.')]\n", " dfs[i]['ID'] = frog_id\n", " \n", "# Take a look\n", "dfs[0].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Good! Now all data frames have an `'ID'` column, and we can concatenate. The `pd.concat()` function takes as input a list of data frames to be concatenated. Since we do not care about the index, we can use the `ignore_index=True` kwarg." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of rows: 80 \n", "Unique IDs: ['I' 'II' 'III' 'IV']\n" ] } ], "source": [ "df = pd.concat(dfs, ignore_index=True)\n", "\n", "# Make sure we got them all\n", "print('Number of rows:', len(df), '\\nUnique IDs:', df['ID'].unique())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### More advanced concatenation\n", "\n", "When we concatenated, we updated each data frame with a fresh column. The `pd.concat()` function can handle some of this for you. If we pass a dictionary of data frames rather than a list, `pd.concat()` applies the keys to each data frame that is concatenated using a multiindex. First, we'll read in the data frames as a dictionary of data frames instead of a list." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['I', 'II', 'III', 'IV'])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Make dictionary of data frames\n", "dfs = {\n", " f[f.rfind(\"_\") + 1 : f.rfind(\".\")]: pd.read_csv(f, comment=\"#\")\n", " for i, f in enumerate(fnames)\n", "}\n", "\n", "# Verify that keys are in fact IDs\n", "dfs.keys()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, if we call `pd.concat()` with dictionary input, we get a new data frame with a multiindex." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
I02013_02_2631205461.95-7858841.27-0.290387700.823117-2030
12013_02_2642527444.08-9832481.59-0.181101940.0724923-9695
22013_03_0111745342.82-8502111.37-0.15783790.0521020-10239
32013_03_0121556412.51-45510250.74-0.1703301580.524718-1381
42013_03_013493360.80-9744991.57-0.4232452160.122012-3975
\n", "
" ], "text/plain": [ " date trial number impact force (mN) impact time (ms) \\\n", "I 0 2013_02_26 3 1205 46 \n", " 1 2013_02_26 4 2527 44 \n", " 2 2013_03_01 1 1745 34 \n", " 3 2013_03_01 2 1556 41 \n", " 4 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "I 0 1.95 -785 \n", " 1 4.08 -983 \n", " 2 2.82 -850 \n", " 3 2.51 -455 \n", " 4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "I 0 884 1.27 \n", " 1 248 1.59 \n", " 2 211 1.37 \n", " 3 1025 0.74 \n", " 4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "I 0 -0.290 387 \n", " 1 -0.181 101 \n", " 2 -0.157 83 \n", " 3 -0.170 330 \n", " 4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "I 0 70 \n", " 1 94 \n", " 2 79 \n", " 3 158 \n", " 4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "I 0 0.82 \n", " 1 0.07 \n", " 2 0.05 \n", " 3 0.52 \n", " 4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) \n", "I 0 3117 -2030 \n", " 1 24923 -9695 \n", " 2 21020 -10239 \n", " 3 4718 -1381 \n", " 4 2012 -3975 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.concat(dfs)\n", "\n", "# Take a look\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have a multiindex for the rows, with the high level index being the ID and the low level index being the original index of the data frame that was concatenated. It is useful to give these indexes names so we can conveniently refer to them. We can do that by setting the `df.index.names` property as\n", "\n", "```python\n", "df.index.names = ['ID', 'original index']\n", "```\n", "\n", "We can instead specify a `names` kwarg when we call `pd.concat()`. This kwarg specifies the names of the resulting multiindex from the concatenation." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
IDoriginal index
I02013_02_2631205461.95-7858841.27-0.290387700.823117-2030
12013_02_2642527444.08-9832481.59-0.181101940.0724923-9695
22013_03_0111745342.82-8502111.37-0.15783790.0521020-10239
32013_03_0121556412.51-45510250.74-0.1703301580.524718-1381
42013_03_013493360.80-9744991.57-0.4232452160.122012-3975
\n", "
" ], "text/plain": [ " date trial number impact force (mN) \\\n", "ID original index \n", "I 0 2013_02_26 3 1205 \n", " 1 2013_02_26 4 2527 \n", " 2 2013_03_01 1 1745 \n", " 3 2013_03_01 2 1556 \n", " 4 2013_03_01 3 493 \n", "\n", " impact time (ms) impact force / body weight \\\n", "ID original index \n", "I 0 46 1.95 \n", " 1 44 4.08 \n", " 2 34 2.82 \n", " 3 41 2.51 \n", " 4 36 0.80 \n", "\n", " adhesive force (mN) time frog pulls on target (ms) \\\n", "ID original index \n", "I 0 -785 884 \n", " 1 -983 248 \n", " 2 -850 211 \n", " 3 -455 1025 \n", " 4 -974 499 \n", "\n", " adhesive force / body weight adhesive impulse (N-s) \\\n", "ID original index \n", "I 0 1.27 -0.290 \n", " 1 1.59 -0.181 \n", " 2 1.37 -0.157 \n", " 3 0.74 -0.170 \n", " 4 1.57 -0.423 \n", "\n", " total contact area (mm2) contact area without mucus (mm2) \\\n", "ID original index \n", "I 0 387 70 \n", " 1 101 94 \n", " 2 83 79 \n", " 3 330 158 \n", " 4 245 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "ID original index \n", "I 0 0.82 \n", " 1 0.07 \n", " 2 0.05 \n", " 3 0.52 \n", " 4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) \n", "ID original index \n", "I 0 3117 -2030 \n", " 1 24923 -9695 \n", " 2 21020 -10239 \n", " 3 4718 -1381 \n", " 4 2012 -3975 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.concat(dfs, names=['ID', 'original index'])\n", "\n", "# Take a look\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We conveniently have labeled indexes, and we can now make `ID` a column in the data frame using the `reset_index()` method." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDoriginal indexdatetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
0I02013_02_2631205461.95-7858841.27-0.290387700.823117-2030
1I12013_02_2642527444.08-9832481.59-0.181101940.0724923-9695
2I22013_03_0111745342.82-8502111.37-0.15783790.0521020-10239
3I32013_03_0121556412.51-45510250.74-0.1703301580.524718-1381
4I42013_03_013493360.80-9744991.57-0.4232452160.122012-3975
\n", "
" ], "text/plain": [ " ID original index date trial number impact force (mN) \\\n", "0 I 0 2013_02_26 3 1205 \n", "1 I 1 2013_02_26 4 2527 \n", "2 I 2 2013_03_01 1 1745 \n", "3 I 3 2013_03_01 2 1556 \n", "4 I 4 2013_03_01 3 493 \n", "\n", " impact time (ms) impact force / body weight adhesive force (mN) \\\n", "0 46 1.95 -785 \n", "1 44 4.08 -983 \n", "2 34 2.82 -850 \n", "3 41 2.51 -455 \n", "4 36 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) \n", "0 3117 -2030 \n", "1 24923 -9695 \n", "2 21020 -10239 \n", "3 4718 -1381 \n", "4 2012 -3975 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.reset_index()\n", "\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now have a default range index for the data frame that we do not care about. Because the original index was not informative either, we can delete that column if we like, but it is not really a burden to have an unused column laying around in a data set this small. Nonetheless, let's blow it away." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDdatetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)
0I2013_02_2631205461.95-7858841.27-0.290387700.823117-2030
1I2013_02_2642527444.08-9832481.59-0.181101940.0724923-9695
2I2013_03_0111745342.82-8502111.37-0.15783790.0521020-10239
3I2013_03_0121556412.51-45510250.74-0.1703301580.524718-1381
4I2013_03_013493360.80-9744991.57-0.4232452160.122012-3975
\n", "
" ], "text/plain": [ " ID date trial number impact force (mN) impact time (ms) \\\n", "0 I 2013_02_26 3 1205 46 \n", "1 I 2013_02_26 4 2527 44 \n", "2 I 2013_03_01 1 1745 34 \n", "3 I 2013_03_01 2 1556 41 \n", "4 I 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "0 1.95 -785 \n", "1 4.08 -983 \n", "2 2.82 -850 \n", "3 2.51 -455 \n", "4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) \n", "0 3117 -2030 \n", "1 24923 -9695 \n", "2 21020 -10239 \n", "3 4718 -1381 \n", "4 2012 -3975 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "del df['original index']\n", "\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now have a nice, tidy data frame!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating a DataFrame from scratch\n", "\n", "Looking back at the [headers of the original data files](#The-data-files), we see that there is information present in the header that we would like to have in our data frame. For example, it would be nice to know if each strike came from an adult or juvenile. Or what the snout-vent length was. Working toward the goal of including this in our data frame, we will first construct a new data frame containing information about each frog." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data frames from dictionaries\n", "\n", "One way to create this new data frame is to first construct a dictionary with the respective fields. Since these data sets are small, we can look at the files and make the dictionary by hand." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "data_dict = {\n", " \"ID\": [\"I\", \"II\", \"III\", \"IV\"],\n", " \"age\": [\"adult\", \"adult\", \"juvenile\", \"juvenile\"],\n", " \"SVL (mm)\": [63, 70, 28, 31],\n", " \"body weight (g)\": [63.1, 72.7, 12.7, 12.7],\n", " \"species\": [\"cross\", \"cross\", \"cranwelli\", \"cranwelli\"],\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have this dictionary, we can convert it into a `DataFrame` by instantiating a `pd.DataFrame` class with it, using the `data` kwarg." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDageSVL (mm)body weight (g)species
0Iadult6363.1cross
1IIadult7072.7cross
2IIIjuvenile2812.7cranwelli
3IVjuvenile3112.7cranwelli
\n", "
" ], "text/plain": [ " ID age SVL (mm) body weight (g) species\n", "0 I adult 63 63.1 cross\n", "1 II adult 70 72.7 cross\n", "2 III juvenile 28 12.7 cranwelli\n", "3 IV juvenile 31 12.7 cranwelli" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Make it into a DataFrame\n", "df_frog_info = pd.DataFrame(data=data_dict)\n", "\n", "# Take a look\n", "df_frog_info" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Nice!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data frames from numpy arrays\n", "\n", "Sometimes the data sets are not small enough to construct a dictionary by hand. Oftentimes, we have a two-dimensional array of data that we want to make into a `DataFrame`. As an example, let's say we have a Numpy array where the first column is snout vent length and the second is weight." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[63. , 63.1],\n", " [70. , 72.7],\n", " [28. , 12.7],\n", " [31. , 12.7]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = np.array([[63, 70, 28, 31], [63.1, 72.7, 12.7, 12.7]]).transpose()\n", "\n", "# Verify that it's what we think it is\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To make this into a `DataFrame`, we again create `pd.DataFrame` instance, but this time we also specify the `columns` keyword argument." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SVL (mm)weight (g)
063.063.1
170.072.7
228.012.7
331.012.7
\n", "
" ], "text/plain": [ " SVL (mm) weight (g)\n", "0 63.0 63.1\n", "1 70.0 72.7\n", "2 28.0 12.7\n", "3 31.0 12.7" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_demo = pd.DataFrame(data=data, columns=[\"SVL (mm)\", \"weight (g)\"])\n", "\n", "# Take a look\n", "df_demo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That also works. Generally, any two-dimensional Numpy array can be converted into a `DataFrame` in this way. You just need to supply column names." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Programmatically creating a data frame\n", "\n", "Hand-entering data should be minimized. The information about each frog were hand-entered once by the experimenter. We should not hand-enter them again. We therefore should parse the comment lines of input files to get the pertinent information.\n", "\n", "Note, though, that in the case of a single experiment with only four data sets, hand entering might be faster and indeed less error prone than doing it programmatically. We should definitely do it programmatically if we have a large number of data files or will ever do an experiment with the same file format again.\n", "\n", "So, let's programmatically parse the files. We start by writing a function to parse the metadata from a single file. Recall that the comment lines look like this:\n", "\n", "```\n", "# These data are from Kleinteich and Gorb, Sci. 
Rep., 4, 5225, 2014.\n", "# Frog ID: I\n", "# Age: adult\n", "# Snout-vent-length (SVL): 63 mm\n", "# Body weight: 63.1 g\n", "# Species: Ceratophrys cranwelli crossed with Ceratophrys cornuta\n", "```\n", "\n", "(The function below will not work with Colab because `open()` does not work for files specified by a URL.)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "def parse_frog_metadata(fname):\n", " with open(fname, 'r') as f:\n", " # Citation line, ignore.\n", " f.readline()\n", " \n", " # Frog ID\n", " line = f.readline()\n", " frog_id = line[line.find(':')+1:].strip()\n", " \n", " # Age\n", " line = f.readline()\n", " age = line[line.find(':')+1:].strip()\n", " \n", " # SVL, assume units given as mm\n", " line = f.readline()\n", " svl = line[line.find(':')+1:line.rfind(' ')].strip()\n", " \n", " # Body weight, assume units given as g\n", " line = f.readline()\n", " body_weight = line[line.find(':')+1:line.rfind(' ')].strip()\n", "\n", " # Species (either cranwelli or cross)\n", " line = f.readline()\n", " species = line[line.find(':')+1:].strip()\n", " if 'cross' in species:\n", " species = 'cross'\n", " else:\n", " species = 'cranwelli'\n", "\n", " return frog_id, age, svl, body_weight, species" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's take it for a spin." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('I', 'adult', '63', '63.1', 'cross')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "parse_frog_metadata(os.path.join(data_path, 'frog_strikes_I.csv'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looks good! Now we can create a list of tuples to use as data for making a data frame." 
] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('I', 'adult', '63', '63.1', 'cross'),\n", " ('II', 'adult', '70', '72.7', 'cross'),\n", " ('III', 'juvenile', '28', '12.7', 'cranwelli'),\n", " ('IV', 'juvenile', '31', '12.7', 'cranwelli')]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [parse_frog_metadata(f) for f in fnames]\n", " \n", "# Take a look\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now input this list of tuples, plus the column names, into `pd.DataFrame()`, and we've got our data frame." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDageSVL (mm)body weight (g)species
0Iadult6363.1cross
1IIadult7072.7cross
2IIIjuvenile2812.7cranwelli
3IVjuvenile3112.7cranwelli
\n", "
" ], "text/plain": [ " ID age SVL (mm) body weight (g) species\n", "0 I adult 63 63.1 cross\n", "1 II adult 70 72.7 cross\n", "2 III juvenile 28 12.7 cranwelli\n", "3 IV juvenile 31 12.7 cranwelli" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_frog_info = pd.DataFrame(\n", " data=data, \n", " columns=[\"ID\", \"age\", \"SVL (mm)\", \"body weight (g)\", \"species\"]\n", ")\n", "\n", "# Take a look\n", "df_frog_info" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Merging DataFrames\n", "\n", "Our ultimate goal is to add the information about the frogs into our main data frame, `df`, that we have been working with. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Brute force merge\n", "\n", "We can do it using tools we have already learned. For each row in the `DataFrame`, we can add the relevant value in each column. Because this will not be the final way I recommend doing this, I will do these operations on a copy of `df` using the `copy()` method." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDdatetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)ageSVL (mm)body weight (g)species
0I2013_02_2631205461.95-7858841.27-0.290387700.823117-2030adult6363.1cross
1I2013_02_2642527444.08-9832481.59-0.181101940.0724923-9695adult6363.1cross
2I2013_03_0111745342.82-8502111.37-0.15783790.0521020-10239adult6363.1cross
3I2013_03_0121556412.51-45510250.74-0.1703301580.524718-1381adult6363.1cross
4I2013_03_013493360.80-9744991.57-0.4232452160.122012-3975adult6363.1cross
\n", "
" ], "text/plain": [ " ID date trial number impact force (mN) impact time (ms) \\\n", "0 I 2013_02_26 3 1205 46 \n", "1 I 2013_02_26 4 2527 44 \n", "2 I 2013_03_01 1 1745 34 \n", "3 I 2013_03_01 2 1556 41 \n", "4 I 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "0 1.95 -785 \n", "1 4.08 -983 \n", "2 2.82 -850 \n", "3 2.51 -455 \n", "4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) age SVL (mm) \\\n", "0 3117 -2030 adult 63 \n", "1 24923 -9695 adult 63 \n", "2 21020 -10239 adult 63 \n", "3 4718 -1381 adult 63 \n", "4 2012 -3975 adult 63 \n", "\n", " body weight (g) species \n", "0 63.1 cross \n", "1 63.1 cross \n", "2 63.1 cross \n", "3 63.1 cross \n", "4 63.1 cross " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Make a copy of df\n", "df_copy = df.copy()\n", "\n", "# Build each column\n", "for col in df_frog_info.columns[df_frog_info.columns != 'ID']:\n", " # Make a new column with empty values\n", " df_copy[col] = np.empty(len(df_copy))\n", " \n", " # Add in each entry, row by row\n", " for i, r in df_copy.iterrows():\n", " ind = df_frog_info['ID'] == r['ID']\n", " df_copy.loc[i, col] = df_frog_info.loc[ind, col].iloc[0]\n", " \n", "# Take a look at the updated DataFrame\n", "df_copy.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that I used the `iterrows()` method of the `df_copy` 
data frame. This iterator gives an index (which I called `i`) and a row of a data frame (which I called `r`). This method, and the analogous one for iterating over columns, `items()` (called `iteritems()` in pandas versions before 2.0), can be useful.\n", "\n", "But this approach seems rather clunky. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using pd.merge()\n", "\n", "A much better way to do it is to use Pandas's [built-in merge() method](https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging). Called with all the default keyword arguments, this function finds common columns between two `DataFrame`s (in this case, there is just one, the `ID` column), and then uses those columns to merge them, filling in values that match in the common columns. This is exactly what we want." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDdatetrial numberimpact force (mN)impact time (ms)impact force / body weightadhesive force (mN)time frog pulls on target (ms)adhesive force / body weightadhesive impulse (N-s)total contact area (mm2)contact area without mucus (mm2)contact area with mucus / contact area without mucuscontact pressure (Pa)adhesive strength (Pa)ageSVL (mm)body weight (g)species
0I2013_02_2631205461.95-7858841.27-0.290387700.823117-2030adult6363.1cross
1I2013_02_2642527444.08-9832481.59-0.181101940.0724923-9695adult6363.1cross
2I2013_03_0111745342.82-8502111.37-0.15783790.0521020-10239adult6363.1cross
3I2013_03_0121556412.51-45510250.74-0.1703301580.524718-1381adult6363.1cross
4I2013_03_013493360.80-9744991.57-0.4232452160.122012-3975adult6363.1cross
\n", "
" ], "text/plain": [ " ID date trial number impact force (mN) impact time (ms) \\\n", "0 I 2013_02_26 3 1205 46 \n", "1 I 2013_02_26 4 2527 44 \n", "2 I 2013_03_01 1 1745 34 \n", "3 I 2013_03_01 2 1556 41 \n", "4 I 2013_03_01 3 493 36 \n", "\n", " impact force / body weight adhesive force (mN) \\\n", "0 1.95 -785 \n", "1 4.08 -983 \n", "2 2.82 -850 \n", "3 2.51 -455 \n", "4 0.80 -974 \n", "\n", " time frog pulls on target (ms) adhesive force / body weight \\\n", "0 884 1.27 \n", "1 248 1.59 \n", "2 211 1.37 \n", "3 1025 0.74 \n", "4 499 1.57 \n", "\n", " adhesive impulse (N-s) total contact area (mm2) \\\n", "0 -0.290 387 \n", "1 -0.181 101 \n", "2 -0.157 83 \n", "3 -0.170 330 \n", "4 -0.423 245 \n", "\n", " contact area without mucus (mm2) \\\n", "0 70 \n", "1 94 \n", "2 79 \n", "3 158 \n", "4 216 \n", "\n", " contact area with mucus / contact area without mucus \\\n", "0 0.82 \n", "1 0.07 \n", "2 0.05 \n", "3 0.52 \n", "4 0.12 \n", "\n", " contact pressure (Pa) adhesive strength (Pa) age SVL (mm) \\\n", "0 3117 -2030 adult 63 \n", "1 24923 -9695 adult 63 \n", "2 21020 -10239 adult 63 \n", "3 4718 -1381 adult 63 \n", "4 2012 -3975 adult 63 \n", "\n", " body weight (g) species \n", "0 63.1 cross \n", "1 63.1 cross \n", "2 63.1 cross \n", "3 63.1 cross \n", "4 63.1 cross " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.merge(df_frog_info)\n", "\n", "# Check it out!\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the entries for the added columns were repeated appropriately, e.g., body weight column had 63 for every row corresponding to frog I. \n", "\n", "I think this example of merging `DataFrame`s highlights the power of using them in your data analysis. 
Note also that there are plenty of options for how merges are done, and you should consult the [Pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging).\n", "\n", "This example also brings up an important point. When you have to perform operations on data frames, you can often \"brute force\" it with loops, etc. But if what you are trying to do seems like something a data analyst would frequently encounter, there is a good chance it's already built into Pandas, and you should ask Google how to do it." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## At long last, a plot!\n", "\n", "While the purpose of this part of the lesson was to learn how to concatenate and merge data frames, going through all of that wrangling effort would somehow be unsatisfying if we didn't generate a plot. Let's compare the impact force on a per-mass basis for each frog." ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "(function(root) {\n", " function embed_document(root) {\n", " const docs_json = {\"d5db22b2-971e-4bb2-b214-ca6c42cee39b\":{\"version\":\"3.2.1\",\"title\":\"Bokeh Application\",\"roots\":[{\"type\":\"object\",\"name\":\"Figure\",\"id\":\"p1002\",\"attributes\":{\"x_range\":{\"type\":\"object\",\"name\":\"DataRange1d\",\"id\":\"p1004\"},\"y_range\":{\"type\":\"object\",\"name\":\"FactorRange\",\"id\":\"p1001\",\"attributes\":{\"factors\":[\"IV\",\"III\",\"II\",\"I\"]}},\"x_scale\":{\"type\":\"object\",\"name\":\"LinearScale\",\"id\":\"p1011\"},\"y_scale\":{\"type\":\"object\",\"name\":\"CategoricalScale\",\"id\":\"p1012\"},\"title\":{\"type\":\"object\",\"name\":\"Title\",\"id\":\"p1009\"},\"renderers\":[{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1040\",\"attributes\":{\"name\":\"hover_glyphs\",\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1031\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1032\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1033\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"AAAAAAEAAAACAAAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACQAAAAoAAAALAAAADAAAAA0AAAAOAAAADwAAABAAAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAAABkAAAAaAAAAGwAAABwAAAAdAAAAHgAAAB8AAAAgAAAAIQAAACIAAAAjAAAAJAAAACUAAAAmAAAAJwAAAA==\"},\"shape\":[40],\"dtype\":\"int32\",\"order\":\"little\"}],[\"age\",{\"type\":\"ndarray\",\"array\":[\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"
adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"impact force / body weight\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"MzMzMzMz/z9SuB6F61EQQI/C9ShcjwZAFK5H4XoUBECamZmZmZnpP3E9CtejcA1AzczMzMzM7D/hehSuR+EIQBSuR+F6FBFAexSuR+F6CEB7FK5H4XoIQAAAAAAAAARA4XoUrkfhAEDXo3A9CtcFQOxRuB6F6wNAj8L1KFyPAEAAAAAAAAD0Pz0K16NwPQpAAAAAAAAAAEBSuB6F61HoP1K4HoXrUQ5AuB6F61G49j+kcD0K16PoP9ejcD0K1wFAUrgehetR9D/2KFyPwvUMQNejcD0K1/M/rkfhehSu9z9cj8L1KFwLQGZmZmZmZuY/ZmZmZmZm+j8pXI/C9SjkP1K4HoXrUfw/j8L1KFyP4j89CtejcD0GQFyPwvUoXPM/UrgehetR8D/NzMzMzMzsP0jhehSuR/E/hetRuB6F+z8=\"},\"shape\":[40],\"dtype\":\"float64\",\"order\":\"little\"}],[\"ID\",{\"type\":\"ndarray\",\"array\":[\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"cat\",{\"type\":\"ndarray\",\"array\":[\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"I\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\",\"II\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"__label\",{\"type\":\"ndarray\",\"array\":[\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\",\"adult\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}]]}}},\"view\":{\"type\":\"object\",\"name\":\"CDSView\",\
"id\":\"p1041\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\"AllIndices\",\"id\":\"p1042\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1037\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"type\":\"object\",\"name\":\"Jitter\",\"id\":\"p1030\",\"attributes\":{\"width\":0.1,\"distribution\":\"normal\",\"range\":{\"id\":\"p1001\"}}}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1038\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1030\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1039\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body 
weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1030\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_color\":{\"type\":\"value\",\"value\":\"#1f77b3\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}},{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1052\",\"attributes\":{\"name\":\"hover_glyphs\",\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1043\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1044\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1045\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"KAAAACkAAAAqAAAAKwAAACwAAAAtAAAALgAAAC8AAAAwAAAAMQAAADIAAAAzAAAANAAAADUAAAA2AAAANwAAADgAAAA5AAAAOgAAADsAAAA8AAAAPQAAAD4AAAA/AAAAQAAAAEEAAABCAAAAQwAAAEQAAABFAAAARgAAAEcAAABIAAAASQAAAEoAAABLAAAATAAAAE0AAABOAAAATwAAAA==\"},\"shape\":[40],\"dtype\":\"int32\",\"order\":\"little\"}],[\"age\",{\"type\":\"ndarray\",\"array\":[\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"impact force / body 
weight\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"w/UoXI/CE0CkcD0K16MKQOF6FK5H4QRA9ihcj8L1GECkcD0K16MTQIXrUbgehRFASOF6FK5HEUCkcD0K16MSQPYoXI/C9RlAmpmZmZmZDUCF61G4HoUNQClcj8L1KBRA9ihcj8L1E0CF61G4HoURQDMzMzMzMxFAuB6F61G4CEDXo3A9CtcJQMP1KFyPwhNAZmZmZmZmFUBxPQrXo3APQHsUrkfhevQ/zczMzMzM8D/sUbgehevRP/YoXI/C9QpAH4XrUbgeBUDD9Shcj8LFP+xRuB6F6w1APQrXo3A9AEBxPQrXo3AVQFyPwvUoXBFAhetRuB6F9z+F61G4HoX3P8P1KFyPwhFAuB6F61G4DkBI4XoUrkcYQAAAAAAAAAhAAAAAAAAAEkAzMzMzMzMVQEjhehSuRxJA16NwPQrXC0A=\"},\"shape\":[40],\"dtype\":\"float64\",\"order\":\"little\"}],[\"ID\",{\"type\":\"ndarray\",\"array\":[\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"cat\",{\"type\":\"ndarray\",\"array\":[\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"III\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\",\"IV\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}],[\"__label\",{\"type\":\"ndarray\",\"array\":[\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\",\"juvenile\"],\"shape\":[40],\"dtype\":\"object\",\"order\":\"little\"}]]}}},\"v
iew\":{\"type\":\"object\",\"name\":\"CDSView\",\"id\":\"p1053\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\"AllIndices\",\"id\":\"p1054\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1049\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1030\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1050\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1030\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Circle\",\"id\":\"p1051\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"impact force / body 
weight\"},\"y\":{\"type\":\"field\",\"field\":\"cat\",\"transform\":{\"id\":\"p1030\"}},\"line_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_color\":{\"type\":\"value\",\"value\":\"#ff7e0e\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}}],\"toolbar\":{\"type\":\"object\",\"name\":\"Toolbar\",\"id\":\"p1010\",\"attributes\":{\"tools\":[{\"type\":\"object\",\"name\":\"PanTool\",\"id\":\"p1023\"},{\"type\":\"object\",\"name\":\"WheelZoomTool\",\"id\":\"p1024\"},{\"type\":\"object\",\"name\":\"BoxZoomTool\",\"id\":\"p1025\",\"attributes\":{\"overlay\":{\"type\":\"object\",\"name\":\"BoxAnnotation\",\"id\":\"p1026\",\"attributes\":{\"syncable\":false,\"level\":\"overlay\",\"visible\":false,\"left_units\":\"canvas\",\"right_units\":\"canvas\",\"bottom_units\":\"canvas\",\"top_units\":\"canvas\",\"line_color\":\"black\",\"line_alpha\":1.0,\"line_width\":2,\"line_dash\":[4,4],\"fill_color\":\"lightgrey\",\"fill_alpha\":0.5}}}},{\"type\":\"object\",\"name\":\"SaveTool\",\"id\":\"p1027\"},{\"type\":\"object\",\"name\":\"ResetTool\",\"id\":\"p1028\"},{\"type\":\"object\",\"name\":\"HelpTool\",\"id\":\"p1029\"}]}},\"toolbar_location\":\"above\",\"left\":[{\"type\":\"object\",\"name\":\"CategoricalAxis\",\"id\":\"p1018\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"CategoricalTicker\",\"id\":\"p1019\"},\"formatter\":{\"type\":\"object\",\"name\":\"CategoricalTickFormatter\",\"id\":\"p1020\"},\"axis_label\":\"frog 
ID\",\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1021\"}}}],\"right\":[{\"type\":\"object\",\"name\":\"Legend\",\"id\":\"p1055\",\"attributes\":{\"location\":\"center\",\"title\":\"age\",\"click_policy\":\"hide\",\"items\":[{\"type\":\"object\",\"name\":\"LegendItem\",\"id\":\"p1056\",\"attributes\":{\"label\":{\"type\":\"value\",\"value\":\"adult\"},\"renderers\":[{\"id\":\"p1040\"}]}},{\"type\":\"object\",\"name\":\"LegendItem\",\"id\":\"p1057\",\"attributes\":{\"label\":{\"type\":\"value\",\"value\":\"juvenile\"},\"renderers\":[{\"id\":\"p1052\"}]}}]}}],\"below\":[{\"type\":\"object\",\"name\":\"LinearAxis\",\"id\":\"p1013\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"BasicTicker\",\"id\":\"p1014\",\"attributes\":{\"mantissas\":[1,2,5]}},\"formatter\":{\"type\":\"object\",\"name\":\"BasicTickFormatter\",\"id\":\"p1015\"},\"axis_label\":\"impact force / body weight (mN/g)\",\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1016\"}}}],\"center\":[{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1017\",\"attributes\":{\"axis\":{\"id\":\"p1013\"}}},{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1022\",\"attributes\":{\"dimension\":1,\"axis\":{\"id\":\"p1018\"},\"grid_line_color\":null}}],\"frame_width\":375,\"frame_height\":275}}]}};\n", " const render_items = [{\"docid\":\"d5db22b2-971e-4bb2-b214-ca6c42cee39b\",\"roots\":{\"p1002\":\"d7e59000-dcd9-44e4-87f0-ad254f913e20\"},\"root_ids\":[\"p1002\"]}];\n", " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", " }\n", " if (root.Bokeh !== undefined) {\n", " embed_document(root);\n", " } else {\n", " let attempts = 0;\n", " const timer = setInterval(function(root) {\n", " if (root.Bokeh !== undefined) {\n", " clearInterval(timer);\n", " embed_document(root);\n", " } else {\n", " attempts++;\n", " if (attempts > 100) {\n", " clearInterval(timer);\n", " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS 
library is missing\");\n", " }\n", " }\n", " }, 10, root)\n", " }\n", "})(window);" ], "application/vnd.bokehjs_exec.v0+json": "" }, "metadata": { "application/vnd.bokehjs_exec.v0+json": { "id": "p1002" } }, "output_type": "display_data" } ], "source": [ "p = iqplot.strip(\n", " df,\n", " q=\"impact force / body weight\",\n", " cats=\"ID\",\n", " color_column=\"age\",\n", " spread=\"jitter\",\n", " x_axis_label=\"impact force / body weight (mN/g)\",\n", " y_axis_label=\"frog ID\"\n", ")\n", "\n", "bokeh.io.show(p)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Apparently Frog III consistently packs a powerful punch, er... tongue." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Computing environment" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python implementation: CPython\n", "Python version : 3.11.5\n", "IPython version : 8.15.0\n", "\n", "numpy : 1.24.3\n", "pandas : 2.0.3\n", "bokeh : 3.2.1\n", "iqplot : 0.3.5\n", "jupyterlab: 4.0.6\n", "\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -v -p numpy,pandas,bokeh,iqplot,jupyterlab" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 4 }