{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exercise 9 - Statistics" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Data preparation\n", "\n", "## 1.1 Load data into frames" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [], "source": [ "pheno = pd.read_csv('phenoTable.csv')\n", "geno = pd.read_csv('genoTable.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.2 [Merge data frames](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.merge.html)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2. Tasks\n", "\n", "## 2.1 Running basic statistical analysis\n", "For this example we will start with a fairly complicated dataset from a genetics analysis done at the Institute of Biomechanics, ETHZ." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.2.1. Introduction\n", "There are 1000 mouse femur bones which have been measured at high resolution, and a number of shape analyses have been run on each sample.\n", "\n", "- Phenotypical Information\n", "  - Each column represents a metric which was assessed in the images.\n", "  - `CORT_DTO__C_TH`, for example, is the mean thickness of the cortical bone." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | BMD | \n", "MECHANICS_STIFFNESS | \n", "CORT_DTO__C_TH | \n", "CORT_DTO__C_TH_SD | \n", "CORT_MOM__J | \n", "CT_TH_RAD | \n", "CT_TH_RAD_STD | \n", "CANAL_VOLUME | \n", "CANAL_COUNT | \n", "CANAL_DENSITY | \n", "... | \n", "CANAL_THETA | \n", "CANAL_THETA_CV | \n", "CANAL_PCA1 | \n", "CANAL_PCA1_CV | \n", "CANAL_PCA2 | \n", "CANAL_PCA2_CV | \n", "CANAL_PCA3 | \n", "CANAL_PCA3_CV | \n", "FEMALE | \n", "ID | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.030221 | \n", "57.163181 | \n", "0.186455 | \n", "0.019785 | \n", "0.103288 | \n", "78.558303 | \n", "17.440679 | \n", "18351.469264 | \n", "31.0 | \n", "72.458800 | \n", "... | \n", "59.576428 | \n", "0.281042 | \n", "443.537228 | \n", "1.326217 | \n", "120.150958 | \n", "1.677884 | \n", "30.294477 | \n", "0.700402 | \n", "0 | \n", "351 | \n", "
| 1 | \n", "0.032788 | \n", "54.972011 | \n", "0.183007 | \n", "0.015696 | \n", "0.126947 | \n", "88.691516 | \n", "22.238608 | \n", "27002.217716 | \n", "137.0 | \n", "206.113056 | \n", "... | \n", "54.487601 | \n", "0.401896 | \n", "293.627859 | \n", "1.272190 | \n", "84.416139 | \n", "1.541258 | \n", "34.940901 | \n", "0.804821 | \n", "0 | \n", "356 | \n", "
| 2 | \n", "0.036075 | \n", "73.590881 | \n", "0.216930 | \n", "0.028019 | \n", "0.171012 | \n", "79.973567 | \n", "8.862339 | \n", "18464.688139 | \n", "128.0 | \n", "177.921019 | \n", "... | \n", "56.120693 | \n", "0.356876 | \n", "326.470697 | \n", "1.155693 | \n", "87.714578 | \n", "1.051160 | \n", "32.911487 | \n", "0.754326 | \n", "0 | \n", "357 | \n", "
| 3 | \n", "0.031145 | \n", "49.854823 | \n", "0.193758 | \n", "0.024087 | \n", "0.099639 | \n", "88.215056 | \n", "23.288367 | \n", "42840.614369 | \n", "147.0 | \n", "247.019809 | \n", "... | \n", "50.206993 | \n", "0.445938 | \n", "243.130372 | \n", "1.014527 | \n", "81.448541 | \n", "1.162161 | \n", "37.690527 | \n", "0.944862 | \n", "0 | \n", "359 | \n", "
| 4 | \n", "0.034226 | \n", "66.578296 | \n", "0.175598 | \n", "0.018144 | \n", "0.176490 | \n", "79.330125 | \n", "15.968669 | \n", "25474.883270 | \n", "271.0 | \n", "349.344731 | \n", "... | \n", "53.561597 | \n", "0.441762 | \n", "243.212520 | \n", "1.041145 | \n", "80.598173 | \n", "1.394151 | \n", "39.716728 | \n", "1.075045 | \n", "1 | \n", "360 | \n", "
5 rows × 35 columns
\n", "| \n", " | ID | \n", "D1Mit64 | \n", "D1Mit236 | \n", "D1Mit7 | \n", "D1Mit386 | \n", "D1Mit14 | \n", "D1Mit540 | \n", "D1Mit17 | \n", "D2Mit365 | \n", "D2Mit323 | \n", "... | \n", "D18Mit64 | \n", "D18Mit147 | \n", "D18Mit123 | \n", "D18Mit9 | \n", "D18Mit4 | \n", "D19Mit68 | \n", "D19Mit40 | \n", "D19MIT88 | \n", "D19MIT17 | \n", "D19MIT108 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "351 | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "... | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "
| 1 | \n", "353 | \n", "B | \n", "B | \n", "B | \n", "B | \n", "H | \n", "H | \n", "H | \n", "H | \n", "A | \n", "... | \n", "H | \n", "A | \n", "A | \n", "A | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "H | \n", "
| 2 | \n", "354 | \n", "H | \n", "A | \n", "A | \n", "A | \n", "A | \n", "H | \n", "H | \n", "H | \n", "H | \n", "... | \n", "H | \n", "- | \n", "H | \n", "H | \n", "A | \n", "H | \n", "H | \n", "A | \n", "H | \n", "H | \n", "
| 3 | \n", "355 | \n", "A | \n", "A | \n", "H | \n", "H | \n", "- | \n", "H | \n", "H | \n", "A | \n", "A | \n", "... | \n", "H | \n", "H | \n", "A | \n", "A | \n", "A | \n", "A | \n", "A | \n", "- | \n", "A | \n", "A | \n", "
| 4 | \n", "356 | \n", "H | \n", "A | \n", "A | \n", "A | \n", "A | \n", "A | \n", "H | \n", "H | \n", "- | \n", "... | \n", "B | \n", "H | \n", "B | \n", "B | \n", "H | \n", "B | \n", "B | \n", "B | \n", "- | \n", "H | \n", "
5 rows × 99 columns
\n", "