{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Statistical Thinking in Python (Part 1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Exploratory Data Analysis" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Imports\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "from sklearn.datasets import load_iris\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['SectionC']\n" ] } ], "source": [ "# Open the Excel workbook and list the names of the sheets it contains\n", "fh = pd.ExcelFile(\"dataset/EAVS.xlsx\")\n", "print(fh.sheet_names)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | State | \n", "Jurisdiction | \n", "FIPSCode | \n", "QC1a | \n", "QC1b | \n", "QC1c | \n", "QC1d | \n", "QC1e | \n", "QC1f | \n", "QC1f_Other | \n", "... | \n", "QC5s | \n", "QC5s_Other | \n", "QC5t | \n", "QC5t_Other | \n", "QC5u_ | \n", "QC5u_Other | \n", "QC5v | \n", "QC5_Total | \n", "QC5v_Other | \n", "QC5_Comment | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "AK | \n", "ALASKA | \n", "200000000 | \n", "22861.0 | \n", "19296.0 | \n", "101.0 | \n", "306.0 | \n", "3158.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "73.0 | \n", "Postmarked or witnessed after election day | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "552.0 | \n", "NaN | \n", "NaN | \n", "
1 | \n", "AL | \n", "AUTAUGA COUNTY | \n", "100100000 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "AL | \n", "BALDWIN COUNTY | \n", "100300000 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "AL | \n", "BARBOUR COUNTY | \n", "100500000 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "AL | \n", "BIBB COUNTY | \n", "100700000 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "-999999.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 60 columns
\n", "\n", " | sepal length (cm) | \n", "sepal width (cm) | \n", "petal length (cm) | \n", "petal width (cm) | \n", "species | \n", "
---|---|---|---|---|---|
0 | \n", "5.1 | \n", "3.5 | \n", "1.4 | \n", "0.2 | \n", "Setosa | \n", "
1 | \n", "4.9 | \n", "3.0 | \n", "1.4 | \n", "0.2 | \n", "Setosa | \n", "
2 | \n", "4.7 | \n", "3.2 | \n", "1.3 | \n", "0.2 | \n", "Setosa | \n", "
3 | \n", "4.6 | \n", "3.1 | \n", "1.5 | \n", "0.2 | \n", "Setosa | \n", "
4 | \n", "5.0 | \n", "3.6 | \n", "1.4 | \n", "0.2 | \n", "Setosa | \n", "