{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples and Exercises from Think Stats, 2nd Edition\n",
    "\n",
    "http://thinkstats2.com\n",
    "\n",
    "Copyright 2016 Allen B. Downey\n",
    "\n",
    "MIT License: https://opensource.org/licenses/MIT\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "import nsfg #importing the nsfg dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Examples from Chapter 1\n",
    "\n",
    "Read NSFG data into a Pandas DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>laborfor_i</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.8125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.8750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.1875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 244 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "0       1         1        NaN        NaN       NaN       NaN       6.0   \n",
       "1       1         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2       2         1        NaN        NaN       NaN       NaN       5.0   \n",
       "3       2         2        NaN        NaN       NaN       NaN       6.0   \n",
       "4       2         3        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "   pregend2  nbrnaliv  multbrth     ...       laborfor_i  religion_i  metro_i  \\\n",
       "0       NaN       1.0       NaN     ...                0           0        0   \n",
       "1       NaN       1.0       NaN     ...                0           0        0   \n",
       "2       NaN       3.0       5.0     ...                0           0        0   \n",
       "3       NaN       1.0       NaN     ...                0           0        0   \n",
       "4       NaN       1.0       NaN     ...                0           0        0   \n",
       "\n",
       "       basewgt  adj_mod_basewgt      finalwgt  secu_p  sest  cmintvw  \\\n",
       "0  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "1  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "2  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "3  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "4  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "\n",
       "   totalwgt_lb  \n",
       "0       8.8125  \n",
       "1       7.8750  \n",
       "2       9.1250  \n",
       "3       7.0000  \n",
       "4       6.1875  \n",
       "\n",
       "[5 rows x 244 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg = nsfg.ReadFemPreg() #reading the data into a pandas data frame\n",
    "preg.head() #shows the first 5 rows of the data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the column names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['caseid', 'pregordr', 'howpreg_n', 'howpreg_p', 'moscurrp', 'nowprgdk',\n",
       "       'pregend1', 'pregend2', 'nbrnaliv', 'multbrth',\n",
       "       ...\n",
       "       'laborfor_i', 'religion_i', 'metro_i', 'basewgt', 'adj_mod_basewgt',\n",
       "       'finalwgt', 'secu_p', 'sest', 'cmintvw', 'totalwgt_lb'],\n",
       "      dtype='object', length=244)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns #index of column names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single column name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pregordr'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column and check what type it is."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr = preg['pregordr']\n",
    "type(pregordr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "1        2\n",
       "2        1\n",
       "3        2\n",
       "4        3\n",
       "5        1\n",
       "6        2\n",
       "7        3\n",
       "8        1\n",
       "9        2\n",
       "10       1\n",
       "11       1\n",
       "12       2\n",
       "13       3\n",
       "14       1\n",
       "15       2\n",
       "16       3\n",
       "17       1\n",
       "18       2\n",
       "19       1\n",
       "20       2\n",
       "21       1\n",
       "22       2\n",
       "23       1\n",
       "24       2\n",
       "25       3\n",
       "26       1\n",
       "27       1\n",
       "28       2\n",
       "29       3\n",
       "        ..\n",
       "13563    2\n",
       "13564    3\n",
       "13565    1\n",
       "13566    1\n",
       "13567    1\n",
       "13568    2\n",
       "13569    1\n",
       "13570    2\n",
       "13571    3\n",
       "13572    4\n",
       "13573    1\n",
       "13574    2\n",
       "13575    1\n",
       "13576    1\n",
       "13577    2\n",
       "13578    1\n",
       "13579    2\n",
       "13580    1\n",
       "13581    2\n",
       "13582    3\n",
       "13583    1\n",
       "13584    2\n",
       "13585    1\n",
       "13586    2\n",
       "13587    3\n",
       "13588    1\n",
       "13589    2\n",
       "13590    3\n",
       "13591    4\n",
       "13592    5\n",
       "Name: pregordr, Length: 13593, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single element from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a slice from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    1\n",
       "3    2\n",
       "4    3\n",
       "Name: pregordr, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[2:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column using dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "pregordr = preg.pregordr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "1        1\n",
       "2        1\n",
       "3        1\n",
       "4        1\n",
       "5        1\n",
       "6        1\n",
       "7        1\n",
       "8        1\n",
       "9        1\n",
       "10       1\n",
       "11       1\n",
       "12       1\n",
       "13       2\n",
       "14       4\n",
       "15       1\n",
       "16       1\n",
       "17       1\n",
       "18       4\n",
       "19       1\n",
       "20       1\n",
       "21       1\n",
       "22       4\n",
       "23       1\n",
       "24       1\n",
       "25       1\n",
       "26       1\n",
       "27       1\n",
       "28       1\n",
       "29       1\n",
       "        ..\n",
       "13563    1\n",
       "13564    1\n",
       "13565    1\n",
       "13566    1\n",
       "13567    2\n",
       "13568    5\n",
       "13569    1\n",
       "13570    1\n",
       "13571    1\n",
       "13572    1\n",
       "13573    1\n",
       "13574    1\n",
       "13575    2\n",
       "13576    1\n",
       "13577    6\n",
       "13578    1\n",
       "13579    1\n",
       "13580    4\n",
       "13581    1\n",
       "13582    5\n",
       "13583    2\n",
       "13584    1\n",
       "13585    2\n",
       "13586    2\n",
       "13587    2\n",
       "13588    1\n",
       "13589    2\n",
       "13590    2\n",
       "13591    1\n",
       "13592    1\n",
       "Name: outcome, Length: 13593, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.outcome"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count the number of times each value occurs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    9148\n",
       "4    1921\n",
       "2    1862\n",
       "6     352\n",
       "5     190\n",
       "3     120\n",
       "Name: outcome, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.outcome.value_counts()\n",
    "#type(preg.outcome.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    9148\n",
       "2    1862\n",
       "3     120\n",
       "4    1921\n",
       "5     190\n",
       "6     352\n",
       "Name: outcome, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.outcome.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check the values of another variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0        8\n",
       "1.0       40\n",
       "2.0       53\n",
       "3.0       98\n",
       "4.0      229\n",
       "5.0      697\n",
       "6.0     2223\n",
       "7.0     3049\n",
       "8.0     1889\n",
       "9.0      623\n",
       "10.0     132\n",
       "11.0      26\n",
       "12.0      10\n",
       "13.0       3\n",
       "14.0       3\n",
       "15.0       1\n",
       "Name: birthwgt_lb, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthwgt_lb.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make a dictionary that maps from each respondent's `caseid` to a list of indices into the pregnancy `DataFrame`.  Use it to select the pregnancy outcomes for a single respondent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 4, 4, 4, 4, 4, 1])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caseid = 10229\n",
    "preg_map = nsfg.MakePregMap(preg)\n",
    "indices = preg_map[caseid]\n",
    "preg.outcome[indices].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `birthord` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611933)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0     4413\n",
      "2.0     2874\n",
      "3.0     1234\n",
      "4.0      421\n",
      "5.0      126\n",
      "6.0       50\n",
      "7.0       20\n",
      "8.0        7\n",
      "9.0        2\n",
      "10.0       1\n",
      "Name: birthord, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "pregBirthOrder = preg[\"birthord\"]# Solution goes here\n",
    "print (pregBirthOrder.value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also use `isnull` to count the number of nans."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4445"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthord.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `prglngth` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611931)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       15\n",
       "1        9\n",
       "2       78\n",
       "3      151\n",
       "4      412\n",
       "5      181\n",
       "6      543\n",
       "7      175\n",
       "8      409\n",
       "9      594\n",
       "10     137\n",
       "11     202\n",
       "12     170\n",
       "13     446\n",
       "14      29\n",
       "15      39\n",
       "16      44\n",
       "17     253\n",
       "18      17\n",
       "19      34\n",
       "20      18\n",
       "21      37\n",
       "22     147\n",
       "23      12\n",
       "24      31\n",
       "25      15\n",
       "26     117\n",
       "27       8\n",
       "28      38\n",
       "29      23\n",
       "30     198\n",
       "31      29\n",
       "32     122\n",
       "33      50\n",
       "34      60\n",
       "35     357\n",
       "36     329\n",
       "37     457\n",
       "38     609\n",
       "39    4744\n",
       "40    1120\n",
       "41     591\n",
       "42     328\n",
       "43     148\n",
       "44      46\n",
       "45      10\n",
       "46       1\n",
       "47       1\n",
       "48       7\n",
       "50       2\n",
       "Name: prglngth, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.prglngth.value_counts().sort_index() #how to slice the data to grab a selected few of the indices?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To compute the mean of a column, you can invoke the `mean` method on a Series.  For example, here is the mean birthweight in pounds:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.265628457623368"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.totalwgt_lb.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a new column named <tt>totalwgt_kg</tt> that contains birth weight in kilograms.  Compute its mean.  Remember that when you create a new column, you have to use dictionary syntax, not dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15.984382606771542"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[\"totalwgt_kg\"] = preg.totalwgt_lb*2.2# Solution goes here\n",
    "preg.totalwgt_kg.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`nsfg.py` also provides `ReadFemResp`, which reads the female respondents file and returns a `DataFrame`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "resp = nsfg.ReadFemResp()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`DataFrame` provides a method `head` that displays the first five rows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5012</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>42</td>\n",
       "      <td>42</td>\n",
       "      <td>718</td>\n",
       "      <td>42</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>16:30:59</td>\n",
       "      <td>64.294000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11586</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>43</td>\n",
       "      <td>43</td>\n",
       "      <td>708</td>\n",
       "      <td>43</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:19:09</td>\n",
       "      <td>75.149167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6794</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>1042</td>\n",
       "      <td>15</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3783.152221</td>\n",
       "      <td>5071.464231</td>\n",
       "      <td>5923.977368</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>15:54:43</td>\n",
       "      <td>28.642833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>616</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>991</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>5341.329968</td>\n",
       "      <td>6437.335772</td>\n",
       "      <td>7229.128072</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>14:19:44</td>\n",
       "      <td>69.502667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "1    5012        1         5         1            5          5.0     42   \n",
       "2   11586        1         5         1            5          5.0     43   \n",
       "3    6794        5         5         4            1          5.0     15   \n",
       "4     616        1         5         4            1          5.0     20   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "1     42      718       42     ...               0  2335.279149   \n",
       "2     43      708       43     ...               0  2335.279149   \n",
       "3     15     1042       15     ...               0  3783.152221   \n",
       "4     20      991       20     ...               0  5341.329968   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "1      2846.799490  4744.191350       2    18     1233     1221    16:30:59   \n",
       "2      2846.799490  4744.191350       2    18     1234     1222    18:19:09   \n",
       "3      5071.464231  5923.977368       2    18     1234     1222    15:54:43   \n",
       "4      6437.335772  7229.128072       2    18     1233     1221    14:19:44   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "1   64.294000  \n",
       "2   75.149167  \n",
       "3   28.642833  \n",
       "4   69.502667  \n",
       "\n",
       "[5 rows x 3087 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `age_r` column from `resp` and print the value counts.  How old are the youngest and oldest respondents?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15    217\n",
       "16    223\n",
       "17    234\n",
       "18    235\n",
       "19    241\n",
       "20    258\n",
       "21    267\n",
       "22    287\n",
       "23    282\n",
       "24    269\n",
       "25    267\n",
       "26    260\n",
       "27    255\n",
       "28    252\n",
       "29    262\n",
       "30    292\n",
       "31    278\n",
       "32    273\n",
       "33    257\n",
       "34    255\n",
       "35    262\n",
       "36    266\n",
       "37    271\n",
       "38    256\n",
       "39    215\n",
       "40    256\n",
       "41    250\n",
       "42    215\n",
       "43    253\n",
       "44    235\n",
       "Name: age_r, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp.age_r.value_counts().sort_index()# Solution goes here\n",
    "#oldest are 44 and youngest are 15"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can use the `caseid` to match up rows from `resp` and `preg`.  For example, we can select the row from `resp` for `caseid` 2298 like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        True\n",
       "1       False\n",
       "2       False\n",
       "3       False\n",
       "4       False\n",
       "5       False\n",
       "6       False\n",
       "7       False\n",
       "8       False\n",
       "9       False\n",
       "10      False\n",
       "11      False\n",
       "12      False\n",
       "13      False\n",
       "14      False\n",
       "15      False\n",
       "16      False\n",
       "17      False\n",
       "18      False\n",
       "19      False\n",
       "20      False\n",
       "21      False\n",
       "22      False\n",
       "23      False\n",
       "24      False\n",
       "25      False\n",
       "26      False\n",
       "27      False\n",
       "28      False\n",
       "29      False\n",
       "        ...  \n",
       "7613    False\n",
       "7614    False\n",
       "7615    False\n",
       "7616    False\n",
       "7617    False\n",
       "7618    False\n",
       "7619    False\n",
       "7620    False\n",
       "7621    False\n",
       "7622    False\n",
       "7623    False\n",
       "7624    False\n",
       "7625    False\n",
       "7626    False\n",
       "7627    False\n",
       "7628    False\n",
       "7629    False\n",
       "7630    False\n",
       "7631    False\n",
       "7632    False\n",
       "7633    False\n",
       "7634    False\n",
       "7635    False\n",
       "7636    False\n",
       "7637    False\n",
       "7638    False\n",
       "7639    False\n",
       "7640    False\n",
       "7641    False\n",
       "7642    False\n",
       "Name: caseid, Length: 7643, dtype: bool"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp.caseid==2298"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "\n",
       "[1 rows x 3087 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp[resp.caseid==2298] #isn't this evaluating a boolean condition? how is t/f being used as a key?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And we can get the corresponding rows from `preg` like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "      <th>totalwgt_kg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2610</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "      <td>15.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2611</th>\n",
       "      <td>2298</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.5000</td>\n",
       "      <td>12.1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2612</th>\n",
       "      <td>2298</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.1875</td>\n",
       "      <td>9.2125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2613</th>\n",
       "      <td>2298</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "      <td>15.1250</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 245 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "2610    2298         1        NaN        NaN       NaN       NaN       6.0   \n",
       "2611    2298         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2612    2298         3        NaN        NaN       NaN       NaN       6.0   \n",
       "2613    2298         4        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "      pregend2  nbrnaliv  multbrth     ...       religion_i  metro_i  \\\n",
       "2610       NaN       1.0       NaN     ...                0        0   \n",
       "2611       NaN       1.0       NaN     ...                0        0   \n",
       "2612       NaN       1.0       NaN     ...                0        0   \n",
       "2613       NaN       1.0       NaN     ...                0        0   \n",
       "\n",
       "          basewgt  adj_mod_basewgt     finalwgt  secu_p  sest  cmintvw  \\\n",
       "2610  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2611  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2612  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2613  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "\n",
       "      totalwgt_lb  totalwgt_kg  \n",
       "2610       6.8750      15.1250  \n",
       "2611       5.5000      12.1000  \n",
       "2612       4.1875       9.2125  \n",
       "2613       6.8750      15.1250  \n",
       "\n",
       "[4 rows x 245 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[preg.caseid==2298]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How old is the respondent with `caseid` 1?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1069    44\n",
       "Name: age_r, dtype: int64"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp[resp.caseid==1].age_r# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What are the pregnancy lengths for the respondent with `caseid` 2298?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2610    40\n",
       "2611    36\n",
       "2612    30\n",
       "2613    40\n",
       "Name: prglngth, dtype: int64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[preg.caseid==2298].prglngth# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What was the birthweight of the first baby born to the respondent with `caseid` 5012?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5515    6.0\n",
       "Name: totalwgt_lb, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[preg.caseid==5012].totalwgt_lb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}