{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Examples and Exercises from Think Stats, 2nd Edition\n", "\n", "http://thinkstats2.com\n", "\n", "Copyright 2016 Allen B. Downey\n", "\n", "MIT License: https://opensource.org/licenses/MIT\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from __future__ import print_function, division\n", "\n", "import nsfg #importing the nsfg dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Examples from Chapter 1\n", "\n", "Read NSFG data into a Pandas DataFrame." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
caseidpregordrhowpreg_nhowpreg_pmoscurrpnowprgdkpregend1pregend2nbrnalivmultbrth...laborfor_ireligion_imetro_ibasewgtadj_mod_basewgtfinalwgtsecu_psestcmintvwtotalwgt_lb
011NaNNaNNaNNaN6.0NaN1.0NaN...0003410.3893993869.3496026448.27111229NaN8.8125
112NaNNaNNaNNaN6.0NaN1.0NaN...0003410.3893993869.3496026448.27111229NaN7.8750
221NaNNaNNaNNaN5.0NaN3.05.0...0007226.3017408567.54911012999.542264212NaN9.1250
322NaNNaNNaNNaN6.0NaN1.0NaN...0007226.3017408567.54911012999.542264212NaN7.0000
423NaNNaNNaNNaN6.0NaN1.0NaN...0007226.3017408567.54911012999.542264212NaN6.1875
\n", "

5 rows × 244 columns

\n", "
" ], "text/plain": [ " caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 \\\n", "0 1 1 NaN NaN NaN NaN 6.0 \n", "1 1 2 NaN NaN NaN NaN 6.0 \n", "2 2 1 NaN NaN NaN NaN 5.0 \n", "3 2 2 NaN NaN NaN NaN 6.0 \n", "4 2 3 NaN NaN NaN NaN 6.0 \n", "\n", " pregend2 nbrnaliv multbrth ... laborfor_i religion_i metro_i \\\n", "0 NaN 1.0 NaN ... 0 0 0 \n", "1 NaN 1.0 NaN ... 0 0 0 \n", "2 NaN 3.0 5.0 ... 0 0 0 \n", "3 NaN 1.0 NaN ... 0 0 0 \n", "4 NaN 1.0 NaN ... 0 0 0 \n", "\n", " basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw \\\n", "0 3410.389399 3869.349602 6448.271112 2 9 NaN \n", "1 3410.389399 3869.349602 6448.271112 2 9 NaN \n", "2 7226.301740 8567.549110 12999.542264 2 12 NaN \n", "3 7226.301740 8567.549110 12999.542264 2 12 NaN \n", "4 7226.301740 8567.549110 12999.542264 2 12 NaN \n", "\n", " totalwgt_lb \n", "0 8.8125 \n", "1 7.8750 \n", "2 9.1250 \n", "3 7.0000 \n", "4 6.1875 \n", "\n", "[5 rows x 244 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg = nsfg.ReadFemPreg() #reading the data into a pandas data frame\n", "preg.head() #shows the first 5 rows of the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Print the column names." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['caseid', 'pregordr', 'howpreg_n', 'howpreg_p', 'moscurrp', 'nowprgdk',\n", " 'pregend1', 'pregend2', 'nbrnaliv', 'multbrth',\n", " ...\n", " 'laborfor_i', 'religion_i', 'metro_i', 'basewgt', 'adj_mod_basewgt',\n", " 'finalwgt', 'secu_p', 'sest', 'cmintvw', 'totalwgt_lb'],\n", " dtype='object', length=244)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.columns #index of column names" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a single column name." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'pregordr'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.columns[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a column and check what type it is." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pregordr = preg['pregordr']\n", "type(pregordr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Print a column." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 2\n", "2 1\n", "3 2\n", "4 3\n", "5 1\n", "6 2\n", "7 3\n", "8 1\n", "9 2\n", "10 1\n", "11 1\n", "12 2\n", "13 3\n", "14 1\n", "15 2\n", "16 3\n", "17 1\n", "18 2\n", "19 1\n", "20 2\n", "21 1\n", "22 2\n", "23 1\n", "24 2\n", "25 3\n", "26 1\n", "27 1\n", "28 2\n", "29 3\n", " ..\n", "13563 2\n", "13564 3\n", "13565 1\n", "13566 1\n", "13567 1\n", "13568 2\n", "13569 1\n", "13570 2\n", "13571 3\n", "13572 4\n", "13573 1\n", "13574 2\n", "13575 1\n", "13576 1\n", "13577 2\n", "13578 1\n", "13579 2\n", "13580 1\n", "13581 2\n", "13582 3\n", "13583 1\n", "13584 2\n", "13585 1\n", "13586 2\n", "13587 3\n", "13588 1\n", "13589 2\n", "13590 3\n", "13591 4\n", "13592 5\n", "Name: pregordr, Length: 13593, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pregordr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a single element from a column." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pregordr[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a slice from a column." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2 1\n", "3 2\n", "4 3\n", "Name: pregordr, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pregordr[2:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a column using dot notation." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "pregordr = preg.pregordr" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", "5 1\n", "6 1\n", "7 1\n", "8 1\n", "9 1\n", "10 1\n", "11 1\n", "12 1\n", "13 2\n", "14 4\n", "15 1\n", "16 1\n", "17 1\n", "18 4\n", "19 1\n", "20 1\n", "21 1\n", "22 4\n", "23 1\n", "24 1\n", "25 1\n", "26 1\n", "27 1\n", "28 1\n", "29 1\n", " ..\n", "13563 1\n", "13564 1\n", "13565 1\n", "13566 1\n", "13567 2\n", "13568 5\n", "13569 1\n", "13570 1\n", "13571 1\n", "13572 1\n", "13573 1\n", "13574 1\n", "13575 2\n", "13576 1\n", "13577 6\n", "13578 1\n", "13579 1\n", "13580 4\n", "13581 1\n", "13582 5\n", "13583 2\n", "13584 1\n", "13585 2\n", "13586 2\n", "13587 2\n", "13588 1\n", "13589 2\n", "13590 2\n", "13591 1\n", "13592 1\n", "Name: outcome, Length: 13593, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.outcome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Count the number of times each value occurs." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 9148\n", "4 1921\n", "2 1862\n", "6 352\n", "5 190\n", "3 120\n", "Name: outcome, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.outcome.value_counts()\n", "#type(preg.outcome.value_counts())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 9148\n", "2 1862\n", "3 120\n", "4 1921\n", "5 190\n", "6 352\n", "Name: outcome, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.outcome.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the values of another variable." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0 8\n", "1.0 40\n", "2.0 53\n", "3.0 98\n", "4.0 229\n", "5.0 697\n", "6.0 2223\n", "7.0 3049\n", "8.0 1889\n", "9.0 623\n", "10.0 132\n", "11.0 26\n", "12.0 10\n", "13.0 3\n", "14.0 3\n", "15.0 1\n", "Name: birthwgt_lb, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.birthwgt_lb.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make a dictionary that maps from each respondent's `caseid` to a list of indices into the pregnancy `DataFrame`. Use it to select the pregnancy outcomes for a single respondent." 
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([4, 4, 4, 4, 4, 4, 1])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "caseid = 10229\n", "preg_map = nsfg.MakePregMap(preg)\n", "indices = preg_map[caseid]\n", "preg.outcome[indices].values" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Exercises" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select the `birthord` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611933)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0 4413\n", "2.0 2874\n", "3.0 1234\n", "4.0 421\n", "5.0 126\n", "6.0 50\n", "7.0 20\n", "8.0 7\n", "9.0 2\n", "10.0 1\n", "Name: birthord, dtype: int64\n" ] } ], "source": [ "pregBirthOrder = preg[\"birthord\"]# Solution goes here\n", "print (pregBirthOrder.value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can also use `isnull` to count the number of nans." 
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4445" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.birthord.isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select the `prglngth` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611931)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 15\n", "1 9\n", "2 78\n", "3 151\n", "4 412\n", "5 181\n", "6 543\n", "7 175\n", "8 409\n", "9 594\n", "10 137\n", "11 202\n", "12 170\n", "13 446\n", "14 29\n", "15 39\n", "16 44\n", "17 253\n", "18 17\n", "19 34\n", "20 18\n", "21 37\n", "22 147\n", "23 12\n", "24 31\n", "25 15\n", "26 117\n", "27 8\n", "28 38\n", "29 23\n", "30 198\n", "31 29\n", "32 122\n", "33 50\n", "34 60\n", "35 357\n", "36 329\n", "37 457\n", "38 609\n", "39 4744\n", "40 1120\n", "41 591\n", "42 328\n", "43 148\n", "44 46\n", "45 10\n", "46 1\n", "47 1\n", "48 7\n", "50 2\n", "Name: prglngth, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.prglngth.value_counts().sort_index() #how to slice the data to grab a selected few of the indices?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To compute the mean of a column, you can invoke the `mean` method on a Series. 
For example, here is the mean birthweight in pounds:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.265628457623368" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg.totalwgt_lb.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a new column named totalwgt_kg that contains birth weight in kilograms. Compute its mean. Remember that when you create a new column, you have to use dictionary syntax, not dot notation." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Pounds -> kilograms: 1 lb is exactly 0.45359237 kg (multiplying by 2.2 would convert kg -> lb).\n", "LB_TO_KG = 0.45359237\n", "preg[\"totalwgt_kg\"] = preg.totalwgt_lb * LB_TO_KG\n", "preg.totalwgt_kg.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`nsfg.py` also provides `ReadFemResp`, which reads the female respondents file and returns a `DataFrame`:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "resp = nsfg.ReadFemResp()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`DataFrame` provides a method `head` that displays the first five rows:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
caseidrscrinfrdormresrostscrnrscreenhisprscreenraceage_aage_rcmbirthagescrn...pubassis_ibasewgtadj_mod_basewgtfinalwgtsecu_rsestcmintvwcmlstyrscreentimeintvlngth
0229815515.0272790227...03247.9169775123.7595595556.7172412181234122218:26:36110.492667
1501215155.0424271842...02335.2791492846.7994904744.1913502181233122116:30:5964.294000
21158615155.0434370843...02335.2791492846.7994904744.1913502181234122218:19:0975.149167
3679455415.01515104215...03783.1522215071.4642315923.9773682181234122215:54:4328.642833
461615415.0202099120...05341.3299686437.3357727229.1280722181233122114:19:4469.502667
\n", "

5 rows × 3087 columns

\n", "
" ], "text/plain": [ " caseid rscrinf rdormres rostscrn rscreenhisp rscreenrace age_a \\\n", "0 2298 1 5 5 1 5.0 27 \n", "1 5012 1 5 1 5 5.0 42 \n", "2 11586 1 5 1 5 5.0 43 \n", "3 6794 5 5 4 1 5.0 15 \n", "4 616 1 5 4 1 5.0 20 \n", "\n", " age_r cmbirth agescrn ... pubassis_i basewgt \\\n", "0 27 902 27 ... 0 3247.916977 \n", "1 42 718 42 ... 0 2335.279149 \n", "2 43 708 43 ... 0 2335.279149 \n", "3 15 1042 15 ... 0 3783.152221 \n", "4 20 991 20 ... 0 5341.329968 \n", "\n", " adj_mod_basewgt finalwgt secu_r sest cmintvw cmlstyr screentime \\\n", "0 5123.759559 5556.717241 2 18 1234 1222 18:26:36 \n", "1 2846.799490 4744.191350 2 18 1233 1221 16:30:59 \n", "2 2846.799490 4744.191350 2 18 1234 1222 18:19:09 \n", "3 5071.464231 5923.977368 2 18 1234 1222 15:54:43 \n", "4 6437.335772 7229.128072 2 18 1233 1221 14:19:44 \n", "\n", " intvlngth \n", "0 110.492667 \n", "1 64.294000 \n", "2 75.149167 \n", "3 28.642833 \n", "4 69.502667 \n", "\n", "[5 rows x 3087 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select the `age_r` column from `resp` and print the value counts. How old are the youngest and oldest respondents?" 
] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15 217\n", "16 223\n", "17 234\n", "18 235\n", "19 241\n", "20 258\n", "21 267\n", "22 287\n", "23 282\n", "24 269\n", "25 267\n", "26 260\n", "27 255\n", "28 252\n", "29 262\n", "30 292\n", "31 278\n", "32 273\n", "33 257\n", "34 255\n", "35 262\n", "36 266\n", "37 271\n", "38 256\n", "39 215\n", "40 256\n", "41 250\n", "42 215\n", "43 253\n", "44 235\n", "Name: age_r, dtype: int64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.age_r.value_counts().sort_index()# Solution goes here\n", "#oldest are 44 and youngest are 15" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can use the `caseid` to match up rows from `resp` and `preg`. For example, we can select the row from `resp` for `caseid` 2298 like this:" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 False\n", "6 False\n", "7 False\n", "8 False\n", "9 False\n", "10 False\n", "11 False\n", "12 False\n", "13 False\n", "14 False\n", "15 False\n", "16 False\n", "17 False\n", "18 False\n", "19 False\n", "20 False\n", "21 False\n", "22 False\n", "23 False\n", "24 False\n", "25 False\n", "26 False\n", "27 False\n", "28 False\n", "29 False\n", " ... 
\n", "7613 False\n", "7614 False\n", "7615 False\n", "7616 False\n", "7617 False\n", "7618 False\n", "7619 False\n", "7620 False\n", "7621 False\n", "7622 False\n", "7623 False\n", "7624 False\n", "7625 False\n", "7626 False\n", "7627 False\n", "7628 False\n", "7629 False\n", "7630 False\n", "7631 False\n", "7632 False\n", "7633 False\n", "7634 False\n", "7635 False\n", "7636 False\n", "7637 False\n", "7638 False\n", "7639 False\n", "7640 False\n", "7641 False\n", "7642 False\n", "Name: caseid, Length: 7643, dtype: bool" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.caseid==2298" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
caseidrscrinfrdormresrostscrnrscreenhisprscreenraceage_aage_rcmbirthagescrn...pubassis_ibasewgtadj_mod_basewgtfinalwgtsecu_rsestcmintvwcmlstyrscreentimeintvlngth
0229815515.0272790227...03247.9169775123.7595595556.7172412181234122218:26:36110.492667
\n", "

1 rows × 3087 columns

\n", "
" ], "text/plain": [ " caseid rscrinf rdormres rostscrn rscreenhisp rscreenrace age_a \\\n", "0 2298 1 5 5 1 5.0 27 \n", "\n", " age_r cmbirth agescrn ... pubassis_i basewgt \\\n", "0 27 902 27 ... 0 3247.916977 \n", "\n", " adj_mod_basewgt finalwgt secu_r sest cmintvw cmlstyr screentime \\\n", "0 5123.759559 5556.717241 2 18 1234 1222 18:26:36 \n", "\n", " intvlngth \n", "0 110.492667 \n", "\n", "[1 rows x 3087 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp[resp.caseid==2298] # `resp.caseid==2298` is a boolean Series (one True/False per row); indexing with it keeps only the rows where it is True" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And we can get the corresponding rows from `preg` like this:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
caseidpregordrhowpreg_nhowpreg_pmoscurrpnowprgdkpregend1pregend2nbrnalivmultbrth...religion_imetro_ibasewgtadj_mod_basewgtfinalwgtsecu_psestcmintvwtotalwgt_lbtotalwgt_kg
261022981NaNNaNNaNNaN6.0NaN1.0NaN...003247.9169775123.7595595556.717241218NaN6.875015.1250
261122982NaNNaNNaNNaN6.0NaN1.0NaN...003247.9169775123.7595595556.717241218NaN5.500012.1000
261222983NaNNaNNaNNaN6.0NaN1.0NaN...003247.9169775123.7595595556.717241218NaN4.18759.2125
261322984NaNNaNNaNNaN6.0NaN1.0NaN...003247.9169775123.7595595556.717241218NaN6.875015.1250
\n", "

4 rows × 245 columns

\n", "
" ], "text/plain": [ " caseid pregordr howpreg_n howpreg_p moscurrp nowprgdk pregend1 \\\n", "2610 2298 1 NaN NaN NaN NaN 6.0 \n", "2611 2298 2 NaN NaN NaN NaN 6.0 \n", "2612 2298 3 NaN NaN NaN NaN 6.0 \n", "2613 2298 4 NaN NaN NaN NaN 6.0 \n", "\n", " pregend2 nbrnaliv multbrth ... religion_i metro_i \\\n", "2610 NaN 1.0 NaN ... 0 0 \n", "2611 NaN 1.0 NaN ... 0 0 \n", "2612 NaN 1.0 NaN ... 0 0 \n", "2613 NaN 1.0 NaN ... 0 0 \n", "\n", " basewgt adj_mod_basewgt finalwgt secu_p sest cmintvw \\\n", "2610 3247.916977 5123.759559 5556.717241 2 18 NaN \n", "2611 3247.916977 5123.759559 5556.717241 2 18 NaN \n", "2612 3247.916977 5123.759559 5556.717241 2 18 NaN \n", "2613 3247.916977 5123.759559 5556.717241 2 18 NaN \n", "\n", " totalwgt_lb totalwgt_kg \n", "2610 6.8750 15.1250 \n", "2611 5.5000 12.1000 \n", "2612 4.1875 9.2125 \n", "2613 6.8750 15.1250 \n", "\n", "[4 rows x 245 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg[preg.caseid==2298]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How old is the respondent with `caseid` 1?" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1069 44\n", "Name: age_r, dtype: int64" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp[resp.caseid==1].age_r# Solution goes here" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What are the pregnancy lengths for the respondent with `caseid` 2298?" 
] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2610 40\n", "2611 36\n", "2612 30\n", "2613 40\n", "Name: prglngth, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preg[preg.caseid==2298].prglngth# Solution goes here" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What was the birthweight of the first baby born to the respondent with `caseid` 5012?" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5515 6.0\n", "Name: totalwgt_lb, dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the first live birth (birthord == 1); filtering on caseid alone returns every pregnancy.\n", "preg[(preg.caseid == 5012) & (preg.birthord == 1)].totalwgt_lb" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 1 }