{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function, division\n", "\n", "import nsfg3" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas\n", "import numpy as np\n", "\n", "def ReadFemResp1995():\n", " \"\"\"Reads respondent data from NSFG Cycle 5.\n", "\n", " returns: DataFrame\n", " \"\"\"\n", " dat_file = '1995FemRespData.dat.gz'\n", " names = ['cmintvw', 'timesmar', 'cmmarrhx', 'cmbirth', 'finalwgt']\n", " colspecs = [(12360-1, 12363),\n", " (4637-1, 4638),\n", " (11759-1, 11762),\n", " (14-1, 16),\n", " (12350-1, 12359)]\n", " df = pandas.read_fwf(dat_file, \n", " compression='gzip', \n", " colspecs=colspecs, \n", " names=names)\n", "\n", " df.timesmar.replace([98, 99], np.nan, inplace=True)\n", " df['evrmarry'] = (df.timesmar > 0)\n", "\n", " nsfg3.CleanData(df)\n", " return df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = ReadFemResp1995()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "4006" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(~df.evrmarry)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3918" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df[(df.cmbirth >= 604) & (df.cmbirth <= 720)])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "192" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df[(df.cmmarrhx >= 780) & (df.cmmarrhx <= 840)])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1141 514\n", "1142 2448\n", "1143 2466\n", "1144 1692\n", "1145 1381\n", "1146 1076\n", "1147 722\n", "1148 172\n", "1149 81\n", "1150 295\n", "Name: cmintvw, dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cmintvw.value_counts().sort_index()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "349.8761 1\n", "356.8193 1\n", "433.7413 1\n", "434.1431 1\n", "441.2415 1\n", "463.0787 1\n", "464.5584 1\n", "468.2044 1\n", "472.6256 1\n", "482.7008 1\n", "495.1924 1\n", "498.6220 1\n", "507.9773 1\n", "511.4543 1\n", "519.3578 1\n", "523.6670 1\n", "525.7335 1\n", "555.8899 1\n", "561.4898 1\n", "561.6297 1\n", "564.7645 1\n", "567.7595 1\n", "589.4533 1\n", "590.1313 1\n", "590.1364 1\n", "617.0250 1\n", "634.0294 1\n", "634.8276 1\n", "637.1154 1\n", "640.9224 1\n", " ..\n", "18588.5661 1\n", "18702.0666 1\n", "18890.5291 1\n", "19985.1776 1\n", "20077.5177 1\n", "20161.3343 1\n", "20457.1239 1\n", "20628.0956 1\n", "20731.0408 1\n", "20864.2010 1\n", "20916.7939 1\n", "21558.0296 1\n", "21886.1411 1\n", "22304.0038 1\n", "22568.3367 1\n", "22718.1978 1\n", "22812.9377 1\n", "23231.5175 1\n", "23497.7818 1\n", "23693.3226 1\n", "23758.9147 1\n", "24146.9122 1\n", "24653.6224 1\n", "24916.6825 1\n", "25049.8728 1\n", "25568.3298 1\n", "25840.3059 1\n", "26067.4314 1\n", "26562.8055 1\n", "33549.8227 1\n", "Name: finalwgt, dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.finalwgt.value_counts().sort_index()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 5559\n", "2 1077\n", "3 174\n", "4 26\n", "5 5\n", "Name: timesmar, dtype: int64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.timesmar.value_counts().sort_index()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }