{ "metadata": { "name": "", "signature": "sha256:28f034a531b1925724959079a55f964ded07e33205fc78a28e8e1109a5cb8032" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Example: 2012 Federal Election Commission Database" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import division\n", "import os\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas\n", "# Explicit alias replaces the former `from pandas import *`, so pandas names are\n", "# reached as pd.<name> instead of being injected into the notebook namespace.\n", "import pandas as pd\n", "from numpy.random import randn\n", "\n", "np.random.seed(12345)\n", "plt.rc('figure', figsize=(10, 6))\n", "np.set_printoptions(precision=4)\n", "\n", "# Location of the contributions CSV, relative to the directory the notebook is\n", "# run from. Building the path (rather than %cd-ing into the folder) keeps this\n", "# cell safe to re-run and leaves the working directory untouched.\n", "FEC_CSV = os.path.join('book_scripts', 'fec', 'P00000001-ALL.csv')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# Load the full contributions table from the path set in the setup cell.\n", "fec = pd.read_csv(FEC_CSV)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "fec" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cmte_idcand_idcand_nmcontbr_nmcontbr_city...receipt_descmemo_cdmemo_textform_tpfile_num
0 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE... NaN NaN NaN SA17A 736166
1 C00410118 P20002978 Bachmann, Michelle HARVEY, WILLIAM MOBILE... NaN NaN NaN SA17A 736166
2 C00410118 P20002978 Bachmann, Michelle SMITH, LANIER LANETT... NaN NaN NaN SA17A 749073
3 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT... NaN NaN NaN SA17A 749073
4 C00410118 P20002978 Bachmann, Michelle WARDENBURG, HAROLD HOT SPRINGS NATION... NaN NaN NaN SA17A 736166
5 C00410118 P20002978 Bachmann, Michelle BECKMAN, JAMES SPRINGDALE... NaN NaN NaN SA17A 736166
6 C00410118 P20002978 Bachmann, Michelle BLEVINS, DARONDA PIGGOTT... NaN NaN NaN SA17A 736166
....................................
1001724 C00500587 P20003281 Perry, Rick HEFFERNAN, JILL PRINCE MRS. INFO REQUESTED... NaN NaN NaN SA17A 751678
1001725 C00500587 P20003281 Perry, Rick ELWOOD, MIKE MR. INFO REQUESTED... NaN NaN NaN SA17A 751678
1001726 C00500587 P20003281 Perry, Rick GORMAN, CHRIS D. MR. INFO REQUESTED... REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... NaN REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... SA17A 751678
1001727 C00500587 P20003281 Perry, Rick DUFFY, DAVID A. MR. INFO REQUESTED... NaN NaN NaN SA17A 751678
1001728 C00500587 P20003281 Perry, Rick GRANE, BRYAN F. MR. INFO REQUESTED... NaN NaN NaN SA17A 751678
1001729 C00500587 P20003281 Perry, Rick TOLBERT, DARYL MR. INFO REQUESTED... NaN NaN NaN SA17A 751678
1001730 C00500587 P20003281 Perry, Rick ANDERSON, MARILEE MRS. INFO REQUESTED... NaN NaN NaN SA17A 751678
\n", "

1001731 rows \u00d7 16 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ " cmte_id cand_id cand_nm \\\n", "0 C00410118 P20002978 Bachmann, Michelle \n", "1 C00410118 P20002978 Bachmann, Michelle \n", "2 C00410118 P20002978 Bachmann, Michelle \n", "3 C00410118 P20002978 Bachmann, Michelle \n", "4 C00410118 P20002978 Bachmann, Michelle \n", "5 C00410118 P20002978 Bachmann, Michelle \n", "6 C00410118 P20002978 Bachmann, Michelle \n", "... ... ... ... \n", "1001724 C00500587 P20003281 Perry, Rick \n", "1001725 C00500587 P20003281 Perry, Rick \n", "1001726 C00500587 P20003281 Perry, Rick \n", "1001727 C00500587 P20003281 Perry, Rick \n", "1001728 C00500587 P20003281 Perry, Rick \n", "1001729 C00500587 P20003281 Perry, Rick \n", "1001730 C00500587 P20003281 Perry, Rick \n", "\n", " contbr_nm contbr_city ... \\\n", "0 HARVEY, WILLIAM MOBILE ... \n", "1 HARVEY, WILLIAM MOBILE ... \n", "2 SMITH, LANIER LANETT ... \n", "3 BLEVINS, DARONDA PIGGOTT ... \n", "4 WARDENBURG, HAROLD HOT SPRINGS NATION ... \n", "5 BECKMAN, JAMES SPRINGDALE ... \n", "6 BLEVINS, DARONDA PIGGOTT ... \n", "... ... ... ... \n", "1001724 HEFFERNAN, JILL PRINCE MRS. INFO REQUESTED ... \n", "1001725 ELWOOD, MIKE MR. INFO REQUESTED ... \n", "1001726 GORMAN, CHRIS D. MR. INFO REQUESTED ... \n", "1001727 DUFFY, DAVID A. MR. INFO REQUESTED ... \n", "1001728 GRANE, BRYAN F. MR. INFO REQUESTED ... \n", "1001729 TOLBERT, DARYL MR. INFO REQUESTED ... \n", "1001730 ANDERSON, MARILEE MRS. INFO REQUESTED ... \n", "\n", " receipt_desc memo_cd \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "5 NaN NaN \n", "6 NaN NaN \n", "... ... ... \n", "1001724 NaN NaN \n", "1001725 NaN NaN \n", "1001726 REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... 
NaN \n", "1001727 NaN NaN \n", "1001728 NaN NaN \n", "1001729 NaN NaN \n", "1001730 NaN NaN \n", "\n", " memo_text form_tp file_num \n", "0 NaN SA17A 736166 \n", "1 NaN SA17A 736166 \n", "2 NaN SA17A 749073 \n", "3 NaN SA17A 749073 \n", "4 NaN SA17A 736166 \n", "5 NaN SA17A 736166 \n", "6 NaN SA17A 736166 \n", "... ... ... ... \n", "1001724 NaN SA17A 751678 \n", "1001725 NaN SA17A 751678 \n", "1001726 REATTRIBUTION / REDESIGNATION REQUESTED (AUTOM... SA17A 751678 \n", "1001727 NaN SA17A 751678 \n", "1001728 NaN SA17A 751678 \n", "1001729 NaN SA17A 751678 \n", "1001730 NaN SA17A 751678 \n", "\n", "[1001731 rows x 16 columns]" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# Look the row up by index label with .loc; the .ix indexer is deprecated.\n", "fec.loc[123456]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "cmte_id C00431445\n", "cand_id P80003338\n", "cand_nm Obama, Barack\n", "contbr_nm ELLMAN, IRA\n", "contbr_city TEMPE\n", "...\n", "contb_receipt_dt 01-DEC-11\n", "receipt_desc NaN\n", "memo_cd NaN\n", "memo_text NaN\n", "form_tp SA17A\n", "file_num 772372\n", "Name: 123456, Length: 16, dtype: object" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "unique_cands = fec.cand_nm.unique()\n", "unique_cands\n", "unique_cands[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "'Obama, Barack'" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "parties = {'Bachmann, Michelle': 'Republican',\n", " 'Cain, Herman': 'Republican',\n", " 'Gingrich, Newt': 'Republican',\n", " 'Huntsman, Jon': 'Republican',\n", " 'Johnson, Gary Earl': 'Republican',\n", " 'McCotter, Thaddeus G': 'Republican',\n", " 'Obama, Barack': 'Democrat',\n", " 'Paul, Ron': 'Republican',\n", " 'Pawlenty, Timothy': 'Republican',\n", " 'Perry, Rick': 'Republican',\n", " \"Roemer, Charles E. 'Buddy' III\": 'Republican',\n", " 'Romney, Mitt': 'Republican',\n", " 'Santorum, Rick': 'Republican'}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "raw", "metadata": {}, "source": [ "parties = {'Bachmann, Michelle': 'Republican',\n", " 'Cain, Herman': 'Republican',\n", " 'Gingrich, Newt': 'Republican',\n", " 'Huntsman, Jon': 'Republican',\n", " 'Johnson, Gary Earl': 'Republican',\n", " 'McCotter, Thaddeus G': 'Republican',\n", " 'Obama, Barack': 'Democrat',\n", " 'Paul, Ron': 'Republican',\n", " 'Pawlenty, Timothy': 'Republican',\n", " 'Perry, Rick': 'Republican',\n", " \"Roemer, Charles E. 'Buddy' III\": 'Republican',\n", " 'Romney, Mitt': 'Republican',\n", " 'Santorum, Rick': 'Republican'}" ] }, { "cell_type": "code", "collapsed": false, "input": [ "fec.cand_nm[123456:123461]\n", "fec.cand_nm[123456:123461].map(parties)\n", "# Add it as a column\n", "fec['party'] = fec.cand_nm.map(parties)\n", "fec['party'].value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "Democrat 593746\n", "Republican 407985\n", "dtype: int64" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "(fec.contb_receipt_amt > 0).value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "True 991475\n", "False 10256\n", "dtype: int64" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "# Keep only rows whose contribution amount is strictly positive.\n", "fec = fec[fec.contb_receipt_amt > 0]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "# Subset limited to the two named candidates.\n", "fec_mrbo = fec[fec.cand_nm.isin(['Obama, Barack', 'Romney, Mitt'])]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 } ], "metadata": {} } ] }