{ "metadata": { "name": "", "signature": "sha256:77901a93beeb6f6cd14f51d34b271e5c97e4c4224c2fbbe42f24ff32885531e5" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Index, Select, And Filter pandas Dataframes\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import modules" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], \n", " 'year': [2012, 2012, 2013, 2014, 2014], \n", " 'reports': [4, 24, 31, 2, 3],\n", " 'coverage': [25, 94, 57, 62, 70]}\n", "df = pd.DataFrame(data, index = ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])\n", "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
coveragenamereportsyear
Cochice 25 Jason 4 2012
Pima 94 Molly 24 2012
Santa Cruz 57 Tina 31 2013
Maricopa 62 Jake 2 2014
Yuma 70 Amy 3 2014
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ " coverage name reports year\n", "Cochice 25 Jason 4 2012\n", "Pima 94 Molly 24 2012\n", "Santa Cruz 57 Tina 31 2013\n", "Maricopa 62 Jake 2 2014\n", "Yuma 70 Amy 3 2014\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View a column of the dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df['name']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "Cochice Jason\n", "Pima Molly\n", "Santa Cruz Tina\n", "Maricopa Jake\n", "Yuma Amy\n", "Name: name, dtype: object" ] } ], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View two columns of the dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df[['name', 'reports']]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namereports
Cochice Jason 4
Pima Molly 24
Santa Cruz Tina 31
Maricopa Jake 2
Yuma Amy 3
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ " name reports\n", "Cochice Jason 4\n", "Pima Molly 24\n", "Santa Cruz Tina 31\n", "Maricopa Jake 2\n", "Yuma Amy 3\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View the first two rows of the dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df[:2]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
coveragenamereportsyear
Cochice 25 Jason 4 2012
Pima 94 Molly 24 2012
\n", "

2 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ " coverage name reports year\n", "Cochice 25 Jason 4 2012\n", "Pima 94 Molly 24 2012\n", "\n", "[2 rows x 4 columns]" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View all rows where coverage is more than 50" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df[df['coverage'] > 50]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
coveragenamereportsyear
Pima 94 Molly 24 2012
Santa Cruz 57 Tina 31 2013
Maricopa 62 Jake 2 2014
Yuma 70 Amy 3 2014
\n", "

4 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ " coverage name reports year\n", "Pima 94 Molly 24 2012\n", "Santa Cruz 57 Tina 31 2013\n", "Maricopa 62 Jake 2 2014\n", "Yuma 70 Amy 3 2014\n", "\n", "[4 rows x 4 columns]" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View a row" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix['Maricopa']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 17, "text": [ "coverage 62\n", "name Jake\n", "reports 2\n", "year 2014\n", "Name: Maricopa, dtype: object" ] } ], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View a column" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix[:, 'coverage']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "Cochice 25\n", "Pima 94\n", "Santa Cruz 57\n", "Maricopa 62\n", "Yuma 70\n", "Name: coverage, dtype: int64" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View the value based on a row and column" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix['Yuma', 'coverage']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "70" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }