{ "metadata": { "name": "", "signature": "sha256:7128f541e51edf5efd89b622756b4c926ddb6117d0a476ca9c544b1615d2b6ea" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Wrangling In Pandas\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import modules" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], \n", " 'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], \n", " 'age': [42, 52, 36, 24, 73], \n", " 'preTestScore': [4, 24, 31, 2, 3],\n", " 'postTestScore': [25, 94, 57, 62, 70]}\n", "df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'])\n", "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_nameagepreTestScorepostTestScore
0 Jason Miller 42 4 25
1 Molly Jacobson 52 24 94
2 Tina Ali 36 31 57
3 Jake Milner 24 2 62
4 Amy Cooze 73 3 70
\n", "

5 rows \u00d7 5 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ " first_name last_name age preTestScore postTestScore\n", "0 Jason Miller 42 4 25\n", "1 Molly Jacobson 52 24 94\n", "2 Tina Ali 36 31 57\n", "3 Jake Milner 24 2 62\n", "4 Amy Cooze 73 3 70\n", "\n", "[5 rows x 5 columns]" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create 2nd dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "raw_data_2 = {'first_name': ['Sarah', 'Gueniva', 'Know', 'Sara', 'Cat'], \n", " 'last_name': ['Mornig', 'Jaker', 'Alom', 'Ormon', 'Koozer'], \n", " 'age': [53, 26, 72, 73, 24], \n", " 'preTestScore': [13, 52, 72, 26, 26],\n", " 'postTestScore': [82, 52, 56, 234, 254]}\n", "df_2 = pd.DataFrame(raw_data_2, columns = ['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'])\n", "df_2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_nameagepreTestScorepostTestScore
0 Sarah Mornig 53 13 82
1 Gueniva Jaker 26 52 52
2 Know Alom 72 72 56
3 Sara Ormon 73 26 234
4 Cat Koozer 24 26 254
\n", "

5 rows \u00d7 5 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ " first_name last_name age preTestScore postTestScore\n", "0 Sarah Mornig 53 13 82\n", "1 Gueniva Jaker 26 52 52\n", "2 Know Alom 72 72 56\n", "3 Sara Ormon 73 26 234\n", "4 Cat Koozer 24 26 254\n", "\n", "[5 rows x 5 columns]" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create 3rd dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "raw_data_3 = {'first_name': ['Sarah', 'Gueniva', 'Know', 'Sara', 'Cat'], \n", " 'last_name': ['Mornig', 'Jaker', 'Alom', 'Ormon', 'Koozer'],\n", " 'postTestScore_2': [82, 52, 56, 234, 254]}\n", "df_3 = pd.DataFrame(raw_data_3, columns = ['first_name', 'last_name', 'postTestScore_2'])\n", "df_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
first_namelast_namepostTestScore_2
0 Sarah Mornig 82
1 Gueniva Jaker 52
2 Know Alom 56
3 Sara Ormon 234
4 Cat Koozer 254
\n", "

5 rows \u00d7 3 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ " first_name last_name postTestScore_2\n", "0 Sarah Mornig 82\n", "1 Gueniva Jaker 52\n", "2 Know Alom 56\n", "3 Sara Ormon 234\n", "4 Cat Koozer 254\n", "\n", "[5 rows x 3 columns]" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }