{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas_QuickStart\n", "Origin from http://pandas.pydata.org/pandas-docs/stable/ \n", "by [openthings@163.com](http://my.oschina.net/u/2306127/blog?catalog=3420733), 2016-04. \n", "\n", "## 6.1 Object Creation\n", "Creating a Series by passing a list of values, letting pandas create a default integer index: " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 3.0\n", "2 5.0\n", "3 NaN\n", "4 6.0\n", "5 8.0\n", "dtype: float64" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "s = pd.Series([1,3,5,np.nan,6,8])\n", "s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n", " '2013-01-05', '2013-01-06'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dates = pd.date_range('20130101', periods=6)\n", "dates" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01-1.334013-0.3482970.388654-0.225967
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-03-1.082739-0.544105-1.423883-1.247367
2013-01-040.034788-0.6772210.1204490.794341
2013-01-050.428547-0.610156-0.950891-0.058047
2013-01-060.125631-0.116653-0.544575-1.578785
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n", "2013-01-06 0.125631 -0.116653 -0.544575 -1.578785" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a DataFrame by passing a dict of objects that can be converted to series-like." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCDEF
01.02013-01-021.03testfoo
11.02013-01-021.03trainfoo
21.02013-01-021.03testfoo
31.02013-01-021.03trainfoo
\n", "
" ], "text/plain": [ " A B C D E F\n", "0 1.0 2013-01-02 1.0 3 test foo\n", "1 1.0 2013-01-02 1.0 3 train foo\n", "2 1.0 2013-01-02 1.0 3 test foo\n", "3 1.0 2013-01-02 1.0 3 train foo" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.DataFrame({ 'A' : 1.,\n", "'B' : pd.Timestamp('20130102'),\n", "'C' : pd.Series(1,index=list(range(4)),dtype='float32'),\n", "'D' : np.array([3] * 4,dtype='int32'),\n", "'E' : pd.Categorical([\"test\",\"train\",\"test\",\"train\"]),\n", "'F' : 'foo' })\n", "\n", "df2" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A float64\n", "B datetime64[ns]\n", "C float32\n", "D int32\n", "E category\n", "F object\n", "dtype: object" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.dtypes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you’re using IPython, tab completion for column names (as well as public attributes) is automatically enabled. Here’s a subset of the attributes that will be completed:\n", " \n", " In [13]: df2." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 1.0\n", "2 1.0\n", "3 1.0\n", "Name: A, dtype: float64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As you can see, the columns A, B, C, and D are automatically tab completed. E is there as well; the rest of the attributes have been truncated for brevity." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6.2 Viewing Data" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01-1.334013-0.3482970.388654-0.225967
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-03-1.082739-0.544105-1.423883-1.247367
2013-01-040.034788-0.6772210.1204490.794341
2013-01-050.428547-0.610156-0.950891-0.058047
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-040.034788-0.6772210.1204490.794341
2013-01-050.428547-0.610156-0.950891-0.058047
2013-01-060.125631-0.116653-0.544575-1.578785
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n", "2013-01-06 0.125631 -0.116653 -0.544575 -1.578785" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail(3)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n", " '2013-01-05', '2013-01-06'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[-1.33401275, -0.34829657, 0.38865407, -0.22596701],\n", " [-0.13997444, -1.34778853, 0.81707707, 0.19247685],\n", " [-1.0827386 , -0.5441047 , -1.42388302, -1.24736743],\n", " [ 0.03478847, -0.67722051, 0.12044917, 0.7943414 ],\n", " [ 0.42854678, -0.61015602, -0.95089113, -0.0580473 ],\n", " [ 0.12563068, -0.11665286, -0.54457518, -1.57878468]])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.values" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
count6.0000006.0000006.0000006.000000
mean-0.327960-0.607370-0.265528-0.353891
std0.7108870.4159400.8530350.896606
min-1.334013-1.347789-1.423883-1.578785
25%-0.847048-0.660454-0.849312-0.992017
50%-0.052593-0.577130-0.212063-0.142007
75%0.102920-0.3972490.3216030.129846
max0.428547-0.1166530.8170770.794341
\n", "
" ], "text/plain": [ " A B C D\n", "count 6.000000 6.000000 6.000000 6.000000\n", "mean -0.327960 -0.607370 -0.265528 -0.353891\n", "std 0.710887 0.415940 0.853035 0.896606\n", "min -1.334013 -1.347789 -1.423883 -1.578785\n", "25% -0.847048 -0.660454 -0.849312 -0.992017\n", "50% -0.052593 -0.577130 -0.212063 -0.142007\n", "75% 0.102920 -0.397249 0.321603 0.129846\n", "max 0.428547 -0.116653 0.817077 0.794341" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2013-01-01 00:00:002013-01-02 00:00:002013-01-03 00:00:002013-01-04 00:00:002013-01-05 00:00:002013-01-06 00:00:00
A-1.334013-0.139974-1.0827390.0347880.4285470.125631
B-0.348297-1.347789-0.544105-0.677221-0.610156-0.116653
C0.3886540.817077-1.4238830.120449-0.950891-0.544575
D-0.2259670.192477-1.2473670.794341-0.058047-1.578785
\n", "
" ], "text/plain": [ " 2013-01-01 2013-01-02 2013-01-03 2013-01-04 2013-01-05 2013-01-06\n", "A -1.334013 -0.139974 -1.082739 0.034788 0.428547 0.125631\n", "B -0.348297 -1.347789 -0.544105 -0.677221 -0.610156 -0.116653\n", "C 0.388654 0.817077 -1.423883 0.120449 -0.950891 -0.544575\n", "D -0.225967 0.192477 -1.247367 0.794341 -0.058047 -1.578785" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.T" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DCBA
2013-01-01-0.2259670.388654-0.348297-1.334013
2013-01-020.1924770.817077-1.347789-0.139974
2013-01-03-1.247367-1.423883-0.544105-1.082739
2013-01-040.7943410.120449-0.6772210.034788
2013-01-05-0.058047-0.950891-0.6101560.428547
2013-01-06-1.578785-0.544575-0.1166530.125631
\n", "
" ], "text/plain": [ " D C B A\n", "2013-01-01 -0.225967 0.388654 -0.348297 -1.334013\n", "2013-01-02 0.192477 0.817077 -1.347789 -0.139974\n", "2013-01-03 -1.247367 -1.423883 -0.544105 -1.082739\n", "2013-01-04 0.794341 0.120449 -0.677221 0.034788\n", "2013-01-05 -0.058047 -0.950891 -0.610156 0.428547\n", "2013-01-06 -1.578785 -0.544575 -0.116653 0.125631" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_index(axis=1, ascending=False)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-040.034788-0.6772210.1204490.794341
2013-01-050.428547-0.610156-0.950891-0.058047
2013-01-03-1.082739-0.544105-1.423883-1.247367
2013-01-01-1.334013-0.3482970.388654-0.225967
2013-01-060.125631-0.116653-0.544575-1.578785
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n", "2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n", "2013-01-06 0.125631 -0.116653 -0.544575 -1.578785" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(by='B')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6.3 Selection" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Getting" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2013-01-01 -1.334013\n", "2013-01-02 -0.139974\n", "2013-01-03 -1.082739\n", "2013-01-04 0.034788\n", "2013-01-05 0.428547\n", "2013-01-06 0.125631\n", "Freq: D, Name: A, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['A']" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01-1.334013-0.3482970.388654-0.225967
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-03-1.082739-0.544105-1.423883-1.247367
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[0:3]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-03-1.082739-0.544105-1.423883-1.247367
2013-01-040.034788-0.6772210.1204490.794341
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['20130102':'20130104']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6.3.2 Selection by Label" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For getting a cross section using a label" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A -1.334013\n", "B -0.348297\n", "C 0.388654\n", "D -0.225967\n", "Name: 2013-01-01 00:00:00, dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[dates[0]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Selecting on a multi-axis by label" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
2013-01-01-1.334013-0.348297
2013-01-02-0.139974-1.347789
2013-01-03-1.082739-0.544105
2013-01-040.034788-0.677221
2013-01-050.428547-0.610156
2013-01-060.125631-0.116653
\n", "
" ], "text/plain": [ " A B\n", "2013-01-01 -1.334013 -0.348297\n", "2013-01-02 -0.139974 -1.347789\n", "2013-01-03 -1.082739 -0.544105\n", "2013-01-04 0.034788 -0.677221\n", "2013-01-05 0.428547 -0.610156\n", "2013-01-06 0.125631 -0.116653" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[:,['A','B']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Showing label slicing, both endpoints are included" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
2013-01-02-0.139974-1.347789
2013-01-03-1.082739-0.544105
2013-01-040.034788-0.677221
\n", "
" ], "text/plain": [ " A B\n", "2013-01-02 -0.139974 -1.347789\n", "2013-01-03 -1.082739 -0.544105\n", "2013-01-04 0.034788 -0.677221" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc['20130102':'20130104',['A','B']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Reduction in the dimensions of the returned object" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A -0.139974\n", "B -1.347789\n", "Name: 2013-01-02 00:00:00, dtype: float64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc['20130102',['A','B']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For getting a scalar value" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-1.3340127475498547" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[dates[0],'A']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For getting fast access to a scalar (equiv to the prior method)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-1.3340127475498547" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.at[dates[0],'A']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6.3.3 Selection by Position\n", "See more in Selection by Position\n", "Select via the position of the passed integers" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A 0.034788\n", "B -0.677221\n", "C 0.120449\n", "D 0.794341\n", "Name: 2013-01-04 00:00:00, dtype: float64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "By integer slices, acting similar to numpy/python" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
2013-01-040.034788-0.677221
2013-01-050.428547-0.610156
\n", "
" ], "text/plain": [ " A B\n", "2013-01-04 0.034788 -0.677221\n", "2013-01-05 0.428547 -0.610156" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[3:5,0:2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "By lists of integer position locations, similar to the numpy/python style" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AC
2013-01-02-0.1399740.817077
2013-01-03-1.082739-1.423883
2013-01-050.428547-0.950891
\n", "
" ], "text/plain": [ " A C\n", "2013-01-02 -0.139974 0.817077\n", "2013-01-03 -1.082739 -1.423883\n", "2013-01-05 0.428547 -0.950891" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[[1,2,4],[0,2]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For slicing rows explicitly" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-02-0.139974-1.3477890.8170770.192477
2013-01-03-1.082739-0.544105-1.423883-1.247367
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[1:3,:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For slicing columns explicitly" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BC
2013-01-01-0.3482970.388654
2013-01-02-1.3477890.817077
2013-01-03-0.544105-1.423883
2013-01-04-0.6772210.120449
2013-01-05-0.610156-0.950891
2013-01-06-0.116653-0.544575
\n", "
" ], "text/plain": [ " B C\n", "2013-01-01 -0.348297 0.388654\n", "2013-01-02 -1.347789 0.817077\n", "2013-01-03 -0.544105 -1.423883\n", "2013-01-04 -0.677221 0.120449\n", "2013-01-05 -0.610156 -0.950891\n", "2013-01-06 -0.116653 -0.544575" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[:,1:3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For getting a value explicitly" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-1.3477885295869219" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[1,1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For getting fast access to a scalar (equiv to the prior method)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-1.3477885295869219" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iat[1,1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6.3.4 Boolean Indexing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using a single column’s values to select data." ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-040.034788-0.6772210.1204490.794341
2013-01-050.428547-0.610156-0.950891-0.058047
2013-01-060.125631-0.116653-0.544575-1.578785
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n", "2013-01-06 0.125631 -0.116653 -0.544575 -1.578785" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.A > 0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A where operation for getting." ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01NaNNaN0.388654NaN
2013-01-02NaNNaN0.8170770.192477
2013-01-03NaNNaNNaNNaN
2013-01-040.034788NaN0.1204490.794341
2013-01-050.428547NaNNaNNaN
2013-01-060.125631NaNNaNNaN
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 NaN NaN 0.388654 NaN\n", "2013-01-02 NaN NaN 0.817077 0.192477\n", "2013-01-03 NaN NaN NaN NaN\n", "2013-01-04 0.034788 NaN 0.120449 0.794341\n", "2013-01-05 0.428547 NaN NaN NaN\n", "2013-01-06 0.125631 NaN NaN NaN" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df > 0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Using the isin() method for filtering:" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df2 = df.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "添加一列。" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df2['E'] = ['one', 'one','two','three','four','three']" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCDE
2013-01-01-1.334013-0.3482970.388654-0.225967one
2013-01-02-0.139974-1.3477890.8170770.192477one
2013-01-03-1.082739-0.544105-1.423883-1.247367two
2013-01-040.034788-0.6772210.1204490.794341three
2013-01-050.428547-0.610156-0.950891-0.058047four
2013-01-060.125631-0.116653-0.544575-1.578785three
\n", "
" ], "text/plain": [ " A B C D E\n", "2013-01-01 -1.334013 -0.348297 0.388654 -0.225967 one\n", "2013-01-02 -0.139974 -1.347789 0.817077 0.192477 one\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367 two\n", "2013-01-04 0.034788 -0.677221 0.120449 0.794341 three\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047 four\n", "2013-01-06 0.125631 -0.116653 -0.544575 -1.578785 three" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCDE
2013-01-03-1.082739-0.544105-1.423883-1.247367two
2013-01-050.428547-0.610156-0.950891-0.058047four
\n", "
" ], "text/plain": [ " A B C D E\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367 two\n", "2013-01-05 0.428547 -0.610156 -0.950891 -0.058047 four" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2[df2['E'].isin(['two','four'])]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6.3.5 Setting\n", "Setting a new column automatically aligns the data by the indexes" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2013-01-02 1\n", "2013-01-03 2\n", "2013-01-04 3\n", "2013-01-05 4\n", "2013-01-06 5\n", "2013-01-07 6\n", "Freq: D, dtype: int64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))\n", "s1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Setting values by position" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df.iat[0,1] = 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Setting by assigning with a numpy array" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df.loc[:,'D'] = np.array([5] * len(df))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The result of the prior setting operations" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01-1.3340130.0000000.3886545
2013-01-02-0.139974-1.3477890.8170775
2013-01-03-1.082739-0.544105-1.4238835
2013-01-040.034788-0.6772210.1204495
2013-01-050.428547-0.610156-0.9508915
2013-01-060.125631-0.116653-0.5445755
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 -1.334013 0.000000 0.388654 5\n", "2013-01-02 -0.139974 -1.347789 0.817077 5\n", "2013-01-03 -1.082739 -0.544105 -1.423883 5\n", "2013-01-04 0.034788 -0.677221 0.120449 5\n", "2013-01-05 0.428547 -0.610156 -0.950891 5\n", "2013-01-06 0.125631 -0.116653 -0.544575 5" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A where operation with setting." ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df2 = df.copy()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df2[df2 > 0] = -df2" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-01-1.3340130.000000-0.388654-5
2013-01-02-0.139974-1.347789-0.817077-5
2013-01-03-1.082739-0.544105-1.423883-5
2013-01-04-0.034788-0.677221-0.120449-5
2013-01-05-0.428547-0.610156-0.950891-5
2013-01-06-0.125631-0.116653-0.544575-5
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 -1.334013 0.000000 -0.388654 -5\n", "2013-01-02 -0.139974 -1.347789 -0.817077 -5\n", "2013-01-03 -1.082739 -0.544105 -1.423883 -5\n", "2013-01-04 -0.034788 -0.677221 -0.120449 -5\n", "2013-01-05 -0.428547 -0.610156 -0.950891 -5\n", "2013-01-06 -0.125631 -0.116653 -0.544575 -5" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }