{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas_QuickStart\n",
"Origin from http://pandas.pydata.org/pandas-docs/stable/ \n",
"by [openthings@163.com](http://my.oschina.net/u/2306127/blog?catalog=3420733), 2016-04. \n",
"\n",
"## 6.1 Object Creation\n",
"Creating a Series by passing a list of values, letting pandas create a default integer index: "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 3.0\n",
"2 5.0\n",
"3 NaN\n",
"4 6.0\n",
"5 8.0\n",
"dtype: float64"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"s = pd.Series([1,3,5,np.nan,6,8])\n",
"s"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
" '2013-01-05', '2013-01-06'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dates = pd.date_range('20130101', periods=6)\n",
"dates"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
" -0.225967 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -1.578785 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n",
"2013-01-06 0.125631 -0.116653 -0.544575 -1.578785"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a DataFrame by passing a dict of objects that can be converted to series-like."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
" E | \n",
" F | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 2013-01-02 | \n",
" 1.0 | \n",
" 3 | \n",
" test | \n",
" foo | \n",
"
\n",
" \n",
" 1 | \n",
" 1.0 | \n",
" 2013-01-02 | \n",
" 1.0 | \n",
" 3 | \n",
" train | \n",
" foo | \n",
"
\n",
" \n",
" 2 | \n",
" 1.0 | \n",
" 2013-01-02 | \n",
" 1.0 | \n",
" 3 | \n",
" test | \n",
" foo | \n",
"
\n",
" \n",
" 3 | \n",
" 1.0 | \n",
" 2013-01-02 | \n",
" 1.0 | \n",
" 3 | \n",
" train | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D E F\n",
"0 1.0 2013-01-02 1.0 3 test foo\n",
"1 1.0 2013-01-02 1.0 3 train foo\n",
"2 1.0 2013-01-02 1.0 3 test foo\n",
"3 1.0 2013-01-02 1.0 3 train foo"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = pd.DataFrame({ 'A' : 1.,\n",
"'B' : pd.Timestamp('20130102'),\n",
"'C' : pd.Series(1,index=list(range(4)),dtype='float32'),\n",
"'D' : np.array([3] * 4,dtype='int32'),\n",
"'E' : pd.Categorical([\"test\",\"train\",\"test\",\"train\"]),\n",
"'F' : 'foo' })\n",
"\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A float64\n",
"B datetime64[ns]\n",
"C float32\n",
"D int32\n",
"E category\n",
"F object\n",
"dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you’re using IPython, tab completion for column names (as well as public attributes) is automatically enabled. Here’s a subset of the attributes that will be completed:\n",
" \n",
" In [13]: df2."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 1.0\n",
"2 1.0\n",
"3 1.0\n",
"Name: A, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see, the columns A, B, C, and D are automatically tab completed. E is there as well; the rest of the attributes have been truncated for brevity."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.2 Viewing Data"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
" -0.225967 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -1.578785 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n",
"2013-01-06 0.125631 -0.116653 -0.544575 -1.578785"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(3)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
" '2013-01-05', '2013-01-06'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[-1.33401275, -0.34829657, 0.38865407, -0.22596701],\n",
" [-0.13997444, -1.34778853, 0.81707707, 0.19247685],\n",
" [-1.0827386 , -0.5441047 , -1.42388302, -1.24736743],\n",
" [ 0.03478847, -0.67722051, 0.12044917, 0.7943414 ],\n",
" [ 0.42854678, -0.61015602, -0.95089113, -0.0580473 ],\n",
" [ 0.12563068, -0.11665286, -0.54457518, -1.57878468]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
"
\n",
" \n",
" mean | \n",
" -0.327960 | \n",
" -0.607370 | \n",
" -0.265528 | \n",
" -0.353891 | \n",
"
\n",
" \n",
" std | \n",
" 0.710887 | \n",
" 0.415940 | \n",
" 0.853035 | \n",
" 0.896606 | \n",
"
\n",
" \n",
" min | \n",
" -1.334013 | \n",
" -1.347789 | \n",
" -1.423883 | \n",
" -1.578785 | \n",
"
\n",
" \n",
" 25% | \n",
" -0.847048 | \n",
" -0.660454 | \n",
" -0.849312 | \n",
" -0.992017 | \n",
"
\n",
" \n",
" 50% | \n",
" -0.052593 | \n",
" -0.577130 | \n",
" -0.212063 | \n",
" -0.142007 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.102920 | \n",
" -0.397249 | \n",
" 0.321603 | \n",
" 0.129846 | \n",
"
\n",
" \n",
" max | \n",
" 0.428547 | \n",
" -0.116653 | \n",
" 0.817077 | \n",
" 0.794341 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"count 6.000000 6.000000 6.000000 6.000000\n",
"mean -0.327960 -0.607370 -0.265528 -0.353891\n",
"std 0.710887 0.415940 0.853035 0.896606\n",
"min -1.334013 -1.347789 -1.423883 -1.578785\n",
"25% -0.847048 -0.660454 -0.849312 -0.992017\n",
"50% -0.052593 -0.577130 -0.212063 -0.142007\n",
"75% 0.102920 -0.397249 0.321603 0.129846\n",
"max 0.428547 -0.116653 0.817077 0.794341"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 2013-01-01 00:00:00 | \n",
" 2013-01-02 00:00:00 | \n",
" 2013-01-03 00:00:00 | \n",
" 2013-01-04 00:00:00 | \n",
" 2013-01-05 00:00:00 | \n",
" 2013-01-06 00:00:00 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" -1.334013 | \n",
" -0.139974 | \n",
" -1.082739 | \n",
" 0.034788 | \n",
" 0.428547 | \n",
" 0.125631 | \n",
"
\n",
" \n",
" B | \n",
" -0.348297 | \n",
" -1.347789 | \n",
" -0.544105 | \n",
" -0.677221 | \n",
" -0.610156 | \n",
" -0.116653 | \n",
"
\n",
" \n",
" C | \n",
" 0.388654 | \n",
" 0.817077 | \n",
" -1.423883 | \n",
" 0.120449 | \n",
" -0.950891 | \n",
" -0.544575 | \n",
"
\n",
" \n",
" D | \n",
" -0.225967 | \n",
" 0.192477 | \n",
" -1.247367 | \n",
" 0.794341 | \n",
" -0.058047 | \n",
" -1.578785 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 2013-01-01 2013-01-02 2013-01-03 2013-01-04 2013-01-05 2013-01-06\n",
"A -1.334013 -0.139974 -1.082739 0.034788 0.428547 0.125631\n",
"B -0.348297 -1.347789 -0.544105 -0.677221 -0.610156 -0.116653\n",
"C 0.388654 0.817077 -1.423883 0.120449 -0.950891 -0.544575\n",
"D -0.225967 0.192477 -1.247367 0.794341 -0.058047 -1.578785"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.T"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" D | \n",
" C | \n",
" B | \n",
" A | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -0.225967 | \n",
" 0.388654 | \n",
" -0.348297 | \n",
" -1.334013 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" 0.192477 | \n",
" 0.817077 | \n",
" -1.347789 | \n",
" -0.139974 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.247367 | \n",
" -1.423883 | \n",
" -0.544105 | \n",
" -1.082739 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.794341 | \n",
" 0.120449 | \n",
" -0.677221 | \n",
" 0.034788 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" -0.058047 | \n",
" -0.950891 | \n",
" -0.610156 | \n",
" 0.428547 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" -1.578785 | \n",
" -0.544575 | \n",
" -0.116653 | \n",
" 0.125631 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" D C B A\n",
"2013-01-01 -0.225967 0.388654 -0.348297 -1.334013\n",
"2013-01-02 0.192477 0.817077 -1.347789 -0.139974\n",
"2013-01-03 -1.247367 -1.423883 -0.544105 -1.082739\n",
"2013-01-04 0.794341 0.120449 -0.677221 0.034788\n",
"2013-01-05 -0.058047 -0.950891 -0.610156 0.428547\n",
"2013-01-06 -1.578785 -0.544575 -0.116653 0.125631"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_index(axis=1, ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
" -0.225967 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -1.578785 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n",
"2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n",
"2013-01-06 0.125631 -0.116653 -0.544575 -1.578785"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(by='B')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.3 Selection"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Getting"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2013-01-01 -1.334013\n",
"2013-01-02 -0.139974\n",
"2013-01-03 -1.082739\n",
"2013-01-04 0.034788\n",
"2013-01-05 0.428547\n",
"2013-01-06 0.125631\n",
"Freq: D, Name: A, dtype: float64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['A']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
" -0.225967 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 -1.334013 -0.348297 0.388654 -0.225967\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[0:3]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['20130102':'20130104']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.2 Selection by Label"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For getting a cross section using a label"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A -1.334013\n",
"B -0.348297\n",
"C 0.388654\n",
"D -0.225967\n",
"Name: 2013-01-01 00:00:00, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[dates[0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Selecting on a multi-axis by label"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"2013-01-01 -1.334013 -0.348297\n",
"2013-01-02 -0.139974 -1.347789\n",
"2013-01-03 -1.082739 -0.544105\n",
"2013-01-04 0.034788 -0.677221\n",
"2013-01-05 0.428547 -0.610156\n",
"2013-01-06 0.125631 -0.116653"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[:,['A','B']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Showing label slicing, both endpoints are included"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"2013-01-02 -0.139974 -1.347789\n",
"2013-01-03 -1.082739 -0.544105\n",
"2013-01-04 0.034788 -0.677221"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['20130102':'20130104',['A','B']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reduction in the dimensions of the returned object"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A -0.139974\n",
"B -1.347789\n",
"Name: 2013-01-02 00:00:00, dtype: float64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc['20130102',['A','B']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For getting a scalar value"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-1.3340127475498547"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[dates[0],'A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For getting fast access to a scalar (equiv to the prior method)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-1.3340127475498547"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.at[dates[0],'A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.3 Selection by Position\n",
"See more in Selection by Position\n",
"Select via the position of the passed integers"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 0.034788\n",
"B -0.677221\n",
"C 0.120449\n",
"D 0.794341\n",
"Name: 2013-01-04 00:00:00, dtype: float64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"By integer slices, acting similar to numpy/python"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"2013-01-04 0.034788 -0.677221\n",
"2013-01-05 0.428547 -0.610156"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[3:5,0:2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"By lists of integer position locations, similar to the numpy/python style"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" 0.817077 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -1.423883 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.950891 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A C\n",
"2013-01-02 -0.139974 0.817077\n",
"2013-01-03 -1.082739 -1.423883\n",
"2013-01-05 0.428547 -0.950891"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[[1,2,4],[0,2]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For slicing rows explicitly"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1:3,:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For slicing columns explicitly"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" B | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" B C\n",
"2013-01-01 -0.348297 0.388654\n",
"2013-01-02 -1.347789 0.817077\n",
"2013-01-03 -0.544105 -1.423883\n",
"2013-01-04 -0.677221 0.120449\n",
"2013-01-05 -0.610156 -0.950891\n",
"2013-01-06 -0.116653 -0.544575"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[:,1:3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For getting a value explicitly"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-1.3477885295869219"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1,1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For getting fast access to a scalar (equiv to the prior method)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-1.3477885295869219"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iat[1,1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.4 Boolean Indexing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using a single column’s values to select data."
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -1.578785 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047\n",
"2013-01-06 0.125631 -0.116653 -0.544575 -1.578785"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.A > 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A where operation for getting."
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" NaN | \n",
" NaN | \n",
" 0.388654 | \n",
" NaN | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" NaN | \n",
" NaN | \n",
" 0.817077 | \n",
" 0.192477 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" NaN | \n",
" 0.120449 | \n",
" 0.794341 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 NaN NaN 0.388654 NaN\n",
"2013-01-02 NaN NaN 0.817077 0.192477\n",
"2013-01-03 NaN NaN NaN NaN\n",
"2013-01-04 0.034788 NaN 0.120449 0.794341\n",
"2013-01-05 0.428547 NaN NaN NaN\n",
"2013-01-06 0.125631 NaN NaN NaN"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df > 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Using the isin() method for filtering:"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df2 = df.copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"添加一列。"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df2['E'] = ['one', 'one','two','three','four','three']"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
" E | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" -0.348297 | \n",
" 0.388654 | \n",
" -0.225967 | \n",
" one | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 0.192477 | \n",
" one | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
" two | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 0.794341 | \n",
" three | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
" four | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -1.578785 | \n",
" three | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D E\n",
"2013-01-01 -1.334013 -0.348297 0.388654 -0.225967 one\n",
"2013-01-02 -0.139974 -1.347789 0.817077 0.192477 one\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367 two\n",
"2013-01-04 0.034788 -0.677221 0.120449 0.794341 three\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047 four\n",
"2013-01-06 0.125631 -0.116653 -0.544575 -1.578785 three"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
" E | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -1.247367 | \n",
" two | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -0.058047 | \n",
" four | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D E\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -1.247367 two\n",
"2013-01-05 0.428547 -0.610156 -0.950891 -0.058047 four"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2[df2['E'].isin(['two','four'])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.5 Setting\n",
"Setting a new column automatically aligns the data by the indexes"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2013-01-02 1\n",
"2013-01-03 2\n",
"2013-01-04 3\n",
"2013-01-05 4\n",
"2013-01-06 5\n",
"2013-01-07 6\n",
"Freq: D, dtype: int64"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))\n",
"s1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting values by position"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.iat[0,1] = 0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting by assigning with a numpy array"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.loc[:,'D'] = np.array([5] * len(df))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The result of the prior setting operations"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" 0.000000 | \n",
" 0.388654 | \n",
" 5 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" 0.817077 | \n",
" 5 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" 5 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.034788 | \n",
" -0.677221 | \n",
" 0.120449 | \n",
" 5 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" 5 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 -1.334013 0.000000 0.388654 5\n",
"2013-01-02 -0.139974 -1.347789 0.817077 5\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 5\n",
"2013-01-04 0.034788 -0.677221 0.120449 5\n",
"2013-01-05 0.428547 -0.610156 -0.950891 5\n",
"2013-01-06 0.125631 -0.116653 -0.544575 5"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A where operation with setting."
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df2 = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df2[df2 > 0] = -df2"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" -1.334013 | \n",
" 0.000000 | \n",
" -0.388654 | \n",
" -5 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.139974 | \n",
" -1.347789 | \n",
" -0.817077 | \n",
" -5 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -1.082739 | \n",
" -0.544105 | \n",
" -1.423883 | \n",
" -5 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" -0.034788 | \n",
" -0.677221 | \n",
" -0.120449 | \n",
" -5 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" -0.428547 | \n",
" -0.610156 | \n",
" -0.950891 | \n",
" -5 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" -0.125631 | \n",
" -0.116653 | \n",
" -0.544575 | \n",
" -5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 -1.334013 0.000000 -0.388654 -5\n",
"2013-01-02 -0.139974 -1.347789 -0.817077 -5\n",
"2013-01-03 -1.082739 -0.544105 -1.423883 -5\n",
"2013-01-04 -0.034788 -0.677221 -0.120449 -5\n",
"2013-01-05 -0.428547 -0.610156 -0.950891 -5\n",
"2013-01-06 -0.125631 -0.116653 -0.544575 -5"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}