{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 3\n",
"2 5\n",
"3 NaN\n",
"4 6\n",
"5 8\n",
"dtype: float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"s = pd.Series([1,3,5,np.nan,6,8])\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" 0.709714 | \n",
" -0.570070 | \n",
" -0.550845 | \n",
" -1.057212 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.628737 | \n",
" 0.391646 | \n",
" -1.091479 | \n",
" 0.537669 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -0.294052 | \n",
" -1.296279 | \n",
" -0.912759 | \n",
" 0.441580 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.528582 | \n",
" 0.532051 | \n",
" -1.274615 | \n",
" 0.146372 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.501640 | \n",
" 1.164900 | \n",
" 2.032659 | \n",
" 0.443303 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 1.862346 | \n",
" 2.191615 | \n",
" -0.349397 | \n",
" -0.309474 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n",
"2013-01-02 -0.628737 0.391646 -1.091479 0.537669\n",
"2013-01-03 -0.294052 -1.296279 -0.912759 0.441580\n",
"2013-01-04 0.528582 0.532051 -1.274615 0.146372\n",
"2013-01-05 0.501640 1.164900 2.032659 0.443303\n",
"2013-01-06 1.862346 2.191615 -0.349397 -0.309474"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a DataFrame\n",
"dates = pd.date_range('20130101', periods=6)\n",
"# dates \n",
"df = pd.DataFrame(np.random.randn(6,4),index = dates,columns=list('ABCD'))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A float64\n",
"B float64\n",
"C float64\n",
"D float64\n",
"dtype: object"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" 0.709714 | \n",
" -0.570070 | \n",
" -0.550845 | \n",
" -1.057212 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.628737 | \n",
" 0.391646 | \n",
" -1.091479 | \n",
" 0.537669 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n",
"2013-01-02 -0.628737 0.391646 -1.091479 0.537669"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-05 | \n",
" 0.501640 | \n",
" 1.164900 | \n",
" 2.032659 | \n",
" 0.443303 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 1.862346 | \n",
" 2.191615 | \n",
" -0.349397 | \n",
" -0.309474 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-05 0.501640 1.164900 2.032659 0.443303\n",
"2013-01-06 1.862346 2.191615 -0.349397 -0.309474"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(2)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
" '2013-01-05', '2013-01-06'],\n",
" dtype='datetime64[ns]', freq='D')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the index, columns, and the underlying numpy data.\n",
"# A cell only renders its last bare expression, so the first two are printed\n",
"# explicitly; otherwise they would be evaluated and silently discarded.\n",
"print(df.index)\n",
"print(df.columns)\n",
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.446582 | \n",
" 0.402311 | \n",
" -0.357739 | \n",
" 0.033706 | \n",
"
\n",
" \n",
" std | \n",
" 0.870429 | \n",
" 1.235381 | \n",
" 1.219708 | \n",
" 0.617847 | \n",
"
\n",
" \n",
" min | \n",
" -0.628737 | \n",
" -1.296279 | \n",
" -1.274615 | \n",
" -1.057212 | \n",
"
\n",
" \n",
" 25% | \n",
" -0.095129 | \n",
" -0.329641 | \n",
" -1.046799 | \n",
" -0.195513 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.515111 | \n",
" 0.461849 | \n",
" -0.731802 | \n",
" 0.293976 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.664431 | \n",
" 1.006688 | \n",
" -0.399759 | \n",
" 0.442872 | \n",
"
\n",
" \n",
" max | \n",
" 1.862346 | \n",
" 2.191615 | \n",
" 2.032659 | \n",
" 0.537669 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"count 6.000000 6.000000 6.000000 6.000000\n",
"mean 0.446582 0.402311 -0.357739 0.033706\n",
"std 0.870429 1.235381 1.219708 0.617847\n",
"min -0.628737 -1.296279 -1.274615 -1.057212\n",
"25% -0.095129 -0.329641 -1.046799 -0.195513\n",
"50% 0.515111 0.461849 -0.731802 0.293976\n",
"75% 0.664431 1.006688 -0.399759 0.442872\n",
"max 1.862346 2.191615 2.032659 0.537669"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# View descriptive statistics\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2013-01-01 NaN\n",
"2013-01-02 NaN\n",
"2013-01-03 1\n",
"2013-01-04 3\n",
"2013-01-05 5\n",
"2013-01-06 NaN\n",
"Freq: D, dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" 0.709714 | \n",
" -0.570070 | \n",
" -0.550845 | \n",
" -1.057212 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" 0.080977 | \n",
" -0.178424 | \n",
" -1.642323 | \n",
" -0.519543 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -0.213076 | \n",
" -1.474703 | \n",
" -2.555082 | \n",
" -0.077962 | \n",
"
\n",
" \n",
" 2013-01-04 | \n",
" 0.315507 | \n",
" -0.942652 | \n",
" -3.829697 | \n",
" 0.068409 | \n",
"
\n",
" \n",
" 2013-01-05 | \n",
" 0.817147 | \n",
" 0.222248 | \n",
" -1.797039 | \n",
" 0.511712 | \n",
"
\n",
" \n",
" 2013-01-06 | \n",
" 2.679493 | \n",
" 2.413864 | \n",
" -2.146435 | \n",
" 0.202238 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n",
"2013-01-02 0.080977 -0.178424 -1.642323 -0.519543\n",
"2013-01-03 -0.213076 -1.474703 -2.555082 -0.077962\n",
"2013-01-04 0.315507 -0.942652 -3.829697 0.068409\n",
"2013-01-05 0.817147 0.222248 -1.797039 0.511712\n",
"2013-01-06 2.679493 2.413864 -2.146435 0.202238"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cumulative sum down each column (axis 0).\n",
"# The built-in DataFrame.cumsum replaces the manual apply(np.cumsum) form.\n",
"df.cumsum()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-01-01 | \n",
" 0.709714 | \n",
" -0.570070 | \n",
" -0.550845 | \n",
" -1.057212 | \n",
"
\n",
" \n",
" 2013-01-02 | \n",
" -0.628737 | \n",
" 0.391646 | \n",
" -1.091479 | \n",
" 0.537669 | \n",
"
\n",
" \n",
" 2013-01-03 | \n",
" -0.294052 | \n",
" -1.296279 | \n",
" -0.912759 | \n",
" 0.441580 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n",
"2013-01-02 -0.628737 0.391646 -1.091479 0.537669\n",
"2013-01-03 -0.294052 -1.296279 -0.912759 0.441580"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Concat function: cut the frame into row slices, then glue them back together.\n",
"# The last slice has to be df[3:]; df[3:1] is empty (start > stop), which\n",
"# silently dropped rows 3-5 from the concatenated result.\n",
"piece = [df[:2], df[2:3], df[3:]]\n",
"pd.concat(piece)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" key | \n",
" lval_x | \n",
" lval_y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" foo | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" foo | \n",
" 1 | \n",
" 5 | \n",
"
\n",
" \n",
" 2 | \n",
" foo | \n",
" 2 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" foo | \n",
" 2 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" key lval_x lval_y\n",
"0 foo 1 4\n",
"1 foo 1 5\n",
"2 foo 2 4\n",
"3 foo 2 5"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Merge function\n",
"left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})\n",
"right = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [4, 5]})\n",
"pd.merge(left,right,on='key')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" A B C D\n",
"0 0.307377 0.587502 -1.498826 -1.413681\n",
"1 0.606972 -0.949604 0.330130 0.221957\n",
"2 -0.224912 -1.682801 0.376430 0.132976\n",
"3 0.177087 1.054522 1.107316 0.857462\n",
"4 -1.378083 0.585530 0.877582 -1.344324\n",
"5 -0.288912 0.378269 -0.178293 1.931936\n",
"6 -0.196315 -0.300350 2.258136 0.007789\n",
"7 -0.863666 0.695809 -2.187412 -0.436914\n",
"A 0.177087\n",
"B 1.054522\n",
"C 1.107316\n",
"D 0.857462\n",
"Name: 3, dtype: float64\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.307377 | \n",
" 0.587502 | \n",
" -1.498826 | \n",
" -1.413681 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.606972 | \n",
" -0.949604 | \n",
" 0.330130 | \n",
" 0.221957 | \n",
"
\n",
" \n",
" 2 | \n",
" -0.224912 | \n",
" -1.682801 | \n",
" 0.376430 | \n",
" 0.132976 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.177087 | \n",
" 1.054522 | \n",
" 1.107316 | \n",
" 0.857462 | \n",
"
\n",
" \n",
" 4 | \n",
" -1.378083 | \n",
" 0.585530 | \n",
" 0.877582 | \n",
" -1.344324 | \n",
"
\n",
" \n",
" 5 | \n",
" -0.288912 | \n",
" 0.378269 | \n",
" -0.178293 | \n",
" 1.931936 | \n",
"
\n",
" \n",
" 6 | \n",
" -0.196315 | \n",
" -0.300350 | \n",
" 2.258136 | \n",
" 0.007789 | \n",
"
\n",
" \n",
" 7 | \n",
" -0.863666 | \n",
" 0.695809 | \n",
" -2.187412 | \n",
" -0.436914 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.177087 | \n",
" 1.054522 | \n",
" 1.107316 | \n",
" 0.857462 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"0 0.307377 0.587502 -1.498826 -1.413681\n",
"1 0.606972 -0.949604 0.330130 0.221957\n",
"2 -0.224912 -1.682801 0.376430 0.132976\n",
"3 0.177087 1.054522 1.107316 0.857462\n",
"4 -1.378083 0.585530 0.877582 -1.344324\n",
"5 -0.288912 0.378269 -0.178293 1.931936\n",
"6 -0.196315 -0.300350 2.258136 0.007789\n",
"7 -0.863666 0.695809 -2.187412 -0.436914\n",
"8 0.177087 1.054522 1.107316 0.857462"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Append a row: re-add row 3 of the frame as a new last row.\n",
"df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])\n",
"s = df.iloc[3]\n",
"print(df)\n",
"print(s)\n",
"# DataFrame.append is deprecated and was removed in pandas 2.0. Turn the row\n",
"# Series into a one-row frame and concatenate instead; ignore_index renumbers\n",
"# the result 0..8, matching what append(s, ignore_index=True) produced.\n",
"pd.concat([df, s.to_frame().T], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" A B C D\n",
"0 foo one -1.010950 -1.443440\n",
"1 bar one -1.023700 0.052766\n",
"2 foo two -0.642614 -2.438775\n",
"3 bar three -0.442711 0.525121\n",
"4 foo two -1.176690 -0.230537\n",
"5 bar two -0.771803 -0.347051\n",
"6 foo one -1.313567 -1.211388\n",
"7 foo three 0.779921 -1.279009\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" A | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" bar | \n",
" -2.238214 | \n",
" 0.230836 | \n",
"
\n",
" \n",
" foo | \n",
" -3.363901 | \n",
" -6.603149 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" C D\n",
"A \n",
"bar -2.238214 0.230836\n",
"foo -3.363901 -6.603149"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a frame with two string key columns (A, B) and two random numeric columns (C, D).\n",
"df = pd.DataFrame({\n",
"    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],\n",
"    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],\n",
"    'C': np.random.randn(8),\n",
"    'D': np.random.randn(8),\n",
"})\n",
"print(df)\n",
"\n",
"# Grouping and then applying a function sum to the resulting groups.\n",
"# The numeric columns are selected explicitly: newer pandas releases no longer\n",
"# drop the string column B automatically when summing.\n",
"df.groupby('A')[['C', 'D']].sum()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"263.22"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Time Series: 100 one-second samples of random integers in [0, 500),\n",
"# then downsampled into 5-minute bins by summing each bin.\n",
"rng = pd.date_range('1/1/2012', periods=100, freq='S')\n",
"ts = pd.Series(np.random.randint(0, 500, size=len(rng)), index=rng)\n",
"ts.resample('5Min').sum()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ulson_hu/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:830: MatplotlibDeprecationWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.\n",
" mplDeprecation)\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Random walk: running total of 1000 standard-normal draws indexed from 2000-01-01.\n",
"ts = pd.Series(\n",
"    np.random.randn(1000),\n",
"    index=pd.date_range('1/1/2000', periods=1000),\n",
").cumsum()\n",
"ts.plot()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}