{ "cells": [ { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 3\n", "2 5\n", "3 NaN\n", "4 6\n", "5 8\n", "dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "s = pd.Series([1,3,5,np.nan,6,8])\n", "s" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-010.709714-0.570070-0.550845-1.057212
2013-01-02-0.6287370.391646-1.0914790.537669
2013-01-03-0.294052-1.296279-0.9127590.441580
2013-01-040.5285820.532051-1.2746150.146372
2013-01-050.5016401.1649002.0326590.443303
2013-01-061.8623462.191615-0.349397-0.309474
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n", "2013-01-02 -0.628737 0.391646 -1.091479 0.537669\n", "2013-01-03 -0.294052 -1.296279 -0.912759 0.441580\n", "2013-01-04 0.528582 0.532051 -1.274615 0.146372\n", "2013-01-05 0.501640 1.164900 2.032659 0.443303\n", "2013-01-06 1.862346 2.191615 -0.349397 -0.309474" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create a DataFrame\n", "dates = pd.date_range('20130101', periods=6)\n", "# dates \n", "df = pd.DataFrame(np.random.randn(6,4),index = dates,columns=list('ABCD'))\n", "df" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A float64\n", "B float64\n", "C float64\n", "D float64\n", "dtype: object" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-010.709714-0.570070-0.550845-1.057212
2013-01-02-0.6287370.391646-1.0914790.537669
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n", "2013-01-02 -0.628737 0.391646 -1.091479 0.537669" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-050.5016401.1649002.0326590.443303
2013-01-061.8623462.191615-0.349397-0.309474
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-05 0.501640 1.164900 2.032659 0.443303\n", "2013-01-06 1.862346 2.191615 -0.349397 -0.309474" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n", " '2013-01-05', '2013-01-06'],\n", " dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the index, columns, and the underlying numpy data\n", "df.index\n", "df.columns\n", "df.values" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
count6.0000006.0000006.0000006.000000
mean0.4465820.402311-0.3577390.033706
std0.8704291.2353811.2197080.617847
min-0.628737-1.296279-1.274615-1.057212
25%-0.095129-0.329641-1.046799-0.195513
50%0.5151110.461849-0.7318020.293976
75%0.6644311.006688-0.3997590.442872
max1.8623462.1916152.0326590.537669
\n", "
" ], "text/plain": [ " A B C D\n", "count 6.000000 6.000000 6.000000 6.000000\n", "mean 0.446582 0.402311 -0.357739 0.033706\n", "std 0.870429 1.235381 1.219708 0.617847\n", "min -0.628737 -1.296279 -1.274615 -1.057212\n", "25% -0.095129 -0.329641 -1.046799 -0.195513\n", "50% 0.515111 0.461849 -0.731802 0.293976\n", "75% 0.664431 1.006688 -0.399759 0.442872\n", "max 1.862346 2.191615 2.032659 0.537669" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# View descriptive statistics\n", "df.describe()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2013-01-01 NaN\n", "2013-01-02 NaN\n", "2013-01-03 1\n", "2013-01-04 3\n", "2013-01-05 5\n", "2013-01-06 NaN\n", "Freq: D, dtype: float64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)\n", "s" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-010.709714-0.570070-0.550845-1.057212
2013-01-020.080977-0.178424-1.642323-0.519543
2013-01-03-0.213076-1.474703-2.555082-0.077962
2013-01-040.315507-0.942652-3.8296970.068409
2013-01-050.8171470.222248-1.7970390.511712
2013-01-062.6794932.413864-2.1464350.202238
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n", "2013-01-02 0.080977 -0.178424 -1.642323 -0.519543\n", "2013-01-03 -0.213076 -1.474703 -2.555082 -0.077962\n", "2013-01-04 0.315507 -0.942652 -3.829697 0.068409\n", "2013-01-05 0.817147 0.222248 -1.797039 0.511712\n", "2013-01-06 2.679493 2.413864 -2.146435 0.202238" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Return cumulative sum over requested axis.\n", "df.apply(np.cumsum)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
2013-01-010.709714-0.570070-0.550845-1.057212
2013-01-02-0.6287370.391646-1.0914790.537669
2013-01-03-0.294052-1.296279-0.9127590.441580
\n", "
" ], "text/plain": [ " A B C D\n", "2013-01-01 0.709714 -0.570070 -0.550845 -1.057212\n", "2013-01-02 -0.628737 0.391646 -1.091479 0.537669\n", "2013-01-03 -0.294052 -1.296279 -0.912759 0.441580" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Concat function\n", "piece = [df[:2],df[2:3],df[3:1]]\n", "pd.concat(piece)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keylval_xlval_y
0foo14
1foo15
2foo24
3foo25
\n", "
" ], "text/plain": [ " key lval_x lval_y\n", "0 foo 1 4\n", "1 foo 1 5\n", "2 foo 2 4\n", "3 foo 2 5" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Merge function\n", "left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})\n", "right = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [4, 5]})\n", "pd.merge(left,right,on='key')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " A B C D\n", "0 0.307377 0.587502 -1.498826 -1.413681\n", "1 0.606972 -0.949604 0.330130 0.221957\n", "2 -0.224912 -1.682801 0.376430 0.132976\n", "3 0.177087 1.054522 1.107316 0.857462\n", "4 -1.378083 0.585530 0.877582 -1.344324\n", "5 -0.288912 0.378269 -0.178293 1.931936\n", "6 -0.196315 -0.300350 2.258136 0.007789\n", "7 -0.863666 0.695809 -2.187412 -0.436914\n", "A 0.177087\n", "B 1.054522\n", "C 1.107316\n", "D 0.857462\n", "Name: 3, dtype: float64\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
00.3073770.587502-1.498826-1.413681
10.606972-0.9496040.3301300.221957
2-0.224912-1.6828010.3764300.132976
30.1770871.0545221.1073160.857462
4-1.3780830.5855300.877582-1.344324
5-0.2889120.378269-0.1782931.931936
6-0.196315-0.3003502.2581360.007789
7-0.8636660.695809-2.187412-0.436914
80.1770871.0545221.1073160.857462
\n", "
" ], "text/plain": [ " A B C D\n", "0 0.307377 0.587502 -1.498826 -1.413681\n", "1 0.606972 -0.949604 0.330130 0.221957\n", "2 -0.224912 -1.682801 0.376430 0.132976\n", "3 0.177087 1.054522 1.107316 0.857462\n", "4 -1.378083 0.585530 0.877582 -1.344324\n", "5 -0.288912 0.378269 -0.178293 1.931936\n", "6 -0.196315 -0.300350 2.258136 0.007789\n", "7 -0.863666 0.695809 -2.187412 -0.436914\n", "8 0.177087 1.054522 1.107316 0.857462" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Append function\n", "df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])\n", "s = df.iloc[3]\n", "print(df)\n", "print(s)\n", "df.append(s, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " A B C D\n", "0 foo one -1.010950 -1.443440\n", "1 bar one -1.023700 0.052766\n", "2 foo two -0.642614 -2.438775\n", "3 bar three -0.442711 0.525121\n", "4 foo two -1.176690 -0.230537\n", "5 bar two -0.771803 -0.347051\n", "6 foo one -1.313567 -1.211388\n", "7 foo three 0.779921 -1.279009\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
A
bar-2.2382140.230836
foo-3.363901-6.603149
\n", "
" ], "text/plain": [ " C D\n", "A \n", "bar -2.238214 0.230836\n", "foo -3.363901 -6.603149" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',\n", "\t\t\t\t\t\t'foo', 'bar', 'foo', 'foo'],\n", "\t\t\t\t\t\t'B' : ['one', 'one', 'two', 'three',\n", "\t\t\t\t\t\t'two', 'two', 'one', 'three'],\n", "\t\t\t\t\t\t'C' : np.random.randn(8),\n", "\t\t\t\t\t\t'D' : np.random.randn(8)})\n", "# Grouping and then applying a function sum to the resulting groups.\n", "\n", "print(df)\n", "df.groupby('A').sum()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "263.22" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Time Series \n", "rng = pd.date_range('1/1/2012', periods=100, freq='S')\n", "# print(rng)\n", "ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)\n", "# print(ts)\n", "ts.resample('5Min').sum()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/ulson_hu/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:830: MatplotlibDeprecationWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.\n", " mplDeprecation)\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))\n", "ts=pd.Series(np.random.randn(1000),index=pd.date_range('1/1/2000',periods=1000))\n", "ts = ts.cumsum()\n", "ts.plot()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", 
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }