{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pandas import Series, DataFrame\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "      <td>-0.063561</td>\n",
       "      <td> 1.992570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.060838</td>\n",
       "      <td>-0.293773</td>\n",
       "      <td>-0.757681</td>\n",
       "      <td>-0.397578</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 1.025647</td>\n",
       "      <td>-0.353300</td>\n",
       "      <td>-0.878448</td>\n",
       "      <td>-2.015514</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.788950</td>\n",
       "      <td>-0.221509</td>\n",
       "      <td>-1.079488</td>\n",
       "      <td>-0.833900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 1.038247</td>\n",
       "      <td> 0.376582</td>\n",
       "      <td> 0.698767</td>\n",
       "      <td> 0.401919</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.067863</td>\n",
       "      <td> 0.174289</td>\n",
       "      <td> 1.914769</td>\n",
       "      <td>-0.808617</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "0  0.081227  1.651024 -0.063561  1.992570\n",
       "1 -0.060838 -0.293773 -0.757681 -0.397578\n",
       "2  1.025647 -0.353300 -0.878448 -2.015514\n",
       "3 -0.788950 -0.221509 -1.079488 -0.833900\n",
       "4  1.038247  0.376582  0.698767  0.401919\n",
       "5 -0.067863  0.174289  1.914769 -0.808617"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = DataFrame(np.random.randn(6,4), columns=list('ABCD'))\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##1. DataFrame选择数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**选择A列的数据**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -0.532235\n",
       "1    1.282245\n",
       "2    1.894709\n",
       "3   -1.421003\n",
       "4   -0.477041\n",
       "5   -2.055907\n",
       "Name: A, dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**切片得到行数据**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td> 1.282245</td>\n",
       "      <td>-2.136740</td>\n",
       "      <td> 0.969922</td>\n",
       "      <td> 0.110193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 1.894709</td>\n",
       "      <td> 0.732707</td>\n",
       "      <td>-1.164495</td>\n",
       "      <td>-0.379666</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "1  1.282245 -2.136740  0.969922  0.110193\n",
       "2  1.894709  0.732707 -1.164495 -0.379666"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**DataFrame的loc方法帮助选择数据**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A    0.081227\n",
       "B    1.651024\n",
       "C   -0.063561\n",
       "D    1.992570\n",
       "Name: 0, dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 选择第0行数据\n",
    "df.loc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.060838</td>\n",
       "      <td>-0.293773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 1.025647</td>\n",
       "      <td>-0.353300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.788950</td>\n",
       "      <td>-0.221509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 1.038247</td>\n",
       "      <td> 0.376582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.067863</td>\n",
       "      <td> 0.174289</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B\n",
       "0  0.081227  1.651024\n",
       "1 -0.060838 -0.293773\n",
       "2  1.025647 -0.353300\n",
       "3 -0.788950 -0.221509\n",
       "4  1.038247  0.376582\n",
       "5 -0.067863  0.174289"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 选择多列数据\n",
    "df.loc[:, ['A', 'B']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.060838</td>\n",
       "      <td>-0.293773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 1.025647</td>\n",
       "      <td>-0.353300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B\n",
       "0  0.081227  1.651024\n",
       "1 -0.060838 -0.293773\n",
       "2  1.025647 -0.353300"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 选择局部数据，行列交叉区域的数据\n",
    "df.loc[0:2, ['A', 'B']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.081227162656888133"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 只选择一个数据\n",
    "df.loc[0, 'A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**at方法用于专门获取某个值**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.081227162656888133"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.at[0, 'A']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##2. DataFrame切片操作"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**iloc方法提取第四行数据**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "A   -0.788950\n",
       "B   -0.221509\n",
       "C   -1.079488\n",
       "D   -0.833900\n",
       "Name: 3, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 返回series数据类型\n",
    "type(df.iloc[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.788950</td>\n",
       "      <td>-0.221509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 1.038247</td>\n",
       "      <td> 0.376582</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B\n",
       "3 -0.788950 -0.221509\n",
       "4  1.038247  0.376582"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 返回地4-5行，1-2列\n",
    "df.iloc[3:5, 0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.060838</td>\n",
       "      <td>-0.757681</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td> 1.025647</td>\n",
       "      <td>-0.878448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 1.038247</td>\n",
       "      <td> 0.698767</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         C\n",
       "1 -0.060838 -0.757681\n",
       "2  1.025647 -0.878448\n",
       "4  1.038247  0.698767"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 提取不连续行和列的数\n",
    "df.iloc[[1,2,4], [0,2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.29377253872215964"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 提取某一个值\n",
    "df.iloc[1,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**iat是专门提取某个数的方法，效率更高**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.29377253872215964"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iat[1,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##3. DataFrame筛选数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "      <td>-0.063561</td>\n",
       "      <td> 1.992570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 1.038247</td>\n",
       "      <td> 0.376582</td>\n",
       "      <td> 0.698767</td>\n",
       "      <td> 0.401919</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D\n",
       "0  0.081227  1.651024 -0.063561  1.992570\n",
       "4  1.038247  0.376582  0.698767  0.401919"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 筛选D列数据中大于0的行\n",
    "df[df.D > 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "      <td>-0.063561</td>\n",
       "      <td> 1.99257</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C        D\n",
       "0  0.081227  1.651024 -0.063561  1.99257"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用&符号实现多条件筛选\n",
    "df[(df.D > 0) & (df.C < 0)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**加入我们只需要A和B列的数据，而D和C列数据都是用于筛选的，可如此写**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 0.081227</td>\n",
       "      <td> 1.651024</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B\n",
       "0  0.081227  1.651024"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['A', 'B']][(df.D > 0) & (df.C < 0)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**通过insin方法来筛选特定的值**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# \n",
    "alist = [1, 0.054497, 0.36]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}