{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "from pandas import Series, DataFrame\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0 0.081227 1.651024-0.063561 1.992570
1-0.060838-0.293773-0.757681-0.397578
2 1.025647-0.353300-0.878448-2.015514
3-0.788950-0.221509-1.079488-0.833900
4 1.038247 0.376582 0.698767 0.401919
5-0.067863 0.174289 1.914769-0.808617
\n", "
" ], "text/plain": [ " A B C D\n", "0 0.081227 1.651024 -0.063561 1.992570\n", "1 -0.060838 -0.293773 -0.757681 -0.397578\n", "2 1.025647 -0.353300 -0.878448 -2.015514\n", "3 -0.788950 -0.221509 -1.079488 -0.833900\n", "4 1.038247 0.376582 0.698767 0.401919\n", "5 -0.067863 0.174289 1.914769 -0.808617" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = DataFrame(np.random.randn(6,4), columns=list('ABCD'))\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. DataFrame选择数据" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**选择A列的数据**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 -0.532235\n", "1 1.282245\n", "2 1.894709\n", "3 -1.421003\n", "4 -0.477041\n", "5 -2.055907\n", "Name: A, dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['A']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**切片得到行数据**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
1 1.282245-2.136740 0.969922 0.110193
2 1.894709 0.732707-1.164495-0.379666
\n", "
" ], "text/plain": [ " A B C D\n", "1 1.282245 -2.136740 0.969922 0.110193\n", "2 1.894709 0.732707 -1.164495 -0.379666" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[1:3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**DataFrame的loc方法帮助选择数据**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A 0.081227\n", "B 1.651024\n", "C -0.063561\n", "D 1.992570\n", "Name: 0, dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 选择第0行数据\n", "df.loc[0]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
0 0.081227 1.651024
1-0.060838-0.293773
2 1.025647-0.353300
3-0.788950-0.221509
4 1.038247 0.376582
5-0.067863 0.174289
\n", "
" ], "text/plain": [ " A B\n", "0 0.081227 1.651024\n", "1 -0.060838 -0.293773\n", "2 1.025647 -0.353300\n", "3 -0.788950 -0.221509\n", "4 1.038247 0.376582\n", "5 -0.067863 0.174289" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 选择多列数据\n", "df.loc[:, ['A', 'B']]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
0 0.081227 1.651024
1-0.060838-0.293773
2 1.025647-0.353300
\n", "
" ], "text/plain": [ " A B\n", "0 0.081227 1.651024\n", "1 -0.060838 -0.293773\n", "2 1.025647 -0.353300" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 选择局部数据,行列交叉区域的数据\n", "df.loc[0:2, ['A', 'B']]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.081227162656888133" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 只选择一个数据\n", "df.loc[0, 'A']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**at方法用于专门获取某个值**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.081227162656888133" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.at[0, 'A']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. DataFrame切片操作" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**iloc方法提取第四行数据**" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "A -0.788950\n", "B -0.221509\n", "C -1.079488\n", "D -0.833900\n", "Name: 3, dtype: float64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[3]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 返回series数据类型\n", "type(df.iloc[3])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
3-0.788950-0.221509
4 1.038247 0.376582
\n", "
" ], "text/plain": [ " A B\n", "3 -0.788950 -0.221509\n", "4 1.038247 0.376582" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 返回第4-5行,1-2列\n", "df.iloc[3:5, 0:2]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AC
1-0.060838-0.757681
2 1.025647-0.878448
4 1.038247 0.698767
\n", "
" ], "text/plain": [ " A C\n", "1 -0.060838 -0.757681\n", "2 1.025647 -0.878448\n", "4 1.038247 0.698767" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 提取不连续行和列的数\n", "df.iloc[[1,2,4], [0,2]]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-0.29377253872215964" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 提取某一个值\n", "df.iloc[1,1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**iat是专门提取某个数的方法,效率更高**" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-0.29377253872215964" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iat[1,1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. DataFrame筛选数据" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0 0.081227 1.651024-0.063561 1.992570
4 1.038247 0.376582 0.698767 0.401919
\n", "
" ], "text/plain": [ " A B C D\n", "0 0.081227 1.651024 -0.063561 1.992570\n", "4 1.038247 0.376582 0.698767 0.401919" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 筛选D列数据中大于0的行\n", "df[df.D > 0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0 0.081227 1.651024-0.063561 1.99257
\n", "
" ], "text/plain": [ " A B C D\n", "0 0.081227 1.651024 -0.063561 1.99257" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 使用&符号实现多条件筛选\n", "df[(df.D > 0) & (df.C < 0)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**假如我们只需要A和B列的数据,而D和C列数据都是用于筛选的,可如此写**" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AB
0 0.081227 1.651024
\n", "
" ], "text/plain": [ " A B\n", "0 0.081227 1.651024" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[['A', 'B']][(df.D > 0) & (df.C < 0)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**通过isin方法来筛选特定的值**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# \n", "alist = [1, 0.054497, 0.36]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.5" } }, "nbformat": 4, "nbformat_minor": 0 }