{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pandas import Series, DataFrame\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
" -0.063561 | \n",
" 1.992570 | \n",
"
\n",
" \n",
" 1 | \n",
" -0.060838 | \n",
" -0.293773 | \n",
" -0.757681 | \n",
" -0.397578 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.025647 | \n",
" -0.353300 | \n",
" -0.878448 | \n",
" -2.015514 | \n",
"
\n",
" \n",
" 3 | \n",
" -0.788950 | \n",
" -0.221509 | \n",
" -1.079488 | \n",
" -0.833900 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.038247 | \n",
" 0.376582 | \n",
" 0.698767 | \n",
" 0.401919 | \n",
"
\n",
" \n",
" 5 | \n",
" -0.067863 | \n",
" 0.174289 | \n",
" 1.914769 | \n",
" -0.808617 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"0 0.081227 1.651024 -0.063561 1.992570\n",
"1 -0.060838 -0.293773 -0.757681 -0.397578\n",
"2 1.025647 -0.353300 -0.878448 -2.015514\n",
"3 -0.788950 -0.221509 -1.079488 -0.833900\n",
"4 1.038247 0.376582 0.698767 0.401919\n",
"5 -0.067863 0.174289 1.914769 -0.808617"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = DataFrame(np.random.randn(6,4), columns=list('ABCD'))\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. DataFrame选择数据"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**选择A列的数据**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 -0.532235\n",
"1 1.282245\n",
"2 1.894709\n",
"3 -1.421003\n",
"4 -0.477041\n",
"5 -2.055907\n",
"Name: A, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**切片得到行数据**"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1.282245 | \n",
" -2.136740 | \n",
" 0.969922 | \n",
" 0.110193 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.894709 | \n",
" 0.732707 | \n",
" -1.164495 | \n",
" -0.379666 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"1 1.282245 -2.136740 0.969922 0.110193\n",
"2 1.894709 0.732707 -1.164495 -0.379666"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[1:3]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**DataFrame的loc方法帮助选择数据**"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A 0.081227\n",
"B 1.651024\n",
"C -0.063561\n",
"D 1.992570\n",
"Name: 0, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 选择第0行数据\n",
"df.loc[0]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
"
\n",
" \n",
" 1 | \n",
" -0.060838 | \n",
" -0.293773 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.025647 | \n",
" -0.353300 | \n",
"
\n",
" \n",
" 3 | \n",
" -0.788950 | \n",
" -0.221509 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.038247 | \n",
" 0.376582 | \n",
"
\n",
" \n",
" 5 | \n",
" -0.067863 | \n",
" 0.174289 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"0 0.081227 1.651024\n",
"1 -0.060838 -0.293773\n",
"2 1.025647 -0.353300\n",
"3 -0.788950 -0.221509\n",
"4 1.038247 0.376582\n",
"5 -0.067863 0.174289"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 选择多列数据\n",
"df.loc[:, ['A', 'B']]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
"
\n",
" \n",
" 1 | \n",
" -0.060838 | \n",
" -0.293773 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.025647 | \n",
" -0.353300 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"0 0.081227 1.651024\n",
"1 -0.060838 -0.293773\n",
"2 1.025647 -0.353300"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 选择局部数据,行列交叉区域的数据\n",
"df.loc[0:2, ['A', 'B']]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.081227162656888133"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 只选择一个数据\n",
"df.loc[0, 'A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**at方法用于专门获取某个值**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.081227162656888133"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.at[0, 'A']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. DataFrame切片操作"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**iloc方法提取第四行数据**"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"A -0.788950\n",
"B -0.221509\n",
"C -1.079488\n",
"D -0.833900\n",
"Name: 3, dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[3]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 返回series数据类型\n",
"type(df.iloc[3])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" -0.788950 | \n",
" -0.221509 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.038247 | \n",
" 0.376582 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"3 -0.788950 -0.221509\n",
"4 1.038247 0.376582"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 返回第4-5行,1-2列\n",
"df.iloc[3:5, 0:2]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" -0.060838 | \n",
" -0.757681 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.025647 | \n",
" -0.878448 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.038247 | \n",
" 0.698767 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A C\n",
"1 -0.060838 -0.757681\n",
"2 1.025647 -0.878448\n",
"4 1.038247 0.698767"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 提取不连续行和列的数据\n",
"df.iloc[[1,2,4], [0,2]]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-0.29377253872215964"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 提取某一个值\n",
"df.iloc[1,1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**iat是专门提取某个数的方法,效率更高**"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-0.29377253872215964"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iat[1,1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. DataFrame筛选数据"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
" -0.063561 | \n",
" 1.992570 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.038247 | \n",
" 0.376582 | \n",
" 0.698767 | \n",
" 0.401919 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"0 0.081227 1.651024 -0.063561 1.992570\n",
"4 1.038247 0.376582 0.698767 0.401919"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 筛选D列数据中大于0的行\n",
"df[df.D > 0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
" D | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
" -0.063561 | \n",
" 1.99257 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C D\n",
"0 0.081227 1.651024 -0.063561 1.99257"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 使用&符号实现多条件筛选\n",
"df[(df.D > 0) & (df.C < 0)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**假如我们只需要A和B列的数据,而D和C列数据都是用于筛选的,可如此写**"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.081227 | \n",
" 1.651024 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"0 0.081227 1.651024"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Apply the row filter (D > 0 and C < 0) and pick columns A and B in one .loc call;\n",
"# this avoids chained indexing (df[cols][mask]), which indexes twice via an intermediate frame.\n",
"df.loc[(df.D > 0) & (df.C < 0), ['A', 'B']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**通过isin方法来筛选特定的值**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 供isin方法筛选使用的候选值列表\n",
"alist = [1, 0.054497, 0.36]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}