{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 分组与聚合" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* GroupBy对象" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0fooone-0.527734-0.031381
1barone-0.198668-0.849724
2footwo1.5411031.186877
3barthree1.5900231.487310
4footwo1.212341-1.679589
5bartwo-0.1255111.110535
6fooone0.852693-0.631104
7foothree-1.3331030.989434
\n", "
" ], "text/plain": [ " A B C D\n", "0 foo one -0.527734 -0.031381\n", "1 bar one -0.198668 -0.849724\n", "2 foo two 1.541103 1.186877\n", "3 bar three 1.590023 1.487310\n", "4 foo two 1.212341 -1.679589\n", "5 bar two -0.125511 1.110535\n", "6 foo one 0.852693 -0.631104\n", "7 foo three -1.333103 0.989434" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Seed NumPy's global RNG so the random C and D columns are the same on every Restart & Run All\n", "np.random.seed(42)\n", "df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',\n", " 'foo', 'bar', 'foo', 'foo'],\n", " 'B' : ['one', 'one', 'two', 'three',\n", " 'two', 'two', 'one', 'three'],\n", " 'C' : np.random.randn(8),\n", " 'D' : np.random.randn(8)})\n", "df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A',as_index=True) # groupby() returns a GroupBy object; grouping by itself does not directly produce a result" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ACD
0bar-0.813530-1.682979
1foo-2.071223-3.836994
\n", "
" ], "text/plain": [ " A C D\n", "0 bar -0.813530 -1.682979\n", "1 foo -2.071223 -3.836994" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A',as_index=False).sum() # 分组后的聚合是对每一组进行聚合 " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([0, 1], dtype='int64')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A',as_index=False).sum().index" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
A
bar-0.813530-1.682979
foo-2.071223-3.836994
\n", "
" ], "text/plain": [ " C D\n", "A \n", "bar -0.813530 -1.682979\n", "foo -2.071223 -3.836994" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A').sum()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2, 2)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A').sum().shape\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A\n", "bar 3\n", "foo 5\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby('A').size()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
AB
barone-0.984471-0.043672
three-1.4295101.017294
two-0.692597-0.970318
fooone1.3482520.953936
three-1.678742-0.110585
two-0.039560-0.730062
\n", "
" ], "text/plain": [ " C D\n", "A B \n", "bar one -0.984471 -0.043672\n", " three -1.429510 1.017294\n", " two -0.692597 -0.970318\n", "foo one 1.348252 0.953936\n", " three -1.678742 -0.110585\n", " two -0.039560 -0.730062" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(['A','B']).sum() " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A B \n", "bar one 1\n", " three 1\n", " two 1\n", "foo one 2\n", " three 1\n", " two 2\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(['A','B']).size()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0fooone-1.566378-0.767842
1barone-0.9257670.997244
2footwo0.334522-3.222793
3barthree-0.077704-2.128931
4footwo-1.011080-0.997273
5bartwo0.189941-0.551293
6fooone0.4620630.030302
7foothree-0.2903501.120612
\n", "
" ], "text/plain": [ " A B C D\n", "0 foo one -1.566378 -0.767842\n", "1 bar one -0.925767 0.997244\n", "2 foo two 0.334522 -3.222793\n", "3 bar three -0.077704 -2.128931\n", "4 foo two -1.011080 -0.997273\n", "5 bar two 0.189941 -0.551293\n", "6 foo one 0.462063 0.030302\n", "7 foo three -0.290350 1.120612" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
1-0.4260980.165805
2-0.251420-2.116332
\n", "
" ], "text/plain": [ " C D\n", "1 -0.426098 0.165805\n", "2 -0.251420 -2.116332" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 按自定义key分组,列表\n", "self_def_key = [1, 1, 2, 2, 2, 1, 1, 1]\n", "df.groupby(self_def_key).mean()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A B \n", "bar one 1\n", " three 1\n", " two 1\n", "foo one 2\n", " three 1\n", " two 2\n", "dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 按自定义key分组,多层列表\n", "df.groupby([df['A'], df['B']]).size()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
AB
barone-0.9257670.997244
three-0.077704-2.128931
two0.189941-0.551293
fooone-0.552157-0.368770
three-0.2903501.120612
two-0.338279-2.110033
\n", "
" ], "text/plain": [ " C D\n", "A B \n", "bar one -0.925767 0.997244\n", " three -0.077704 -2.128931\n", " two 0.189941 -0.551293\n", "foo one -0.552157 -0.368770\n", " three -0.290350 1.120612\n", " two -0.338279 -2.110033" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby([df['A'], df['B']]).mean()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
Bonethreetwoonethreetwo
A
bar-0.925767-0.0777040.1899410.997244-2.128931-0.551293
foo-0.552157-0.290350-0.338279-0.3687701.120612-2.110033
\n", "
" ], "text/plain": [ " C D \n", "B one three two one three two\n", "A \n", "bar -0.925767 -0.077704 0.189941 0.997244 -2.128931 -0.551293\n", "foo -0.552157 -0.290350 -0.338279 -0.368770 1.120612 -2.110033" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby([df['A'], df['B']]).mean().unstack() " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* GroupBy对象分组迭代 \n", "groupBy对象是一个可迭代的对象 \n", "\n", "可以遍历,也可以转成列表" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- bar\n", "*** A B C D\n", "1 bar one -0.925767 0.997244\n", "3 bar three -0.077704 -2.128931\n", "5 bar two 0.189941 -0.551293\n", "--- foo\n", "*** A B C D\n", "0 foo one -1.566378 -0.767842\n", "2 foo two 0.334522 -3.222793\n", "4 foo two -1.011080 -0.997273\n", "6 foo one 0.462063 0.030302\n", "7 foo three -0.290350 1.120612\n" ] } ], "source": [ "# 单层分组\n", "grouped1 = df.groupby('A')\n", "for group_name, group_data in grouped1:\n", " print('---',group_name)\n", " print('***',group_data)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- ('bar', 'one')\n", "*** A B C D\n", "1 bar one -0.925767 0.997244\n", "--- ('bar', 'three')\n", "*** A B C D\n", "3 bar three -0.077704 -2.128931\n", "--- ('bar', 'two')\n", "*** A B C D\n", "5 bar two 0.189941 -0.551293\n", "--- ('foo', 'one')\n", "*** A B C D\n", "0 foo one -1.566378 -0.767842\n", "6 foo one 0.462063 0.030302\n", "--- ('foo', 'three')\n", "*** A B C D\n", "7 foo three -0.29035 1.120612\n", "--- ('foo', 'two')\n", "*** A B C D\n", "2 foo two 0.334522 -3.222793\n", "4 foo two -1.011080 -0.997273\n" ] } ], "source": [ "# 多层分组\n", "grouped2 = df.groupby(['A', 'B'])\n", "for group_name, group_data in grouped2:\n", " print('---',group_name)\n", " print('***',group_data)" ] }, { "cell_type": "code", "execution_count": 23, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('bar', A B C D\n", " 1 bar one -0.925767 0.997244\n", " 3 bar three -0.077704 -2.128931\n", " 5 bar two 0.189941 -0.551293), ('foo', A B C D\n", " 0 foo one -1.566378 -0.767842\n", " 2 foo two 0.334522 -3.222793\n", " 4 foo two -1.011080 -0.997273\n", " 6 foo one 0.462063 0.030302\n", " 7 foo three -0.290350 1.120612)]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# GroupBy对象转换list,凡是可迭代对象都可以转换成列表\n", "list(grouped1)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'bar': A B C D\n", " 1 bar one -0.925767 0.997244\n", " 3 bar three -0.077704 -2.128931\n", " 5 bar two 0.189941 -0.551293, 'foo': A B C D\n", " 0 foo one -1.566378 -0.767842\n", " 2 foo two 0.334522 -3.222793\n", " 4 foo two -1.011080 -0.997273\n", " 6 foo one 0.462063 0.030302\n", " 7 foo three -0.290350 1.120612}" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# GroupBy对象转换dict\n", "dict(list(grouped1))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "A object\n", "B object\n", "C float64\n", "D float64\n", "dtype: object\n", "float64 2\n", "object 2\n", "dtype: int64\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
float64object
0-2.334220fooone
10.071477barone
2-2.888271footwo
3-2.206634barthree
4-2.008353footwo
5-0.361352bartwo
60.492366fooone
70.830262foothree
\n", "
" ], "text/plain": [ " float64 object\n", "0 -2.334220 fooone\n", "1 0.071477 barone\n", "2 -2.888271 footwo\n", "3 -2.206634 barthree\n", "4 -2.008353 footwo\n", "5 -0.361352 bartwo\n", "6 0.492366 fooone\n", "7 0.830262 foothree" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 按列分组\n", "print(df.dtypes)\n", "\n", "# 按数据类型分组\n", "print(df.groupby(df.dtypes, axis=1).size())\n", "df.groupby(df.dtypes, axis=1).sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 其他分组方法" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
A17.08.08.04
B7NaNNaNNaN2
C95.02.08.09
D44.08.07.03
E42.08.09.05
\n", "
" ], "text/plain": [ " a b c d e\n", "A 1 7.0 8.0 8.0 4\n", "B 7 NaN NaN NaN 2\n", "C 9 5.0 2.0 8.0 9\n", "D 4 4.0 8.0 7.0 3\n", "E 4 2.0 8.0 9.0 5" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.DataFrame(np.random.randint(1, 10, (5,5)),\n", " columns=['a', 'b', 'c', 'd', 'e'],\n", " index=['A', 'B', 'C', 'D', 'E'])\n", "# Blank out columns b..d of the second row so the later count()/sum() cells have NaN to handle.\n", "# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.\n", "df2.iloc[1, 1:4] = np.nan\n", "df2" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "C 1\n", "java 2\n", "python 2\n", "dtype: int64\n", " C java python\n", "A 1 2 2\n", "B 0 1 1\n", "C 1 2 2\n", "D 1 2 2\n", "E 1 2 2\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Cjavapython
A8.012.08.0
BNaN2.07.0
C8.011.014.0
D7.011.08.0
E9.013.06.0
\n", "
" ], "text/plain": [ " C java python\n", "A 8.0 12.0 8.0\n", "B NaN 2.0 7.0\n", "C 8.0 11.0 14.0\n", "D 7.0 11.0 8.0\n", "E 9.0 13.0 6.0" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Group the columns through a dict that maps each column label to a group name\n", "mapping_dict = {'a':'python', 'b':'python', 'c':'java', 'd':'C', 'e':'java'}\n", "columns_by_language = df2.groupby(mapping_dict, axis=1)\n", "print(columns_by_language.size())\n", "print(columns_by_language.count()) # number of non-NaN values\n", "columns_by_language.sum()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " a b c d e\n", "AA 8 5 9 4 8\n", "BBB 3 2 3 7 6\n", "CC 4 2 9 5 2\n", "D 4 1 2 5 2\n", "EE 3 2 2 7 6\n" ] }, { "data": { "text/plain": [ "[(1, a b c d e\n", " D 4 1 2 5 2), (2, a b c d e\n", " AA 8 5 9 4 8\n", " CC 4 2 9 5 2\n", " EE 3 2 2 7 6), (3, a b c d e\n", " BBB 3 2 3 7 6)]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Group through a function\n", "df3 = pd.DataFrame(np.random.randint(1, 10, (5,5)),\n", " columns=['a', 'b', 'c', 'd', 'e'],\n", " index=['AA', 'BBB', 'CC', 'D', 'EE'])\n", "print(df3)\n", "\n", "def group_key(idx):\n", "    \"\"\"Return the group key for one index label: the length of the label.\n", "\n", "    idx is a row-index or column-index label.\n", "    \"\"\"\n", "    label_length = len(idx)\n", "    return label_length\n", "\n", "list(df3.groupby(group_key))" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 1\n", "2 3\n", "3 1\n", "dtype: int64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The custom function above is equivalent to passing the built-in len\n", "df3.groupby(len).size()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
languagePythonJavaPythonJavaPython
indexAABCB
038798
112216
262412
325774
455727
\n", "
" ], "text/plain": [ "language Python Java Python Java Python\n", "index A A B C B\n", "0 3 8 7 9 8\n", "1 1 2 2 1 6\n", "2 6 2 4 1 2\n", "3 2 5 7 7 4\n", "4 5 5 7 2 7" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 通过索引级别分组\n", "columns = pd.MultiIndex.from_arrays([['Python', 'Java', 'Python', 'Java', 'Python'],\n", " ['A', 'A', 'B', 'C', 'B']], names=['language', 'index'])\n", "df4 = pd.DataFrame(np.random.randint(1, 10, (5, 5)), columns=columns)\n", "df4" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexABC
011159
1381
2861
37117
410142
\n", "
" ], "text/plain": [ "index A B C\n", "0 11 15 9\n", "1 3 8 1\n", "2 8 6 1\n", "3 7 11 7\n", "4 10 14 2" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Group the columns by the 'language' level; printed explicitly because only the\n", "# last expression of a cell is displayed automatically\n", "print(df4.groupby(level='language', axis=1).sum())\n", "\n", "# Group the columns by the 'index' level (shown as the cell result)\n", "df4.groupby(level='index', axis=1).sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* 聚合" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABCD
0fooone-1.566378-0.767842
1barone-0.9257670.997244
2footwo0.334522-3.222793
3barthree-0.077704-2.128931
4footwo-1.011080-0.997273
5bartwo0.189941-0.551293
6fooone0.4620630.030302
7foothree-0.2903501.120612
\n", "
" ], "text/plain": [ " A B C D\n", "0 foo one -1.566378 -0.767842\n", "1 bar one -0.925767 0.997244\n", "2 foo two 0.334522 -3.222793\n", "3 bar three -0.077704 -2.128931\n", "4 foo two -1.011080 -0.997273\n", "5 bar two 0.189941 -0.551293\n", "6 foo one 0.462063 0.030302\n", "7 foo three -0.290350 1.120612" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Abarfoo
Ccount3.0000005.000000
mean-0.271177-0.414245
std0.5824730.869992
min-0.925767-1.566378
25%-0.501735-1.011080
50%-0.077704-0.290350
75%0.0561180.334522
max0.1899410.462063
Dcount3.0000005.000000
mean-0.560993-0.767399
std1.5631101.603333
min-2.128931-3.222793
25%-1.340112-0.997273
50%-0.551293-0.767842
75%0.2229760.030302
max0.9972441.120612
\n", "
" ], "text/plain": [ "A bar foo\n", "C count 3.000000 5.000000\n", " mean -0.271177 -0.414245\n", " std 0.582473 0.869992\n", " min -0.925767 -1.566378\n", " 25% -0.501735 -1.011080\n", " 50% -0.077704 -0.290350\n", " 75% 0.056118 0.334522\n", " max 0.189941 0.462063\n", "D count 3.000000 5.000000\n", " mean -0.560993 -0.767399\n", " std 1.563110 1.603333\n", " min -2.128931 -3.222793\n", " 25% -1.340112 -0.997273\n", " 50% -0.551293 -0.767842\n", " 75% 0.222976 0.030302\n", " max 0.997244 1.120612" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Built-in aggregation functions\n", "df.groupby('A').describe().T" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " C D\n", "A \n", "bar 1.115708 3.126175\n", "foo 2.028442 4.343405\n", " C D\n", "A \n", "bar 1.115708 3.126175\n", "foo 2.028442 4.343405\n" ] } ], "source": [ "# Custom aggregation function\n", "def peak_range(df):\n", "    \"\"\"Return the spread of the values: maximum minus minimum.\n", "\n", "    ``df`` is whatever ``agg`` passes in (original note: the records that\n", "    belong to one group key); it only needs ``max()`` and ``min()``.\n", "    \"\"\"\n", "    return df.max() - df.min()\n", "\n", "# Aggregate only the numeric columns: max() - min() is undefined for the string\n", "# column B, and pandas 2.0+ is expected to raise a TypeError rather than drop it\n", "# silently as older versions did.\n", "numeric_groups = df.groupby('A')[['C', 'D']]\n", "print(numeric_groups.agg(peak_range))\n", "print(numeric_groups.agg(lambda df : df.max() - df.min())) # the function passed to agg must reduce a group to a single value" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
meanstdcountpeak_rangemeanstdcountpeak_range
A
bar0.4219481.01224431.7886910.5827071.25474432.337033
foo0.3490601.22661252.874206-0.0331531.18347652.866467
\n", "
" ], "text/plain": [ " C D \n", " mean std count peak_range mean std count peak_range\n", "A \n", "bar 0.421948 1.012244 3 1.788691 0.582707 1.254744 3 2.337033\n", "foo 0.349060 1.226612 5 2.874206 -0.033153 1.183476 5 2.866467" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Apply several aggregation functions at once.\n", "# Only the numeric columns are selected: mean/std/peak_range are undefined for the\n", "# string column B, and pandas 2.0+ is expected to raise rather than drop it silently.\n", "df.groupby('A')[['C', 'D']].agg(['mean', 'std', 'count', peak_range]) # result column names default to the function names" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
meanstdcountrangemeanstdcountrange
A
bar-0.2711770.58247331.115708-0.5609931.56311033.126175
foo-0.4142450.86999252.028442-0.7673991.60333354.343405
\n", "
" ], "text/plain": [ " C D \n", " mean std count range mean std count range\n", "A \n", "bar -0.271177 0.582473 3 1.115708 -0.560993 1.563110 3 3.126175\n", "foo -0.414245 0.869992 5 2.028442 -0.767399 1.603333 5 4.343405" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Only the numeric columns are aggregated; the string column B cannot be averaged\n", "df.groupby('A')[['C', 'D']].agg(['mean', 'std', 'count', ('range', peak_range)]) # a (name, function) tuple supplies a custom column name" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
A
bar-0.271177-1.682979
foo-0.414245-3.836994
\n", "
" ], "text/plain": [ " C D\n", "A \n", "bar -0.271177 -1.682979\n", "foo -0.414245 -3.836994" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 每列作用不同的聚合函数\n", "dict_mapping = {'C':'mean',\n", " 'D':'sum'}\n", "df.groupby('A').agg(dict_mapping)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CD
meanmaxsum
A
bar-0.2711770.189941-1.682979
foo-0.4142450.462063-3.836994
\n", "
" ], "text/plain": [ " C D\n", " mean max sum\n", "A \n", "bar -0.271177 0.189941 -1.682979\n", "foo -0.414245 0.462063 -3.836994" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dict_mapping = {'C':['mean','max'],\n", " 'D':'sum'}\n", "df.groupby('A').agg(dict_mapping)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Country\n", "US 13608\n", "CN 2734\n", "CA 1468\n", "JP 1237\n", "KR 993\n", "GB 901\n", "MX 579\n", "TW 394\n", "TR 326\n", "PH 298\n", "Name: Brand, dtype: int64\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABRYAAAIICAYAAAAboLK7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAMTQAADE0B0s6tTgAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XGQ53dd3/HXG48G9bAgJEZ6SS7A\nBRsiDbFUYzMaaiGlccookg4Kjei0WGIdJ9IhhlY6to5n1PxhCW1ig3FIChVotRKFqk1EjBG0CQoU\nLgrLcYBJoGWYlQJG3/3j913yY5PL7Wf3d/vb23s8Zn5zu9/P93f72fnO/nb3uZ/v91vdHQAAAACA\nEY9a9gQAAAAAgBOPsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAY\nJiwCAAAAAMOERQAAAABgmLAIAAAAAAzbs+wJLNIpp5zSp5566rKnAQAAAAAnpI997GNf6O5TNrLv\nrgqLp556ao4cObLsaQAAAADACamq7t/ovk6FBgAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMM2\nFBar6ueqaqWquqrOe5jxV68fq6oDVXVHVR2qqndV1blbHQMAAAAAdoaNrlh8c5KLknxk/UBVXZDk\nm5IcXjd0fZIbuvucJNckuXEBYwAAAADADlDdvfGdq1aSfHt3v3d6/5Qktyf57iS3rY1V1WlJDiV5\nYnc/UFWV5BOZBcjPbmasu1eONb99+/b1kSNHNvz5AAAAAAAPqqqPdfe+jey71Wss/niSm7v7w+u2\nn5Hk4939QJL0rF4eTnLmFsYAAAAAgB1i02Gxqi5M8qwkrz3KLuuXQtYCxtbP4cqqOrL2WF1dfaQp\nAwAAAAALspUVi9+a5OuSfHg6RXpfkrdX1fOSfDTJvqrakyTTKc1nZLb6cLNjD9Hd13b3vrXH3r17\nt/DpAAAAAAAbtemw2N0Hu/tJ3b2/u/cnOZLkku7+9e6+L8ldSV487f6CJCvdvbLZsc3OEwAAAABY\nvA3dvKWqrkvy/CSnJ/lkktXufuq6fVbypTd2eVqSm5I8Iclnklze3e/bytixuHkLAAAAAGzeyM1b\nhu4KvdMJiwAAAACwedt5V2gAAAAA4CQkLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIA\nAAAAMExYBAAAAACGCYsAAAAAwLA9y54AG7f/qluXPYWTysrBS5c9BQAAAIAdy4pFAAAAAGCYsAgA\nAAAADBMWAQAAAIBhwiIAAA
AAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABg\nmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUA\nAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAA\nw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwC\nAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAA\nGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGER\nAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAADNtQWKyqn6uqlarqqjpv2vaYqvrlqjpUVXdX1duq\nav/cc06btt1TVe+tqou2OgYAAAAA7AwbXbH45iQXJfnIuu03JHlad5+f5K3T+2sOJrmzuw8keWmS\nW6pqzxbHAAAAAIAdYENhsbvf0d1H1m37XHf/Wnf3tOnOJE+e2+WyJNdN+747yb2ZxcmtjAEAAAAA\nO8Air7H4Q0l+NUmq6glJHtXd98+NryQ5c7NjD/cBq+rKqjqy9lhdXV3YJwMAAAAAHN1CwmJVXZ3k\nQJJXzW3u9bstYOxLdPe13b1v7bF3796NThkAAAAA2IIth8WqekWS70zyvO7+bJJ096emsVPndj0r\nyeHNjm11ngAAAADA4mwpLFbVlUlelOQ53f3pdcNvSnLFtN+zkpye5J1bHAMAAAAAdoAN3W25qq5L\n8vzMIt9vVtVqkouT/GySDyW5raqS5PPd/Y3T016Z5PVVdU+SLyR5SXc/sMUxAAAAAGAH2FBY7O4r\nMq0iXOeRrn94b5LnLnIMAAAAANgZFnlXaAAAAADgJCEsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYB\nAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAA\nDBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAI\nAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAA\nYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RF\nAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAA\nAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYs\nAgAAAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAA\nABgmLAIAAAAAw4RFAAAAAGDYhsJiVf1cVa1UVVfVeXPbD1TVHVV1qKreVVXnHs8xAAAAAGBn2OiK\nxTcnuSjJR9Ztvz7JDd19TpJrktx4nMcAAAAAgB1gQ2Gxu9/R3Ufmt1XVaUkuSHLztOktSc6uqv3H\nY2wznxwAAAAAcHxs5RqLZyT5eHc/kCTd3UkOJznzOI09RFVdWVVH1h6rq6tb+HQAAAAAgI3a6s1b\net37dZzHvvSDd1/b3fvWHnv37n3EyQIAAAAAi7FnC8/9aJJ9VbWnux+oqspsxeHhJJ89DmMAAAAA\nwA6x6RWL3X1fkruSvHja9IIkK929cjzGNjtPAAAAAGDxNrRisaquS/L8JKcn+c2qWu3upyZ5
WZKb\nqurqJJ9Jcvnc047HGAAAAACwA2woLHb3FUmueJjtH0xy4VGes/AxAAAAAGBn2OrNWwAAAACAk5Cw\nCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RFAAAA\nAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOE\nRQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAA\nAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgm\nLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAA\nAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAw\nYREAAAAAGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAA\nAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAADFtIWKyqS6rqD6vqrqp6b1VdPm0/rare\nVlX3TNsvmnvOpsYAAAAAgOXbclisqkryn5O8tLufmeTbk1xfVY9NcjDJnd19IMlLk9xSVXump252\nDAAAAABYskXGusdN/35Vkk8l+XySy5KcnSTd/e6qujfJRUlu38IYAAAAALBkWw6L3d1VdVmS/1pV\nf57k8Um+M8ljkzyqu++f230lyZlV9YTNjG11rgAAAADAYiziVOg9SX40yfO7+6wk35bkF6fhXr/7\n3NubHZv/2FdW1ZG1x+rq6tjkAQAAAIBNWcTNW85P8qTu/t1kdupyko8neUaSVNWpc/ueleRwd39q\nM2PrP3B3X9vd+9Yee/fuXcCnAwAAAAAcyyLC4keT7KuqpyVJVT01yVOSHErypiRXTNufleT0JO+c\nnrfZMQAAAABgyRZxjcV7q+plSd5cVX+V2WnLL+/uj1XVK5O8vqruSfKFJC/p7gemp252DAAAAABY\nsoXcFbq735DkDQ+z/d4kzz3KczY1BgAAAAAs3yJOhQYAAAAATjLCIgAAAAAwTFgEAAAAAIYJiwAA\nAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACG\nCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQA\nAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAw\nTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIA\nAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACA\nYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYB\nAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAA\nDBMWAQAAAIBhwiIAAAAAMExYBAAAAACGLSQsVtUpVfWaqrqnqt5XVTdP2w9U1R1Vdaiq3lVV5849\nZ1NjAAAAAMDyLWrF4sEkf5XknO5+epJ/OW2/PskN3X1OkmuS3Dj3nM2OAQAAAABLtuWwWFVfmeSl\nSa7u7k6S7v5EVZ2W5IIkN0+7viXJ2VW1f7NjW50rAAAA
ALAYi1ix+JQkn0ryr6rqD6rqd6rq25Kc\nkeTj3f1AkkzR8XCSM7cw9iWq6sqqOrL2WF1dXcCnAwAAAAAcyyLC4qOTPDnJ+7v7byf5wSRvTLIn\nSa/bt+be3uzYgzt1X9vd+9Yee/fuHZ48AAAAADBuzwL+j49kdn3FW5Kku99TVR9OclaSfVW1p7sf\nqKrKbDXi4SSf3eQYAAAAALADbHnFYnd/MslvJbkkSarqrCRnJ/mdJHclefG06wuSrHT3Snfft5mx\nrc4VAAAAAFiMRaxYTJIfSPK6qvqpJH+Z5J9NN3B5WZKbqurqJJ9JcvncczY7BgAAAAAs2ULCYnd/\nKMnFD7P9g0kuPMpzNjUGAAAAACzfIm7eAgAAAACcZIRFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIA\nAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACA\nYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYB\nAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAA\nDBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAI\nAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAA\nYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RF\nAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwCAAAA\nAMOERQAAAABg2ELDYlW9uqq6qs6b3j9QVXdU1aGqeldVnTu376bGAAAAAIDlW1hYrKoLknxTksNz\nm69PckN3n5PkmiQ3LmAMAAAAAFiyhYTFqjolyXVJXp6kp22nJbkgyc3Tbm9JcnZV7d/s2CLmCgAA\nAABs3aJWLP54kpu7+8Nz285I8vHufiBJurszW8145hbGAAAAAIAdYMthsaouTPKsJK99mOFev/sC\nxuY/9pVVdWTtsbq6upEpAwAAAABbtIgVi9+a5OuSfLiqVpLsS/L2JOcl2VdVe5Kkqiqz1YiHk3x0\nk2Nforuv7e59a4+9e/cu4NMBAAAAAI5ly2Gxuw9295O6e393709yJMkl3f2LSe5K8uJp1xckWenu\nle6+bzNjW50rAAAAALAYe47z//+yJDdV1dVJPpPk8gWMAQAAAABLtvCwOK1aXHv7g0kuPMp+mxoD\nAAAAAJZvUXeFBgAAAABOIsIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUA\nAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAA\nw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwDBhEQAAAAAYJiwC\nAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAA\nGCYsAgAAAADDhEUAAAAAYJiwCAAAAAAMExYBAAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGER\nAAAAABgmLAIAAAAAw/YsewJwMtp/1a3LnsJJZeXgpcueAgAAAOw6ViwCAAAAAMOERQAAAABgmLAI\nAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGCYsAgAAAADDhEUAAAAA\nYJiwCAAAAAAMExYB
AAAAgGHCIgAAAAAwTFgEAAAAAIYJiwAAAADAMGERAAAAABgmLAIAAAAAw4RF\nAAAAAGCYsAgAAAAADBMWAQAAAIBhWw6LVfWYqvrlqjpUVXdX1duqav80dtr0/j1V9d6qumjueZsa\nAwAAAACWb1ErFm9I8rTuPj/JW6f3k+Rgkju7+0CSlya5par2bHEMAAAAAFiyLYfF7v5cd/9ad/e0\n6c4kT57evizJddN+705yb5KLtjgGAAAAACzZ8bjG4g8l+dWqekKSR3X3/XNjK0nO3OzYcZgrAAAA\nALAJCw2LVXV1kgNJXjVt6vW7zL292bH5j3dlVR1Ze6yuro5OGQAAAADYhIWFxap6RZLvTPK87v5s\nd39q2n7q3G5nJTm82bH1H7O7r+3ufWuPvXv3LurTAQAAAAAewULCYlVdmeRFSZ7T3Z+eG3pTkium\nfZ6V5PQk79ziGAAAAACwZFu+03JV7Uvys0k+lOS2qkqSz3f3NyZ5ZZLXV9U9Sb6Q5CXd/cD01M2O\nAQAAAABLtuWw2N1HcpRrIHb3vUmeu8gxAAAAAGD5jsddoQEAAACAXU5YBAAAAACGCYsAAAAAwDBh\nEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmLAAAA\nAMAwYREAAAAAGCYsAgAAAADD9ix7AgAnsv1X3brsKZxUVg5euuwpAAAAMLFiEQAAAAAYJiwCAAAA\nAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBsz7InAAA7wf6rbl32FE4qKwcvXfYUAACA\nLbJiEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAAADBMWAQAAAAAhgmL\nAAAAAMCwPcueAADAIu2/6tZlT+GksnLw0mVPAQCAJbFiEQAAAAAYJiwCAAAAAMOcCg0AwI7ktPbt\n5bR2AGCUFYsAAAAAwDBhEQAAAAAYJiwCAAAAAMOERQAAAABgmLAIAAAAAAwTFgEAAACAYcIiAAAA\nADBMWAQAAAAAhgmLAAAAAMAwYREAAAAAGLZn2RMAAAB2t/1X3brsKZxUVg5euuwpAHCSsGIRAAAA\nABgmLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAAAIBh7goNAADAhrjD9/Zyh29gp7NiEQAAAAAY\nZsUiAAAAnGSsPt1+VqCyGwmLAAAAACcwoXh7icQPcio0AAAAADBMWAQAAAAAhgmLAAAAAMAwYREA\nAAAAGCYsAgAAAADDhEUAAAAAYNiODYtVdaCq7qiqQ1X1rqo6d9lzAgAAAABmdmxYTHJ9khu6+5wk\n1yS5ccnzAQAAAAAmOzIsVtVpSS5IcvO06S1Jzq6q/cuaEwAAAADwoOruZc/hIarqG5K8vrvPndv2\nriSv6O53zG27MsmVc089PcmfbdtE2ai9SVaXPQkWwrHcPRzL3cOx3D0cy93Dsdw9HMvdw7HcXRzP\n3cOx3JlO7e5TNrLjnuM9ky1YXzzrITt0X5vk2u2ZDptVVUe6e9+y58HWOZa7h2O5eziWu4djuXs4\nlruHY7l7OJa7i+O5eziWJ74deSp0ko8m2VdVe5KkqirJGUkOL3VWAAAAAECSHRoWu/u+JHclefG0\n6QVJVrp7ZWmTAgAAAAC+aCefCv2yJDdV1dVJPpPk8iXPh81zuvru4VjuHo7l7uFY7h6O5e7hWO4e\njuXu4VjuLo7n7uFYnuB25M1bAAAAAICdbUeeCg0AAAAA7GzCIgAAAAAwTFhkYaqqq2rvum0rVXXe\n9Pa3VNXvVdXdVfX+qvrdqvqa5cyWR1JVe6rqx6rqA1X1vunfG6rq/Ok4/6e5ffdWlWsq7FCPcCwf\nN43/vemYvvhY/xc7x9pra1XdVFVHptfV/11V11fVo5c9Px7Zuu+Nj6mqX6mqN1bVLX
PH8wNV9fqq\n+oplz5ejq6pHV9Wr515j76qqX56+X15cVZ+djufd0/g/Xfaceajpa/K++dfPue+PP1NVl03H8NHT\n2JdV1Z1V9b1LmzQPMfe19v6qemDu/f9SVbdX1ffM7ftvqurPq+qvzW3706r6luXMnvWOcTz3z217\nz/S4dNlz5uim19kPzB3TK6bj+MmH2fchXYGdTVhkW1TVniT/LcnLu/v87j43yfcl+fPlzoyjuDHJ\ns5Jc2N1PT3Jukt/I7EZKq0m+varOXeL82LijHcuvnsa/P8nt07+cmA529/lJnpnkGUl+YMnzYYOq\n6quSvC3JJ5J8d5K/yIPH8xlJ9if5waVNkI34hcy+9i7s7qd39zMze919+jT+/unnnvOTPCfJa6rq\nsUuaK4/scJJ/NPf+9yX5gyTp7l9KcijJq6exq5Lc3903becEeWRzX2v/MMmn197v7n+c5LYkz57b\n/eIkf5zk7yRJVe1L8rVJfn97Z83RHON4Zm7b30ryo0neUFVftrQJsxHfNR3TS5L8RJKvWvJ8WJCd\nfFdodpfHTo9PrG3o7g8ubzocTVU9NckLk5zZ3f83Sbr7r5K8qar2J/l8koNJfjLJ85c0TTbgkY7l\nNP64zH5Y+5tJ/qiqntLdf7qs+bI13f25qvqdJE9b9lzYkFOT/GKS3+juq5Kkqr442N1fqKo7kpy1\nnOlxLFV1IMl3JDlj7TU2Sbr7V6fxi9c95XGZ/UH1L7Zrjgx5XWYx8S1V9deTfFOSNyT58mn85Une\nU1UfySz4P3Mps2SzbsvsGKeqTkmyL8lPZRYY35lZdLyjuz+/rAmyJbdl9rvmVye5f8lz4Ri6+6NV\ndSjJc5c9FxbDikW2xfQD92uT3FNVv1ZV/7qqzln2vHhYFyS5p7sfsix9zmuTfH1V/d1tmhObc6xj\n+T1J/kd3/1mSWzL7hYoTVFU9PrO/AP/hsufChrwps6+/qx5ucAobz07ylm2dFSOemeRPuvv/PMI+\n566d9pXkriRXdffntmd6DHpHkidX1d9I8qLMvkb/cm1w+l76yiQ3JHnF9L2TE8edSZ5UVWckuTCz\nlYm/nQdXMT47szjFiemFSf5nd4uKJ4Cq+vokX5fkV5I8bu4097ur6u4lT49NEBbZDp0k3f3DSc5L\n8ktJzklyV1VdtMyJsTnd/YUkP5bkmmXPhS35/kx/vc/s1L3vdQrJCemqqroryW9lFqFuWu502KBb\nk7xw+iV33lVV9UdJ7k1yJH7R3em+eI3hqnrK9EvRB6vq56fNa6dCn5vkKUleVVUXLGWmbMTrk1ye\n2R/aXvcw49+R2del1YonmOln1zsyW6F4cZLbu/tQkrOmFYwXx+vtiWYtSB1O8h+TvGrZE+KY3jyF\nw+sze539i3zpae5rp79zghEWWaT7kzxx3bYnJrlv7Z3u/kh339TdL8nsh7fLtnF+bMz/SnKgqp5w\njP1uSfIVcTr0TnbUY1lV5yf5+iQ3VNVKkrdm9vX6D7Z1hizCwe5+Zndf0N0/3t1upnRi+OnMVj7d\nXlVnzm0/2N3PyOwPcN8Q18zcye7K7DX28UnS3X86/UL0k0kev37n7j6S2Sqpb9vWWTLipiQ/lORz\n3X3P/MB044+zMzsb4LKq+ubtnx5btHadxYszW62YJO/ObLXbadPbnDg+Pb3mnpXZZZreWFWPWfKc\neGTfNcXDb+7uNy97MiyOsMgivT3JP197p6r+SZJD3X1/ze4c/LyaLiBVVV+e2XXdXM9th+nuP8ls\n1dON9eCdg2s6nl82t19ndqHkf7eUiXJMxziWP5nkZ7v7rO7e3937k/xI3MQFtk13X5PkP2QWF89a\nN3Y4yb9I8mPT90x2mCk8/UrmXmMnX/lw+0+nt39DEteY3qG6++OZ/WzzyvntVfW1SX4myeXTqZYv\nS/I6X5snnNuS/P3Mrj19aNp2e2Zn4byzu13/9A
Q0/U7yb5N8MnO/iwLbR1hkkX44s2uX/NG0xPlF\nSdbu2lWZrbr4YFW9J7NrgP3vctnzAAABIElEQVRhkuuWMlOO5fuSvCfJ71fV+5K8L8k3Z3ZR5C/q\n7rcl+dD2T48BRzuWl2S26nTeG5NcUlVfs71TZBP2JHGdtl2gu38myWsyWz1T68b+e5IPZHbTCHam\n783szrK/X1Xvr6rfzSxc/PQ0vnaNxbszu8bbzdNxZYfq7l/o7t9bt/nnk/z77v7jaZ9fz+y02p/Y\n7vmxJe/ObDXx/J2ffzvJgcwCIyeoKS7+SJJXVtVXLHs+cLIpZ0wBwIljWjnzgSSnd/f/W/Z8AACA\nk5cViwBwgqiqKzNbVfEKUREAAFg2KxYBAAAAgGFWLAIAAAAAw4RFAAAAAGCYsAgAAAAADBMWAQAA\nAIBhwiIAAAAAMExYBAAAAACGCYsAAAAAwLD/D29ZgU0gXQvGAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# coding=utf-8\n", "import pandas as pd\n", "from matplotlib import pyplot as plt\n", "\n", "file_path = \"./starbucks_store_worldwide.csv\"\n", "\n", "df = pd.read_csv(file_path)\n", "\n", "#使用matplotlib呈现出店铺总数排名前10的国家\n", "#准备数据\n", "data1 = df.groupby(by=\"Country\").count()[\"Brand\"].sort_values(ascending=False)[:10]\n", "print(data1)\n", "_x = data1.index\n", "_y = data1.values\n", "\n", "#画图 \n", "plt.figure(figsize=(20,8),dpi=80)\n", "\n", "plt.bar(range(len(_x)),_y)\n", "\n", "plt.xticks(range(len(_x)),_x)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 1 }