{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "7. \ub370\uc774\ud130 \uc900\ube44\ud558\uae30: \ub2e4\ub4ec\uae30, \ubcc0\ud615, \ubcd1\ud569" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ub370\uc774\ud130 \ubd84\uc11d\uacfc \ubaa8\ub378\ub9c1 \uc791\uc5c5\uc740 \ub370\uc774\ud130\ub97c \ubd88\ub7ec\uc624\uace0, \ub2e4\ub4ec\uace0, \ubcc0\ud615\ud558\uace0 \uc7ac\uc815\ub82c\ud558\ub294, \ub370\uc774\ud130 \uc900\ube44 \uacfc\uc815\uc5d0 \ub9ce\uc740 \uc2dc\uac04\uc774 \uc18c\uc694\n", "- \uac00\ub054 \ud30c\uc77c\uc774\ub098 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc5d0\uc11c \uc800\uc7a5\ub41c \ub370\uc774\ud130\uac00 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc5d0\uc11c \uc0ac\uc6a9\ud558\uae30 \uc27d\uc9c0 \uc54a\uc740 \ubc29\uc2dd\uc73c\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uc744 \ub54c\ub3c4 \uc874\uc7ac\n", "- \ub370\uc774\ud130\uac00 \uc800\uc7a5\ub41c \ud615\ud0dc\ub97c \ub2e4\ub978 \ud615\ud0dc\ub85c \ubc14\uafb8\uae30 \uc704\ud574(\ud30c\uc774\uc36c, Perl, R, \uc790\ubc14 \ud639\uc740 awk\ub098 sed \uac19\uc740 \uc720\ub2c9\uc2a4\uc758 \ud14d\uc2a4\ud2b8 \ucc98\ub9ac \uc720\ud2f8\ub9ac\ud2f0\ub3c4 \uc0ac\uc6a9\ud558\uc9c0\ub9cc \ud30c\uc774\uc36c \ud45c\uc900 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc640 pandas\ub97c \ud568\uaed8 \uc0ac\uc6a9\ud558\uba74 \ud070 \uc5b4\ub824\uc6c0 \uc5c6\uc774 \ub370\uc774\ud130\ub97c \uc6d0\ud558\ub294 \ud615\ud0dc\ub85c \uac00\uacf5 \uac00\ub2a5\n", "- pandas\ub294 \uc774\ub7f0 \uc791\uc5c5\uc744 \uc704\ud55c \uc720\uc5f0\ud558\uace0 \ube60\ub978 \uace0\uc218\uc900\uc758 \uc54c\uace0\ub9ac\uc998\uacfc \ucc98\ub9ac \uae30\ub2a5 \uc81c\uacf5\n", "- pandas\uc5d0\uc11c \ucc3e\uc744 \uc218 \uc5c6\ub294 \uc0c8\ub85c\uc6b4 \ud615\ud0dc\uc758 \ub370\uc774\ud130 \ucc98\ub9ac \ubc29\uc2dd\uc744 \ubc1c\uacac\ud558\uac8c \ub418\uba74 \uc54c\ub824\ub2ec\ub77c\uace0 \ud568\n", "- pandas\ub294 \ub300\ubd80\ubd84\uc758 
\uc124\uacc4\uc640 \uad6c\ud604\uc5d0 \uc2e4\uc81c \uc560\ud50c\ub9ac\ucf00\uc774\uc158 \uac1c\ubc1c \uacfc\uc815 \uc911\uc5d0 \ubc1c\uc0dd\ud55c \uc694\uad6c\uc0ac\ud56d \ubc18\uc601" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "7.1 \ub370\uc774\ud130 \ud569\uce58\uae30" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### pandas \uac1d\uccb4\uc5d0 \uc800\uc7a5\ub41c \ub370\uc774\ud130\ub294 \uc5ec\ub7ec \ub0b4\uc7a5 \ud568\uc218\ub97c \uc774\uc6a9\ud574 \ud569\uce58\uae30 \uac00\ub2a5\n", "\n", "- pandas.merge\ub294 \ud558\ub098 \uc774\uc0c1\uc758 \ud0a4\ub97c \uae30\uc900\uc73c\ub85c DataFrame\uc758 \ub85c\uc6b0\ub97c \ud569\uce5c\ub2e4. SQL\uc774\ub098 \ub2e4\ub978 \uad00\uacc4\ud615 \ub370\uc774\ud130 \ubca0\uc774\uc2a4\uc758 join \uc5f0\uc0b0\uacfc \uc720\uc0ac\n", "- pandas.concat\uc740 \ud558\ub098\uc758 \ucd95\uc744 \ub530\ub77c \uac1d\uccb4\ub97c \uc774\uc5b4\ubd99\uc784\n", "- combine_first \uc778\uc2a4\ud134\uc2a4 \uba54\uc11c\ub4dc\ub294 \ub450 \uac1d\uccb4\ub97c \ud3ec\uac1c\uc11c \ud55c \uac1d\uccb4\uc5d0\uc11c \ub204\ub77d\ub41c \ub370\uc774\ud130\ub97c \ub2e4\ub978 \uac1d\uccb4\uc5d0 \uc788\ub294 \uac12\uc73c\ub85c \ucc44\uc6b8 \uc218 \uc788\ub3c4\ub85d \ud55c\ub2e4.\n", "\n", "#### \ub0b4\uc7a5 \ud568\uc218\ub294 \uc55e\uc73c\ub85c \uc774 \ucc45 \uc804\ubc18\uc5d0 \uac78\uccd0 \uacc4\uc18d \uc0ac\uc6a9\ud558\uac8c \ub420 \uac83\uc774\ubbc0\ub85c \ube60\ub978 \uc2dc\uac04 \ub0b4\uc5d0 \uc775\uc219\ud574\uc9c0\uc790(\ud53c\ud560 \uc218 \uc5c6\uc73c\uba74 \uc990\uae30\ub77c!)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.1.1 \ub370\uc774\ud130\ubca0\uc774\uc2a4 \uc2a4\ud0c0\uc77c\ub85c DataFrame \ud569\uce58\uae30\n", "\n", "- merge\ub098 join \uc5f0\uc0b0\uc740 \uad00\uacc4\ud615 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc758 \ud575\uc2ec\uc801\uc778 \uc5f0\uc0b0\uc73c\ub85c, \ud0a4\ub97c \ud558\ub098 \uc774\uc0c1 \uc0ac\uc6a9\ud574\uc11c \ub370\uc774\ud130 \uc9d1\ud569\uc758 
\ub85c\uc6b0\ub97c \ud569\uce68" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "from pandas import DataFrame, Series" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n", " 'data1': range(7)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 169 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame({'key': ['a', 'b', 'd'],\n", " 'data2': range(3)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 170 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 a\n", "6 6 b" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2key
0 0 a
1 1 b
2 2 d
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ " data2 key\n", "0 0 a\n", "1 1 b\n", "2 2 d" ] } ], "prompt_number": 8 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc77c\ub300\ub2e4\uc758 \uc608\uc81c\n", "\n", "- df1 \ub370\uc774\ud130\ub294 key \uce7c\ub7fc\uc5d0 \uc5ec\ub7ec \uac1c\uc758 a,b \uc874\uc7ac\n", "- df2\uc758 key \uce7c\ub7fc\uc740 \uc720\uc77c\ud55c \ub85c\uc6b0\uac00 \uc874\uc7ac\n", "- \uc774 \uac1d\uccb4\uc5d0 \ub300\ud574 merge \ud568\uc218 \ud638\ucd9c" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(df1, df2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc5ec\uae30\uc5d0\uc11c \ub098\ub294 \uc5b4\ub5a4 \uce7c\ub7fc\uc744 \ubcd1\ud569\ud560 \uac83\uc778\uc9c0 \uba85\uc2dc\ud558\uc9c0 \uc54a\uc558\ub294\ub370, merge \ud568\uc218\ub294 **\uacb9\uce58\ub294 \uce7c\ub7fc\uc758 \uc774\ub984**\uc744 **\ud0a4**\ub85c \uc0ac\uc6a9\n", "\n", "------\n", "\n", "- \uc774 \uc608\uc5d0\uc11c\ub294 key \uce7c\ub7fc(data1, key, data2, key \uc911 key\uac00 \uacb9\uce68)\n", "- \uc774\ub807\uac8c\ub3c4 \uc791\ub3d9\ud55c\ub2e4\uace0 \uc608\ub97c \ub4e4\uc5c8\uc9c0\ub9cc **\uba85\uc2dc\uc801**\uc73c\ub85c **\uc9c0\uc815**\ud558\ub294 **\uc2b5\uad00**\uc744 \ub4e4\uc774\uc790\n", "\n", "- df1\uc5d0\uc11c c\uc640 df2\uc5d0\uc11c d\ub294 \uad50\ucc28\ud558\ub294\uac8c \uc5c6\uc5b4\uc11c \uac12 \ub204\ub77d\n", "- \uc0c1\uc2dd\uc801\uc73c\ub85c \uae30\ubcf8 \uc804\uc81c\ub294 \uad50\ucc28\ud558\ub294 \uac83\uc774\ub2e4. \uc65c\ub0d0\ud558\uba74 \uad50\ucc28\ub3c4 \uc548\ud558\ub294\ub370 \ucd9c\ub825\ud574\uc8fc\uba74 \ub098\uc911\uc5d0 \ubb38\uc81c\uac00 \ub420 \uc218 \uc788\uc73c\ub2c8 \uc774\ub807\uac8c \ud55c\ub4ef\ud558\ub2e4. \uad50\ucc28 \uc548\ub418\ub294\uac74 \uc635\uc158\uc73c\ub85c \ud45c\ud604\ud574 \uc904 \uc218 \uc788\uc744 \uac83\uc774\ub2e4. \uc65c\ub0d0\uace0? \uadf8\uac8c \uc548\uc804\ube75\uc774\ub2c8\uae4c\n", "\n", "------" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(df1, df2, on='key')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ub450 \uac1d\uccb4\uc5d0 \uacf5\ud1b5\ub418\ub294 \uce7c\ub7fc \uc774\ub984\uc774 \ud558\ub098\ub3c4 \uc5c6\ub2e4\uba74 \ub530\ub85c \uc9c0\uc815\n", "\n", "-----\n", "\n", "- df3\ub294 \uce7c\ub7fc\uc774 lkey\uc640 data1\n", "- df4\ub294 \uce7c\ub7fc\uc774 rkey\uc640 data2\n", "- \uc11c\ub85c \uacb9\uce58\ub294\uac8c \ud558\ub098\ub3c4 \uc5c6\ub2e4. \n", "- \uadf8\ub7ec\ub2c8 \uba85\uc2dc\uc801\uc73c\ub85c \uc9c0\uc815\ud574\uc918\uc57c \ud55c\ub2e4.\n", "\n", "------" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df3 = DataFrame({'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n", " 'data1': range(7)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "df4 = DataFrame({'rkey': ['a', 'b', 'd'],\n", " 'data2': range(3)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "df3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1lkey
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ " data1 lkey\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 a\n", "6 6 b" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "df4" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2rkey
0 0 a
1 1 b
2 2 d
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ " data2 rkey\n", "0 0 a\n", "1 1 b\n", "2 2 d" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "# left, right \ub458 \ub2e4 \uac19\uc774 \uc368\uc918\uc57c \ud55c\ub2e4.\n", "# \ud558\ub098\ub9cc \uc368\uc8fc\uba74 \uc5b4\ub5bb\uac8c \ub420\uae4c? \uc774\ub7f4\ub550 Test \ud574\ubcf4\ub294\uac8c \uc9f1!\n", "pd.merge(df3, df4, left_on='lkey', right_on='rkey')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1lkeydata2rkey
0 0 b 1 b
1 1 b 1 b
2 6 b 1 b
3 2 a 0 a
4 4 a 0 a
5 5 a 0 a
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ " data1 lkey data2 rkey\n", "0 0 b 1 b\n", "1 1 b 1 b\n", "2 6 b 1 b\n", "3 2 a 0 a\n", "4 4 a 0 a\n", "5 5 a 0 a" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "# TypeError! \n", "# \uc18c\uc2a4\ub97c \ubcf4\uba74 right_on == left_on \ube44\uad50\ud558\ub294 \uacf3\uc774 \uc788\ub2e4.\n", "# \ub2f9\uc5f0\ud788 right_on\uc740 \uba85\uc2dc\ud558\uc9c0 \uc54a\uc558\uc73c\ub2c8 NoneType\uc73c\ub85c \ub118\uc5b4\uac00\uc11c \ube44\uad50\uac00 \uc548\ub41c\ub2e4!\n", "# \uc544 \uadf8 \uc804\uc5d0 len \ud568\uc218\uc5d0\uc11c NoneType\uc758 \uae38\uc774\ub97c \uc7ac\ub824\uace0 \ud558\ub2c8 \uc624\ub958\uac00 \ub098\ub294 \uac83\uc784!\n", "pd.merge(df3, df4, left_on='lkey')" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "object of type 'NoneType' has no len()", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# \ub2f9\uc5f0\ud788 right_on\uc740 \uba85\uc2dc\ud558\uc9c0 \uc54a\uc558\uc73c\ub2c8 NoneType\uc73c\ub85c \ub118\uc5b4\uac00\uc11c \ube44\uad50\uac00 \uc548\ub41c\ub2e4!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# \uc544 \uadf8 \uc804\uc5d0 len \ud568\uc218\uc5d0\uc11c NoneType\uc758 \uae38\uc774\ub97c \uc7ac\ub824\uace0 \ud558\ub2c8 \uc624\ub958\uac00 \ub098\ub294 \uac83\uc784!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf4\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mleft_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'lkey'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mright_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mleft_index\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 36\u001b[0;31m copy=copy)\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 178\u001b[0m (self.left_join_keys,\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_join_keys\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m self.join_names) = self._get_merge_keys()\n\u001b[0m\u001b[1;32m 181\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_get_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0mleft_keys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright_keys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \"\"\"\n\u001b[0;32m--> 298\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_specification\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 299\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[0mleft_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_validate_specification\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft_on\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 416\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft_on\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 417\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: object of type 'NoneType' has no len()" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "# None\uc740 \ub2f9\uc5f0\ud788 \uae38\uc774\uac00 \uc5c6\uc9c0.\n", "len(None)" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "object of type 'NoneType' has no len()", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# None\uc740 \ub2f9\uc5f0\ud788 \uae38\uc774\uac00 \uc5c6\uc9c0.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mTypeError\u001b[0m: object of type 'NoneType' has no len()" ] } ], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc55e\uc758 \uacb0\uacfc\ub97c \uc798 \uc0b4\ud3b4\ubcf4\uba74 'c'\uc640 'd'\uc5d0 \ud574\ub2f9\ud558\ub294 \uac12\uc774 \ube60\uc9d0\n", "- merge \ud568\uc218\ub294 \uae30\ubcf8\uc801\uc73c\ub85c \ub0b4\ubd80\uc870\uc778(inner join)\uc744 \uc218\ud589\ud558\uc5ec \uad50\uc9d1\ud569\uc778 \uacb0\uacfc\ub97c \ubc18\ud658\n", "\n", "#### how keyword\n", "\n", "- 'left': \uc67c\ucabd \uc6b0\uc120 
\uc678\ubd80\uc870\uc778. \uc67c\ucabd\uc758 \ubaa8\ub4e0 \ub85c\uc6b0\ub97c \ud3ec\ud568\ud558\ub294 \uacb0\uacfc \ubc18\ud658\n", "- 'right': \uc624\ub978\ucabd \uc6b0\uc120 \uc678\ubd80\uc870\uc778. \uc624\ub978\ucabd\uc758 \ubaa8\ub4e0 \ub85c\uc6b0\ub97c \ud3ec\ud568\ud558\ub294 \uacb0\uacfc \ubc18\ud658\n", "- 'outer': \uc644\uc804 \uc678\ubd80\uc870\uc778. \ud569\uc9d1\ud569\uc778 \uacb0\uacfc \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(df1, df2, how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
6 3 cNaN
7NaN d 2
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0\n", "6 3 c NaN\n", "7 NaN d 2" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "# left\uc5d0\ub9cc \uc788\ub294 c\uae4c\uc9c0 \ud3ec\ud568\uc774 \ub41c \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\n", "pd.merge(df1, df2, how='left')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
6 3 cNaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0\n", "6 3 c NaN" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "# right\uc5d0\ub9cc \uc788\ub294 d\uae4c\uc9c0 \ud3ec\ud568\n", "pd.merge(df1, df2, how='right')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0
6NaN d 2
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 171, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0\n", "6 NaN d 2" ] } ], "prompt_number": 171 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ub2e4 \ub300 \ub2e4 \ubcd1\ud569\uc740 \uc798 \uc815\uc758\ub418\uc5b4 \uc788\uae34 \ud558\uc9c0\ub9cc \uc9c1\uad00\uc801\uc774\uc9c0\ub294 \uc54a\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],\n", " 'data1': range(6)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 172 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame({'key': ['a', 'b', 'a', 'b', 'd'],\n", " 'data2': range(5)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 173 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 b
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 174, "text": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 b" ] } ], "prompt_number": 174 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2key
0 0 a
1 1 b
2 2 a
3 3 b
4 4 d
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 175, "text": [ " data2 key\n", "0 0 a\n", "1 1 b\n", "2 2 a\n", "3 3 b\n", "4 4 d" ] } ], "prompt_number": 175 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(df1, df2, on='key', how='left')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
10 3 cNaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 176, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 0 b 3\n", "2 1 b 1\n", "3 1 b 3\n", "4 5 b 1\n", "5 5 b 3\n", "6 2 a 0\n", "7 2 a 2\n", "8 4 a 0\n", "9 4 a 2\n", "10 3 c NaN" ] } ], "prompt_number": 176 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ub2e4 \ub300 \ub2e4 \uc870\uc778\uc740 \ub450 \ub85c\uc6b0\uc758 [\ub370\uce74\ub974\ud2b8 \uacf1](http://whiteship.tistory.com/1365) \ubc18\ud658(b)\n", "\n", "- \uc67c\ucabd \uc678\ubd80\uc870\uc778\uc744 \uae30\uc900\n", "- \uc67c\ucabd\uc5d0 \uc788\ub294 b\ub294 0,1,5 \uc774\ub807\uac8c 3\uac1c \uc874\uc7ac\n", "- \uc624\ub978\ucabd\uc5d0 \uc788\ub294 b\ub294 1,3 \uc774\ub807\uac8c 2\uac1c \uc874\uc7ac\n", "- 0,1,5\ub97c \uae30\uc900\uc73c\ub85c 2\uac1c\uc529 \uc870\ud569. 3 * 2 = 6. \ucd1d 6\uac1c\uc758 \uacb0\uacfc\n", "- \uc774 \uc870\uc778 \uba54\uc11c\ub4dc\ub294 \uacb0\uacfc\uc5d0 \ub098\ud0c0\ub098\ub294 \uad6c\ubcc4\ub418\ub294 \ud0a4\uc5d0 \ub300\ud574\uc11c\ub9cc \uc801\uc6a9" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# how parameter default is inner\n", "pd.merge(df1, df2, how='inner')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 0 b 3\n", "2 1 b 1\n", "3 1 b 3\n", "4 5 b 1\n", "5 5 b 3\n", "6 2 a 0\n", "7 2 a 2\n", "8 4 a 0\n", "9 4 a 2" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(df1, df2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ " data1 key data2\n", "0 0 b 1\n", "1 0 b 3\n", "2 1 b 1\n", "3 1 b 3\n", "4 5 b 1\n", "5 5 b 3\n", "6 2 a 0\n", "7 2 a 2\n", "8 4 a 0\n", "9 4 a 2" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 27 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### how Parameter\n", "\n", "how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n", "\n", "- left: use only keys from left frame (SQL: left outer join)\n", "- right: use only keys from right frame (SQL: right outer join)\n", "- outer: use union of keys from both frames (SQL: full outer join)\n", "- inner: use intersection of keys from both frames (SQL: inner join)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc5ec\ub7ec \uac1c\uc758 \ud0a4\ub97c \ubcd1\ud569\ud558\ub824\uba74 \uce7c\ub7fc \uc774\ub984\uc774 \ub4e4\uc5b4\uac04 \ub9ac\uc2a4\ud2b8 \ub118\uae40" ] }, { "cell_type": "code", "collapsed": false, "input": [ "left = DataFrame({'key1': ['foo', 'foo', 'bar'],\n", " 'key2': ['one', 'two', 'one'],\n", " 'lval': [1, 2, 3]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "right = DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],\n", " 'key2': ['one', 'one', 'one', 'two'],\n", " 'rval': [4, 5, 6, 7]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "left" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lval
0 foo one 1
1 foo two 2
2 bar one 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ " key1 key2 lval\n", "0 foo one 1\n", "1 foo two 2\n", "2 bar one 3" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "right" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2rval
0 foo one 4
1 foo one 5
2 bar one 6
3 bar two 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 31, "text": [ " key1 key2 rval\n", "0 foo one 4\n", "1 foo one 5\n", "2 bar one 6\n", "3 bar two 7" ] } ], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left, right, on=['key1', 'key2'], how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lvalrval
0 foo one 1 4
1 foo one 1 5
2 foo two 2NaN
3 bar one 3 6
4 bar twoNaN 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 32, "text": [ " key1 key2 lval rval\n", "0 foo one 1 4\n", "1 foo one 1 5\n", "2 foo two 2 NaN\n", "3 bar one 3 6\n", "4 bar two NaN 7" ] } ], "prompt_number": 32 }, { "cell_type": "markdown", "metadata": {}, "source": [ "------\n", "\n", "#### outer \ud574\uc11d\n", "\n", "- on parameter\ub85c key1, key2\ub97c \uae30\uc900\uc73c\ub85c \ubcd1\ud569\n", "- left\uc5d0\uc11c key1=foo, key2=one \uc77c \ub54c lval\uc740 1\uc774\ub2e4.\n", "- right\uc5d0\uc11c\ub294 rval\uc774 4\uc774\ub2e4.\n", "- \uc774\ub807\uac8c 1\uac1c\uc758 \ub85c\uc6b0\uac00 \uc644\uc131\ub41c\ub2e4.\n", "- \ubaa8\ub4e0 \ub85c\uc6b0\uc5d0 \ub300\ud574\uc11c \uc2e4\ud589\ud558\uace0\n", "- 2, 4 \uc778\ub371\uc2a4\ucc98\ub7fc \ud55c \ucabd\uc5d0\ub9cc \uac12\uc774 \uc788\ub294 \uacbd\uc6b0\ub294 NaN\uc73c\ub85c \ud45c\uc2dc\n", "\n", "-----" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### merge \uba54\uc11c\ub4dc\uc758 \uc885\ub958\uc5d0 \ub530\ub77c \uc5b4\ub5a4 \ud0a4 \uc870\ud569\uc774 \uacb0\uacfc\ub85c \ubc18\ud658\ub418\ub294\uc9c0 \uc54c\ub824\uba74 \uc2e4\uc81c \uad6c\ud604\uacfc\ub294 \uc870\uae08 \ub2e4\ub974\uc9c0\ub9cc \uc5ec\ub7ec \uac1c\uc758 \ud0a4\uac00 \ub4e4\uc5b4\uc788\ub294 \ud29c\ud50c\uc758 \ubc30\uc5f4\uc774 \ub2e8\uc77c \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ub41c\ub2e4\uace0 \uc0dd\uac01\ud558\uba74 \ub41c\ub2e4.(\ub77c\uace0 \uc368\uc838 \uc788\ub294\ub370 \ud574\uc11d \uc548\ub428...)\n", "\n", "#### \uce7c\ub7fc\uacfc \uce7c\ub7fc\uc744 \uc870\uc778\ud560 \ub54c \uc804\ub2ec\ud55c DataFrame \uac1d\uccb4\uc758 \uc0c9\uc778\uc740 \ubb34\uc2dc\ub41c\ub2e4.\n", "\n", "#### \uba38\uc9c0 \uc5f0\uc0b0\uc5d0\uc11c \uace0\ub824\ud574\uc57c \ud560 \uc0ac\ud56d\n", "\n", "- \uacb9\uce58\ub294 \uce7c\ub7fc \uc774\ub984\uc5d0 \ub300\ud55c \ucc98\ub9ac\n", "- \ucd95\uc758 \uc774\ub984\uc744 \ubcc0\uacbd\ud574\uc11c \uc218\ub3d9\uc73c\ub85c \uce7c\ub7fc \uc774\ub984\uc744 \uacb9\uce58\uac8c \ud560 
\uc218\ub3c4 \uc788\uace0\n", "- merge \ud568\uc218\uc5d0 \uc788\ub294 suffixes \uc778\uc790\ub97c \ud1b5\ud574 \ub450 DataFrame \uac1d\uccb4\uc5d0\uc11c \uacb9\uce58\ub294 \uce7c\ub7fc \uc774\ub984 \ub4a4\uc5d0 \ubd99\uc778 \ubb38\uc790\uc5f4\uc744 \uc9c0\uc815\ud560 \uc218\ub3c4 \uc788\uc74c" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left, right, on='key1')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2_xlvalkey2_yrval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo two 2 one 4
3 foo two 2 one 5
4 bar one 3 one 6
5 bar one 3 two 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 33, "text": [ " key1 key2_x lval key2_y rval\n", "0 foo one 1 one 4\n", "1 foo one 1 one 5\n", "2 foo two 2 one 4\n", "3 foo two 2 one 5\n", "4 bar one 3 one 6\n", "5 bar one 3 two 7" ] } ], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left, right, on='key1', suffixes=('_left', '_right'))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2_leftlvalkey2_rightrval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo two 2 one 4
3 foo two 2 one 5
4 bar one 3 one 6
5 bar one 3 two 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 34, "text": [ " key1 key2_left lval key2_right rval\n", "0 foo one 1 one 4\n", "1 foo one 1 one 5\n", "2 foo two 2 one 4\n", "3 foo two 2 one 5\n", "4 bar one 3 one 6\n", "5 bar one 3 two 7" ] } ], "prompt_number": 34 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### merge \ud568\uc218 \uc778\uc790 \ubaa9\ub85d\n", "\n", "\uc778\uc790 | \uc124\uba85\n", "--- | ---\n", "left | \uba38\uc9c0\ud558\ub824\ub294 DataFrame \uc911 \uc67c\ucabd\uc5d0 \uc704\uce58\ud55c DataFrame\n", "right | \uba38\uc9c0\ud558\ub824\ub294 DataFrame \uc911 \uc624\ub978\ucabd\uc5d0 \uc704\uce58\ud55c DataFrame\n", "how | \uc870\uc778\ubc29\ubc95. 'inner', 'outer', 'left', 'right'. \uae30\ubcf8\uac12\uc740 inner\n", "on | \uc870\uc778\ud558\ub824\ub294 \uce7c\ub7fc \uc774\ub984. \ubc18\ub4dc\uc2dc \ub450 DataFrame \uac1d\uccb4 \ubaa8\ub450\uc5d0 \uc788\ub294 \uc774\ub984\uc774\uc5b4\uc57c \ud55c\ub2e4. \ub9cc\uc57d \uba85\uc2dc\ub418\uc9c0 \uc54a\uace0 \ub2e4\ub978 \uc870\uc778 \ud0a4\ub3c4 \uc8fc\uc5b4\uc9c0\uc9c0 \uc54a\uc73c\uba74 left\uc640 right\uc5d0\uc11c \uacf5\ud1b5\ub418\ub294 \uce7c\ub7fc\uc744 \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ud55c\ub2e4.\n", "left_on | \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ud560 left DataFrame\uc758 \uce7c\ub7fc\n", "right_on | \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ud560 right DataFrame\uc758 \uce7c\ub7fc\n", "left_index | \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ud560 left DataFrame\uc758 \uc0c9\uc778 \ub85c\uc6b0(\ub2e4\uc911 \uc0c9\uc778\uc77c \uacbd\uc6b0\uc758 \ud0a4)\n", "right_index | \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ud560 right DataFrame\uc758 \uc0c9\uc778 \ub85c\uc6b0(\ub2e4\uc911 \uc0c9\uc778\uc77c \uacbd\uc6b0\uc758 \ud0a4)\n", "sort | \uc870\uc778 \ud0a4\uc5d0 \ub530\ub77c \ubcd1\ud569\ub41c \ub370\uc774\ud130\ub97c \uc0ac\uc804 \uc21c\uc73c\ub85c \uc815\ub82c. \uae30\ubcf8\uac12\uc740 True. 
\ub300\uc6a9\ub7c9 \ub370\uc774\ud130\uc758 \uacbd\uc6b0 False\ub77c\uba74 \uc131\ub2a5\uc0c1\uc758 \uc774\ub4dd\uc744 \uc5bb\uc744 \uc218\ub3c4 \uc788\ub2e4.\n", "suffixes | \uce7c\ub7fc \uc774\ub984\uc774 \uacb9\uce60 \uacbd\uc6b0 \uac01 \uce7c\ub7fc \uc774\ub984 \ub4a4\uc5d0 \ubd99\uc77c \ubb38\uc790\uc5f4\uc758 \ud29c\ud50c. \uae30\ubcf8\uac12\uc740 ('_x', '_y'). \ub9cc\uc57d 'data'\ub77c\ub294 \uce7c\ub7fc \uc774\ub984\uc774 \uc591\ucabd DataFrame\uc5d0 \uac19\uc774 \uc874\uc7ac\ud558\uba74 \uacb0\uacfc\uc5d0\uc11c\ub294 'data_x', 'data_y'\ub85c \ub098\ud0c0\ub09c\ub2e4.\n", "copy | False\uc77c \uacbd\uc6b0 \uc608\uc678\uc801\uc778 \uacbd\uc6b0\uc5d0 \uacb0\uacfc\ub85c \ub370\uc774\ud130\uac00 \ubcf5\uc0ac\ub418\uc9c0 \uc54a\ub3c4\ub85d \ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 \ud56d\uc0c1 \ubcf5\uc0ac\uac00 \uc774\ub8e8\uc5b4\uc9c4\ub2e4." ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "7.1.2 \uc0c9\uc778 \uba38\uc9c0\ud558\uae30" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uba38\uc9c0\ud558\ub824\ub294 \ud0a4\uac00 DataFrame\uc758 \uc0c9\uc778\uc77c \uc218 \uc788\ub2e4.\n", "- \uc774\ub54c\uc5d0\ub294 left_index = True, right_index = True \uc635\uc158\uc744 \uc9c0\uc815\ud574 \ud574\ub2f9 \uc0c9\uc778\uc744 \uba38\uc9c0 \ud0a4\ub85c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "left1 = DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'],\n", " 'value': range(6)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "right1 = DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "left1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
0 a 0
1 b 1
2 a 2
3 a 3
4 b 4
5 c 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 37, "text": [ " key value\n", "0 a 0\n", "1 b 1\n", "2 a 2\n", "3 a 3\n", "4 b 4\n", "5 c 5" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "right1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
group_val
a 3.5
b 7.0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 38, "text": [ " group_val\n", "a 3.5\n", "b 7.0" ] } ], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "# left_on\uc73c\ub85c \uc5b4\ub5a4 \uceec\ub7fc\uc774 \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ub420\uc9c0 \uacb0\uc815\n", "# right_index\ub85c \uc5b4\ub5a4 \uc778\ub371\uc2a4\uac00 \uc870\uc778 \ud0a4\ub85c \uc0ac\uc6a9\ub420\uc9c0 \uacb0\uc815\n", "pd.merge(left1, right1, left_on='key', right_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 39, "text": [ " key value group_val\n", "0 a 0 3.5\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "1 b 1 7.0\n", "4 b 4 7.0" ] } ], "prompt_number": 39 }, { "cell_type": "markdown", "metadata": {}, "source": [ "-------\n", "\n", "#### \ud574\uc11d\n", "\n", "- left\uc758 key\ub97c \uae30\uc900\uc73c\ub85c left1, right1\uc744 \uba38\uc9c0\ud558\ub294\ub370 right\ub294 index\ub97c \uae30\uc900\uc73c\ub85c \ud558\uaca0\ub2e4.\n", "- left1\uc5d0 a\uac00 3\uac1c \uc874\uc7ac\n", "- right1\uc5d0 a\uac00 \uc874\uc7ac\n", "- \uba38\uc9c0\ub41c \uac12\uc744 \ubcf4\uba74 \uc591\ucabd\uc5d0 \ubaa8\ub450 \uc788\ub294 a\ub97c \uae30\uc900\uc73c\ub85c \ud569\uccd0\uc9c4 \uac83\uc744 \ubcfc \uc218 \uc788\ub2e4.\n", "- \ub2e4\uc2dc \ud55c \ubc88 \uc5b8\uae09\ud558\uc9c0\ub9cc left1\uc758 key\uc640 right1\uc758 right_index\uac00 \uacb9\uccd0\uc9c0\uae30 \ub54c\ubb38\uc5d0 \uac00\ub2a5\ud55c \uc0c1\ud669\uc774\ub2e4.\n", "- \ub2e4\ub978 \uc635\uc158\uc774 \uc5c6\uae30 \ub54c\ubb38\uc5d0(default: inner. \uad50\ucc28) left1\uc5d0 \uc788\ub294 c\ub294 \ubcf4\uc5ec\uc8fc\uc9c0 \uc54a\ub294\ub2e4.\n", "\n", "------" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left1, right1, left_on='key', right_index=True, how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0
5 c 5 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 40, "text": [ " key value group_val\n", "0 a 0 3.5\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "1 b 1 7.0\n", "4 b 4 7.0\n", "5 c 5 NaN" ] } ], "prompt_number": 40 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### how\uc5d0 outer\n", "\n", "- \uc608\uc0c1\ud588\ub358\ub300\ub85c left1\uc758 c\uac00 \ud3ec\ud568\ub41c \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# right_on\uc744 \uc785\ub825\ud558\uc9c0 \uc54a\uc73c\ub2c8 \ub2f9\uc5f0\ud788 Error!\n", "pd.merge(left1, right1, left_on='key')" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "object of type 'NoneType' has no len()", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# right_on\uc744 \uc785\ub825\ud558\uc9c0 \uc54a\uc73c\ub2c8 \ub2f9\uc5f0\ud788 Error!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleft1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'key'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mright_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mleft_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mleft_index\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 36\u001b[0;31m copy=copy)\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 178\u001b[0m (self.left_join_keys,\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_join_keys\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m self.join_names) = self._get_merge_keys()\n\u001b[0m\u001b[1;32m 181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_get_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0mleft_keys\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mright_keys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \"\"\"\n\u001b[0;32m--> 298\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_specification\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 299\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[0mleft_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_validate_specification\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft_on\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 416\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft_on\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 417\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: object of type 'NoneType' has no len()" ] } ], "prompt_number": 41 }, { "cell_type": 
"code", "collapsed": false, "input": [ "# right_on\uc5d0 group_val\uc744 \uc785\ub825\ud558\uba74 \uad50\ucc28\ud558\ub294\uac8c \ud558\ub098\ub3c4 \uc5c6\uc73c\ub2c8 \uc544\ubb34\uac83\ub3c4 \ud45c\uc2dc\ud558\uc9c0 \uc54a\uc74c\n", "pd.merge(left1, right1, left_on='key', right_on='group_val')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "
Int64Index([], dtype=int64)Empty DataFrame
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 42, "text": [ "Empty DataFrame\n", "Columns: [key, value, group_val]\n", "Index: []" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left1, right1, left_on='key', right_on='group_val', how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0 a 0 NaN
1 a 2 NaN
2 a 3 NaN
3 b 1 NaN
4 b 4 NaN
5 c 5 NaN
6 NaNNaN 3.5
7 NaNNaN 7.0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 43, "text": [ " key value group_val\n", "0 a 0 NaN\n", "1 a 2 NaN\n", "2 a 3 NaN\n", "3 b 1 NaN\n", "4 b 4 NaN\n", "5 c 5 NaN\n", "6 NaN NaN 3.5\n", "7 NaN NaN 7.0" ] } ], "prompt_number": 43 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ud558\ub098\ub3c4 \uacb9\uce58\ub294\uac8c \uc5c6\uc744\ub54c how\ub85c outer\n", "\n", "- \uacb9\uce58\ub294\uac8c \ud558\ub098\ub3c4 \uc5c6\uc73c\ub2c8 inner \uc870\uc778\uc740 \uc544\ubb34\uac83\ub3c4 \uc548\ub098\uc624\ub294 \ubc18\uba74 outer \uc870\uc778\uc740 \ubaa8\ub450 \ud558\ub098 \uc774\uc0c1\uc740 NaN\uc778 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uba38\uc9c0\ub294 \uae30\ubcf8\uc801\uc73c\ub85c \uad50\uc9d1\ud569\uc744 \uad6c\ud558\uc9c0\ub9cc \uc678\ubd80\uc870\uc778\uc744 \uc2e4\ud589\ud574\uc11c \ud569\uc9d1\ud569\uc744 \uad6c\ud560 \uc218\ub3c4 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left1, right1, left_on='key', right_index=True, how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0
5 c 5 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 44, "text": [ " key value group_val\n", "0 a 0 3.5\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "1 b 1 7.0\n", "4 b 4 7.0\n", "5 c 5 NaN" ] } ], "prompt_number": 44 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uacc4\uce35 \uc0c9\uc778\ub41c \ub370\uc774\ud130\ub294 \uc57d\uac04 \ubcf5\uc7a1\ud558\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "lefth = DataFrame({'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n", " 'key2': [2000, 2001, 2002, 2001, 2002],\n", " 'data': np.arange(5.)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 45 }, { "cell_type": "code", "collapsed": false, "input": [ "righth = DataFrame(np.arange(12).reshape((6, 2)),\n", " index=[['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],\n", " [2001, 2000, 2000, 2000, 2001, 2002]],\n", " columns=['event1', 'event2'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "lefth" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2
0 0 Ohio 2000
1 1 Ohio 2001
2 2 Ohio 2002
3 3 Nevada 2001
4 4 Nevada 2002
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 47, "text": [ " data key1 key2\n", "0 0 Ohio 2000\n", "1 1 Ohio 2001\n", "2 2 Ohio 2002\n", "3 3 Nevada 2001\n", "4 4 Nevada 2002" ] } ], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [ "righth" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event1event2
Nevada2001 0 1
2000 2 3
Ohio2000 4 5
2000 6 7
2001 8 9
2002 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 48, "text": [ " event1 event2\n", "Nevada 2001 0 1\n", " 2000 2 3\n", "Ohio 2000 4 5\n", " 2000 6 7\n", " 2001 8 9\n", " 2002 10 11" ] } ], "prompt_number": 48 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc774\ub7f0 \uacbd\uc6b0\uc5d0\ub294 \ub9ac\uc2a4\ud2b8\ub85c \uc5ec\ub7ec \uac1c\uc758 \uce7c\ub7fc\uc744 \uc9c0\uc815\ud574\uc11c \uba38\uc9c0\ud574\uc57c \ud55c\ub2e4(\uc911\ubcf5\ub418\ub294 \uc0c9\uc778 \uac12\uc744 \ub2e4\ub8f0 \ub54c\ub294 \uc8fc\uc758)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# key1, key2\uc640 right_index\uac00 \uac19\uc73c\ub2c8 \ub428\n", "pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2event1event2
0 0 Ohio 2000 4 5
0 0 Ohio 2000 6 7
1 1 Ohio 2001 8 9
2 2 Ohio 2002 10 11
3 3 Nevada 2001 0 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 49, "text": [ " data key1 key2 event1 event2\n", "0 0 Ohio 2000 4 5\n", "0 0 Ohio 2000 6 7\n", "1 1 Ohio 2001 8 9\n", "2 2 Ohio 2002 10 11\n", "3 3 Nevada 2001 0 1" ] } ], "prompt_number": 49 }, { "cell_type": "code", "collapsed": false, "input": [ "# key1\uacfc right_index\ub294 \ub2e4\ub974\ub2e4. key2\uc5d0 \ud574\ub2f9\ud558\ub294 \uc5f0\ub3c4\uac00 \uc5c6\uc5b4\uc11c Error!\n", "pd.merge(lefth, righth, left_on=['key1'], right_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "AssertionError", "evalue": "", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# key1\uacfc right_index\ub294 \ub2e4\ub974\ub2e4. key2\uc5d0 \ud574\ub2f9\ud558\ub294 \uc5f0\ub3c4\uac00 \uc5c6\uc5b4\uc11c Error!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlefth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrighth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'key1'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[0mright_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mleft_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mleft_index\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 36\u001b[0;31m copy=copy)\n\u001b[0m\u001b[1;32m 37\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)\u001b[0m\n\u001b[1;32m 178\u001b[0m (self.left_join_keys,\n\u001b[1;32m 179\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_join_keys\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m self.join_names) = self._get_merge_keys()\n\u001b[0m\u001b[1;32m 181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_get_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0mleft_keys\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mright_keys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 297\u001b[0m \"\"\"\n\u001b[0;32m--> 298\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_specification\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 299\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[0mleft_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_validate_specification\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft_on\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 408\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 409\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_on\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright_on\u001b[0m 
\u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAssertionError\u001b[0m: " ] } ], "prompt_number": 179 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True, how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2event1event2
0 0 Ohio 2000 4 5
0 0 Ohio 2000 6 7
1 1 Ohio 2001 8 9
2 2 Ohio 2002 10 11
3 3 Nevada 2001 0 1
4 4 Nevada 2002NaNNaN
4NaN Nevada 2000 2 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 50, "text": [ " data key1 key2 event1 event2\n", "0 0 Ohio 2000 4 5\n", "0 0 Ohio 2000 6 7\n", "1 1 Ohio 2001 8 9\n", "2 2 Ohio 2002 10 11\n", "3 3 Nevada 2001 0 1\n", "4 4 Nevada 2002 NaN NaN\n", "4 NaN Nevada 2000 2 3" ] } ], "prompt_number": 50 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc591\ucabd\uc5d0 \uacf5\ud1b5\uc73c\ub85c \uc788\ub294 \uc5ec\ub7ec \uac1c\uc758 \uc0c9\uc778\uc744 \uba38\uc9c0\ud558\ub294 \uac83\uc740 \uc77c\ub3c4 \uc544\ub2c8\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "left2 = DataFrame([[1., 2.], [3., 4.], [5., 6.]], index=['a', 'c', 'e'],\n", " columns=['Ohio', 'Nevada'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 51 }, { "cell_type": "code", "collapsed": false, "input": [ "right2 = DataFrame([[7., 8.,], [9., 10.], [11., 12.], [13, 14]],\n", " index=['b', 'c', 'd', 'e'], columns=['Missouri', 'Alabama'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 52 }, { "cell_type": "code", "collapsed": false, "input": [ "left2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevada
a 1 2
c 3 4
e 5 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 53, "text": [ " Ohio Nevada\n", "a 1 2\n", "c 3 4\n", "e 5 6" ] } ], "prompt_number": 53 }, { "cell_type": "code", "collapsed": false, "input": [ "right2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MissouriAlabama
b 7 8
c 9 10
d 11 12
e 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 54, "text": [ " Missouri Alabama\n", "b 7 8\n", "c 9 10\n", "d 11 12\n", "e 13 14" ] } ], "prompt_number": 54 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left2, right2, how='outer', left_index=True, right_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
a 1 2NaNNaN
bNaNNaN 7 8
c 3 4 9 10
dNaNNaN 11 12
e 5 6 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 55, "text": [ " Ohio Nevada Missouri Alabama\n", "a 1 2 NaN NaN\n", "b NaN NaN 7 8\n", "c 3 4 9 10\n", "d NaN NaN 11 12\n", "e 5 6 13 14" ] } ], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.merge(left2, right2, left_index=True, right_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
c 3 4 9 10
e 5 6 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 56, "text": [ " Ohio Nevada Missouri Alabama\n", "c 3 4 9 10\n", "e 5 6 13 14" ] } ], "prompt_number": 56 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc0c9\uc778\uc73c\ub85c \uba38\uc9c0\ud560 \ub54c DataFrame\uc758 join \uba54\uc11c\ub4dc \ud3b8\ub9ac\uc131\n", "\n", "- join \uba54\uc11c\ub4dc\ub294 **\uce7c\ub7fc\uc774 \uacb9\uce58\uc9c0 \uc54a\uc73c\uba70 \uc644\uc804\ud788 \uac19\uac70\ub098 \uc720\uc0ac\ud55c \uc0c9\uc778 \uad6c\uc870**\ub97c \uac00\uc9c4 \uc5ec\ub7ec \uac1c\uc758 DataFrame \uac1d\uccb4\ub97c \ubcd1\ud569\ud560 \ub54c \uc0ac\uc6a9 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc704\uc640 \ub611\uac19\uc740 \uacb0\uacfc\uc778\ub370 join\uc744 \uc0ac\uc6a9\ud558\uba74 \ud6e8\uc52c \uae54\ub054\ud558\ub2e4\n", "left2.join(right2, how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
a 1 2NaNNaN
bNaNNaN 7 8
c 3 4 9 10
dNaNNaN 11 12
e 5 6 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 57, "text": [ " Ohio Nevada Missouri Alabama\n", "a 1 2 NaN NaN\n", "b NaN NaN 7 8\n", "c 3 4 9 10\n", "d NaN NaN 11 12\n", "e 5 6 13 14" ] } ], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc0c9\uc778 \uae30\uc900\uc73c\ub85c \uba38\uc9c0\ud560 \ub54c \uc0ac\uc6a9!\n", "left2.join(right2, how='inner')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
c 3 4 9 10
e 5 6 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 180, "text": [ " Ohio Nevada Missouri Alabama\n", "c 3 4 9 10\n", "e 5 6 13 14" ] } ], "prompt_number": 180 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uacfc\uac70\uc5d0 \uc791\uc131\ub41c pandas\uc758 \uc77c\ubd80 \ucf54\ub4dc \uc81c\uc57d\uc73c\ub85c \uc778\ud574 DataFrame\uc758 join \uba54\uc11c\ub4dc\ub294 \uc67c\ucabd \uc6b0\uc120 \uc870\uc778 \uc218\ud589\n", "\n", "- join \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud55c DataFrame\uc758 \uce7c\ub7fc \uc911 \ud558\ub098\uc5d0 \ub300\ud574\uc11c \uc870\uc778\uc744 \uc218\ud589\ud558\ub294 \uac83\ub3c4 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "left1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
0 a 0
1 b 1
2 a 2
3 a 3
4 b 4
5 c 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 58, "text": [ " key value\n", "0 a 0\n", "1 b 1\n", "2 a 2\n", "3 a 3\n", "4 b 4\n", "5 c 5" ] } ], "prompt_number": 58 }, { "cell_type": "code", "collapsed": false, "input": [ "right1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
group_val
a 3.5
b 7.0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 59, "text": [ " group_val\n", "a 3.5\n", "b 7.0" ] } ], "prompt_number": 59 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc67c\ucabd \uc6b0\uc120 \uc870\uc778\uc774\ub77c c\uac00 \uc788\uc74c\n", "left1.join(right1, on='key')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0 a 0 3.5
1 b 1 7.0
2 a 2 3.5
3 a 3 3.5
4 b 4 7.0
5 c 5 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 60, "text": [ " key value group_val\n", "0 a 0 3.5\n", "1 b 1 7.0\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "4 b 4 7.0\n", "5 c 5 NaN" ] } ], "prompt_number": 60 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc0c9\uc778 \ub300 \uc0c9\uc778\uc73c\ub85c \ub450 DataFrame\uc744 \ud569\uce58\ub824\uba74 \uadf8\ub0e5 \uac04\ub2e8\ud788 \uba38\uc9c0\ud558\ub824\ub294 DataFrame\uc758 \ub9ac\uc2a4\ud2b8\ub97c join \uba54\uc11c\ub4dc\uc5d0 \ub118\uae30\uba74 \ub41c\ub2e4. \ud558\uc9c0\ub9cc \uc77c\ubc18\uc801\uc73c\ub85c \uc774\ub7f0 \uba38\uc9c0\ub294 \ub2e4\uc74c\uc758 concat \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud55c\ub2e4\n", "\n", "#### concat \uba54\uc11c\ub4dc??\n", "\n", "- \uba54\uc11c\ub4dc\ub77c\uace0 \ud588\ub294\ub370 \uc5c6\uc74c... \ub118\uae38\ub54c list\ub85c \ub118\uae30\ub294\uac78 concat \uba54\uc11c\ub4dc\ub77c\uace0 \ubd80\ub974\ub098?\n", "- \uc544\ub2c8\uba74 list \uc548\uc5d0\uc11c ,\ub97c \uae30\uc900\uc73c\ub85c \ub118\uae30\ub294 \uac78 concat \uba54\uc11c\ub4dc\ub77c\uace0 \ubd80\ub974\ub098?\n", "- .concat() \uc73c\ub85c \ub098\uc640\uc57c \ud560 \uac83 \uac19\uc740\ub370 -_-;;" ] }, { "cell_type": "code", "collapsed": false, "input": [ "another = DataFrame([[7., 8.], [9., 10.], [11., 12.], [16., 17.]],\n", " index=['a', 'c', 'e', 'f'], columns=['New York', 'Oregon'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [ "left2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevada
a 1 2
c 3 4
e 5 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 62, "text": [ " Ohio Nevada\n", "a 1 2\n", "c 3 4\n", "e 5 6" ] } ], "prompt_number": 62 }, { "cell_type": "code", "collapsed": false, "input": [ "right2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MissouriAlabama
b 7 8
c 9 10
d 11 12
e 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 63, "text": [ " Missouri Alabama\n", "b 7 8\n", "c 9 10\n", "d 11 12\n", "e 13 14" ] } ], "prompt_number": 63 }, { "cell_type": "code", "collapsed": false, "input": [ "another" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
New YorkOregon
a 7 8
c 9 10
e 11 12
f 16 17
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 64, "text": [ " New York Oregon\n", "a 7 8\n", "c 9 10\n", "e 11 12\n", "f 16 17" ] } ], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": [ "left2.join([right2, another])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabamaNew YorkOregon
a 1 2NaNNaN 7 8
c 3 4 9 10 9 10
e 5 6 13 14 11 12
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 65, "text": [ " Ohio Nevada Missouri Alabama New York Oregon\n", "a 1 2 NaN NaN 7 8\n", "c 3 4 9 10 9 10\n", "e 5 6 13 14 11 12" ] } ], "prompt_number": 65 }, { "cell_type": "code", "collapsed": false, "input": [ "left2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevada
a 1 2
c 3 4
e 5 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 66, "text": [ " Ohio Nevada\n", "a 1 2\n", "c 3 4\n", "e 5 6" ] } ], "prompt_number": 66 }, { "cell_type": "code", "collapsed": false, "input": [ "right2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MissouriAlabama
b 7 8
c 9 10
d 11 12
e 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 67, "text": [ " Missouri Alabama\n", "b 7 8\n", "c 9 10\n", "d 11 12\n", "e 13 14" ] } ], "prompt_number": 67 }, { "cell_type": "code", "collapsed": false, "input": [ "another" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
New YorkOregon
a 7 8
c 9 10
e 11 12
f 16 17
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 68, "text": [ " New York Oregon\n", "a 7 8\n", "c 9 10\n", "e 11 12\n", "f 16 17" ] } ], "prompt_number": 68 }, { "cell_type": "code", "collapsed": false, "input": [ "left2.join([right2, another], how='outer')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabamaNew YorkOregon
a 1 2NaNNaN 7 8
bNaNNaN 7 8NaNNaN
c 3 4 9 10 9 10
dNaNNaN 11 12NaNNaN
e 5 6 13 14 11 12
fNaNNaNNaNNaN 16 17
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 69, "text": [ " Ohio Nevada Missouri Alabama New York Oregon\n", "a 1 2 NaN NaN 7 8\n", "b NaN NaN 7 8 NaN NaN\n", "c 3 4 9 10 9 10\n", "d NaN NaN 11 12 NaN NaN\n", "e 5 6 13 14 11 12\n", "f NaN NaN NaN NaN 16 17" ] } ], "prompt_number": 69 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.1.3 \ucd95 \ub530\ub77c \uc774\uc5b4\ubd99\uc774\uae30\n", "\n", "- concatenation(\uc774\uc5b4 \ubd99\uc774\uae30)\n", "- binding(\uc5f0\uacb0)\n", "- stacking(\uc801\uce35)\n", "- NumPy\ub294 ndarray\ub97c \uc5f0\uacb0\ud558\ub294 concatenate \ud568\uc218 \uc81c\uacf5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "arr = np.arange(12).reshape((3, 4))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 70 }, { "cell_type": "code", "collapsed": false, "input": [ "arr" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 71, "text": [ "array([[ 0, 1, 2, 3],\n", " [ 4, 5, 6, 7],\n", " [ 8, 9, 10, 11]])" ] } ], "prompt_number": 71 }, { "cell_type": "code", "collapsed": false, "input": [ "type(arr)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 181, "text": [ "numpy.ndarray" ] } ], "prompt_number": 181 }, { "cell_type": "code", "collapsed": false, "input": [ "# axis = column\n", "np.concatenate([arr, arr], axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 72, "text": [ "array([[ 0, 1, 2, 3, 0, 1, 2, 3],\n", " [ 4, 5, 6, 7, 4, 5, 6, 7],\n", " [ 8, 9, 10, 11, 8, 9, 10, 11]])" ] } ], "prompt_number": 72 }, { "cell_type": "code", "collapsed": false, "input": [ "np.concatenate([arr, arr], axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 73, "text": [ "array([[ 0, 1, 2, 3],\n", " [ 4, 5, 6, 7],\n", " [ 8, 9, 10, 
11],\n", " [ 0, 1, 2, 3],\n", " [ 4, 5, 6, 7],\n", " [ 8, 9, 10, 11]])" ] } ], "prompt_number": 73 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Series\ub098 DataFrame \uac19\uc740 pandas \uac1d\uccb4\uc758 \ucee8\ud14d\uc2a4\ud2b8 \ub0b4\ubd80\uc5d0\ub294 \ucd95\ub9c8\ub2e4 \uc774\ub984\uc774 \uc788\uc5b4\uc11c \ubc30\uc5f4\uc744 \uc27d\uac8c \uc774\uc5b4\ubd99\uc77c \uc218 \uc788\ub3c4\ub85d \ub418\uc5b4 \uc788\ub2e4. \ub2e4\uc74c \ud2b9\uc774 \uc0ac\ud56d \uace0\ub824\n", "\n", "- \ub9cc\uc57d\uc5d0 \uc5f0\uacb0\ud558\ub824\ub294 \ub450 \uac1d\uccb4\uc758 \uc0c9\uc778\uc774 \uc11c\ub85c \ub2e4\ub974\ub2e4\uba74 \uacb0\uacfc\ub294 \uadf8 \uc0c9\uc778\uc758 \uad50\uc9d1\ud569\uc774\uc5b4\uc57c \ud558\ub294\uac00 \uc544\ub2c8\uba74 \ud569\uc9d1\ud569\uc774\uc5b4\uc57c \ud558\ub294\uac00?\n", "- \ud569\uccd0\uc9c4 \uacb0\uacfc\uc5d0\uc11c \ud569\uccd0\uc9c0\uae30 \uc804 \uac1d\uccb4\uc758 \ub370\uc774\ud130\ub97c \uad6c\ubd84\ud560 \uc218 \uc788\ub294\uac00?\n", "- \uc5b4\ub5a4 \ucd95\uc73c\ub85c \uc5f0\uacb0\ud560 \uac83\uc778\uc9c0 \uace0\ub824\ud574\uc57c \ud558\ub294\uac00?\n", "\n", "#### pandas\uc758 concat \ud568\uc218\ub294 \uc774 \uc0ac\ud56d\uc5d0 \ub300\ud55c \ud574\ub2f5 \uc81c\uacf5. \ub9e8\ub545\uc5d0 \ud5e4\ub529\ud574\ubd10\uc57c \ud55c\ub2e4! 
\ucf54\ub4dc\ub3c4 \uc9c1\uc811 \uccd0\ubd10\uc57c \ud55c\ub2e4!!\n", "\n", "#### \uc0c9\uc778\uc774 \uacb9\uce58\uc9c0 \uc54a\ub294 3\uac1c\uc758 Series \uac1d\uccb4" ] }, { "cell_type": "code", "collapsed": false, "input": [ "s1 = Series([0, 1], index=['a', 'b'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 184 }, { "cell_type": "code", "collapsed": false, "input": [ "s2 = Series([2, 3, 4], index=['c', 'd', 'e'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 185 }, { "cell_type": "code", "collapsed": false, "input": [ "s3 = Series([5, 6], index=['f', 'g'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 186 }, { "cell_type": "code", "collapsed": false, "input": [ "s1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 187, "text": [ "a 0\n", "b 1\n", "dtype: int64" ] } ], "prompt_number": 187 }, { "cell_type": "code", "collapsed": false, "input": [ "s2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 188, "text": [ "c 2\n", "d 3\n", "e 4\n", "dtype: int64" ] } ], "prompt_number": 188 }, { "cell_type": "code", "collapsed": false, "input": [ "s3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 189, "text": [ "f 5\n", "g 6\n", "dtype: int64" ] } ], "prompt_number": 189 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc774 \uc138 \uac1d\uccb4\ub97c \ub9ac\uc2a4\ud2b8\ub85c \ubb36\uc5b4\uc11c concat \ud568\uc218\uc5d0 \uc804\ub2ec\ud558\uba74 \uac12\uacfc \uc0c9\uc778\uc744 \uc5f0\uacb0!" 
] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s2, s3])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 190, "text": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "e 4\n", "f 5\n", "g 6\n", "dtype: int64" ] } ], "prompt_number": 190 }, { "cell_type": "code", "collapsed": false, "input": [ "# Series\uc778 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\n", "type( pd.concat([s1, s2, s3]) )" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 191, "text": [ "pandas.core.series.Series" ] } ], "prompt_number": 191 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### concat \ud568\uc218\ub294 axis = 0 \uae30\ubcf8\uac12. \uc0c8\ub85c\uc6b4 Series \uac1d\uccb4 \uc0dd\uc131\n", "\n", "- \ub9cc\uc57d axis=1\uc744 \ub118\uae34\ub2e4\uba74 \uacb0\uacfc\ub294 Series\uac00 \uc544\ub2c8\ub77c DataFrame\uc774 \ub420 \uac83\uc774\ub2e4.(axis=1\uc740 \uce7c\ub7fc\uc744 \uc758\ubbf8)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s2, s3], axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
a 0NaNNaN
b 1NaNNaN
cNaN 2NaN
dNaN 3NaN
eNaN 4NaN
fNaNNaN 5
gNaNNaN 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 192, "text": [ " 0 1 2\n", "a 0 NaN NaN\n", "b 1 NaN NaN\n", "c NaN 2 NaN\n", "d NaN 3 NaN\n", "e NaN 4 NaN\n", "f NaN NaN 5\n", "g NaN NaN 6" ] } ], "prompt_number": 192 }, { "cell_type": "code", "collapsed": false, "input": [ "# DataFrame\uc778 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4\n", "type( pd.concat([s1, s2, s3], axis=1) )" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 193, "text": [ "pandas.core.frame.DataFrame" ] } ], "prompt_number": 193 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uacb9\uce58\ub294 \ucd95\uc774 \uc5c6\uae30 \ub54c\ubb38\uc5d0 \uc678\ubd80\uc870\uc778(outer \uba54\uc11c\ub4dc)\uc73c\ub85c \uc815\ub82c\ub41c \ud569\uc9d1\ud569\uc744 \uc5bb\uc5c8\uc9c0\ub9cc join='inner'\ub97c \ub118\uaca8\uc11c \uad50\uc9d1\ud569\uc744 \uad6c\ud560 \uc218\ub3c4 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "s4 = pd.concat([s1 * 5, s3])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 194 }, { "cell_type": "code", "collapsed": false, "input": [ "s1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 195, "text": [ "a 0\n", "b 1\n", "dtype: int64" ] } ], "prompt_number": 195 }, { "cell_type": "code", "collapsed": false, "input": [ "s1 * 5" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 196, "text": [ "a 0\n", "b 5\n", "dtype: int64" ] } ], "prompt_number": 196 }, { "cell_type": "code", "collapsed": false, "input": [ "s3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 197, "text": [ "f 5\n", "g 6\n", "dtype: int64" ] } ], "prompt_number": 197 }, { "cell_type": "code", "collapsed": false, "input": [ "s4" ], "language": "python", "metadata": {}, "outputs": [ { 
"metadata": {}, "output_type": "pyout", "prompt_number": 198, "text": [ "a 0\n", "b 5\n", "f 5\n", "g 6\n", "dtype: int64" ] } ], "prompt_number": 198 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s4], axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a 0 0
b 1 5
fNaN 5
gNaN 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 88, "text": [ " 0 1\n", "a 0 0\n", "b 1 5\n", "f NaN 5\n", "g NaN 6" ] } ], "prompt_number": 88 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s4], axis=1, join='inner')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a 0 0
b 1 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 89, "text": [ " 0 1\n", "a 0 0\n", "b 1 5" ] } ], "prompt_number": 89 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### join_axes \uc778\uc790\ub85c \uba38\uc9c0\ud558\ub824\ub294 \ucd95\uc744 \uc9c1\uc811 \uc9c0\uc815\ub3c4 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s4], axis=1, join_axes=[['a', 'c', 'b', 'e']])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a 0 0
cNaNNaN
b 1 5
eNaNNaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 90, "text": [ " 0 1\n", "a 0 0\n", "c NaN NaN\n", "b 1 5\n", "e NaN NaN" ] } ], "prompt_number": 90 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Series\ub97c \uc774\uc5b4\ubd99\uc774\uae30 \uc804\uc758 \uac1c\ubcc4 Series\ub97c \uad6c\ubd84\ud560 \uc218 \uc5c6\ub2e4\ub294 \ubb38\uc81c\uac00 \uc0dd\uae30\ub294\ub370, \uc774 \uacbd\uc6b0 \uc774\uc5b4\ubd99\uc778 \ucd95\uc5d0 \ub300\ud574 \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc0dd\uc131\ud558\uc5ec \uc2dd\ubcc4\uc774 \uac00\ub2a5\ud558\ub3c4\ub85d \ud560 \uc218 \uc788\ub2e4.\n", "- **\uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc0ac\uc6a9\ud558\ub824\uba74 keys \uc778\uc790 \uc0ac\uc6a9**" ] }, { "cell_type": "code", "collapsed": false, "input": [ "result = pd.concat([s1, s1, s3], keys=['one', 'two', 'three'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 91 }, { "cell_type": "code", "collapsed": false, "input": [ "s1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 92, "text": [ "a 0\n", "b 1\n", "dtype: int64" ] } ], "prompt_number": 92 }, { "cell_type": "code", "collapsed": false, "input": [ "s2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 93, "text": [ "c 2\n", "d 3\n", "e 4\n", "dtype: int64" ] } ], "prompt_number": 93 }, { "cell_type": "code", "collapsed": false, "input": [ "s3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 94, "text": [ "f 5\n", "g 6\n", "dtype: int64" ] } ], "prompt_number": 94 }, { "cell_type": "code", "collapsed": false, "input": [ "# one = s1, two = s1, three = s3\ub85c \ud560\ub2f9 (s2\ub294 \uc0ac\uc6a9\ub418\uc9c0 \uc54a\uc74c)\n", "result" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 95, "text": [ "one a 0\n", " b 1\n", "two a 0\n", " b 1\n", "three f 5\n", " g 6\n", 
"dtype: int64" ] } ], "prompt_number": 95 }, { "cell_type": "code", "collapsed": false, "input": [ "# unstack \ud568\uc218\uc5d0 \ub300\ud55c \uc790\uc138\ud55c \ub0b4\uc6a9\uc740 \ub098\uc911\uc5d0 \uc54c\uc544\ubcf4\uc790\n", "result.unstack()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abfg
one 0 1NaNNaN
two 0 1NaNNaN
threeNaNNaN 5 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 96, "text": [ " a b f g\n", "one 0 1 NaN NaN\n", "two 0 1 NaN NaN\n", "three NaN NaN 5 6" ] } ], "prompt_number": 96 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Series\ub294 axis=1\ub85c \ubcd1\ud569\ud560 \uacbd\uc6b0 keys\ub294 DataFrame\uc758 \uce7c\ub7fc \uc81c\ubaa9\uc774 \ub41c\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([s1, s2, s3], axis=1, keys=['one', 'two', 'three'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothree
a 0NaNNaN
b 1NaNNaN
cNaN 2NaN
dNaN 3NaN
eNaN 4NaN
fNaNNaN 5
gNaNNaN 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 97, "text": [ " one two three\n", "a 0 NaN NaN\n", "b 1 NaN NaN\n", "c NaN 2 NaN\n", "d NaN 3 NaN\n", "e NaN 4 NaN\n", "f NaN NaN 5\n", "g NaN NaN 6" ] } ], "prompt_number": 97 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### DataFrame \uac1d\uccb4\uc5d0 \ub300\ud574\uc11c\ub3c4 \uc9c0\uae08\uae4c\uc9c0\uc640 \uac19\uc740 \ubc29\uc2dd\uc73c\ub85c \uc801\uc6a9 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'], \n", " columns=['one', 'two'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 200 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'],\n", " columns=['three', 'four'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 201 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwo
a 0 1
b 2 3
c 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 202, "text": [ " one two\n", "a 0 1\n", "b 2 3\n", "c 4 5" ] } ], "prompt_number": 202 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
threefour
a 5 6
c 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 203, "text": [ " three four\n", "a 5 6\n", "c 7 8" ] } ], "prompt_number": 203 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([df1, df2], axis=1, keys=['level1', 'level2'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
level1level2
onetwothreefour
a 0 1 5 6
b 2 3NaNNaN
c 4 5 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 204, "text": [ " level1 level2 \n", " one two three four\n", "a 0 1 5 6\n", "b 2 3 NaN NaN\n", "c 4 5 7 8" ] } ], "prompt_number": 204 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ub9ac\uc2a4\ud2b8 \ub300\uc2e0 \uac1d\uccb4\uc758 \uc0ac\uc804\uc744 \ub118\uae34\ub2e4\uba74 \uc0ac\uc804\uc758 \ud0a4\uac00 keys \uc635\uc158\uc73c\ub85c \uc0ac\uc6a9\ub428" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat({'level1': df1, 'level2': df2}, axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
level1level2
onetwothreefour
a 0 1 5 6
b 2 3NaNNaN
c 4 5 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 205, "text": [ " level1 level2 \n", " one two three four\n", "a 0 1 5 6\n", "b 2 3 NaN NaN\n", "c 4 5 7 8" ] } ], "prompt_number": 205 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],\n", " names=['upper', 'lower'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
upperlevel1level2
loweronetwothreefour
a 0 1 5 6
b 2 3NaNNaN
c 4 5 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 206, "text": [ "upper level1 level2 \n", "lower one two three four\n", "a 0 1 5 6\n", "b 2 3 NaN NaN\n", "c 4 5 7 8" ] } ], "prompt_number": 206 }, { "cell_type": "code", "collapsed": false, "input": [ "# Concatenate pandas objects along a particular axis with optional set logic\n", "# along the other axes. Can also add a layer of hierarchical indexing on the\n", "# concatenation axis, which may be useful if the labels are the same (or\n", "# overlapping) on the passed axis number\n", "pd.concat?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 207 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],\n", " names=['upper'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
upperlevel1level2
onetwothreefour
a 0 1 5 6
b 2 3NaNNaN
c 4 5 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 208, "text": [ "upper level1 level2 \n", " one two three four\n", "a 0 1 5 6\n", "b 2 3 NaN NaN\n", "c 4 5 7 8" ] } ], "prompt_number": 208 }, { "cell_type": "code", "collapsed": false, "input": [ "# names\ub294 2\uac1c\uae4c\uc9c0\ub9cc \ub428. \ud604\uc7ac df\uc5d0\uc11c\n", "pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],\n", " names=['upper', 'lower', 'test'])" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "Length of names (4) must be same as level (2)", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],\n\u001b[0;32m----> 2\u001b[0;31m names=['upper', 'lower', 'test'])\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity)\u001b[0m\n\u001b[1;32m 883\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjoin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 884\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 885\u001b[0;31m verify_integrity=verify_integrity)\n\u001b[0m\u001b[1;32m 886\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 887\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity)\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverify_integrity\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mverify_integrity\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 959\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 960\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnew_axes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_new_axes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 961\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_get_new_axes\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1160\u001b[0m \u001b[0mconcat_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1161\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1162\u001b[0;31m \u001b[0mconcat_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_concat_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1163\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1164\u001b[0m \u001b[0mnew_axes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconcat_axis\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_get_concat_axis\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1195\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m concat_axis = _make_concat_multiindex(indexes, self.keys,\n\u001b[0;32m-> 1197\u001b[0;31m self.levels, self.names)\n\u001b[0m\u001b[1;32m 1198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1199\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_check_integrity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconcat_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/merge.pyc\u001b[0m in \u001b[0;36m_make_concat_multiindex\u001b[0;34m(indexes, keys, levels, names)\u001b[0m\n\u001b[1;32m 1274\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0m_get_consensus_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mMultiIndex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlabel_list\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1278\u001b[0m \u001b[0mnew_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/index.pyc\u001b[0m in \u001b[0;36m__new__\u001b[0;34m(cls, levels, labels, sortorder, names, copy)\u001b[0m\n\u001b[1;32m 1599\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1600\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1601\u001b[0;31m \u001b[0msubarr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1603\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/index.pyc\u001b[0m in \u001b[0;36m_set_names\u001b[0;34m(self, values)\u001b[0m\n\u001b[1;32m 1783\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1784\u001b[0m raise ValueError('Length of names (%d) must be same as level '\n\u001b[0;32m-> 1785\u001b[0;31m '(%d)' % (len(values),self.nlevels))\n\u001b[0m\u001b[1;32m 1786\u001b[0m \u001b[0;31m# set the name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
1787\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: Length of names (4) must be same as level (2)" ] } ], "prompt_number": 212 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ub9c8\uc9c0\ub9c9\uc73c\ub85c, DataFrame\uc758 \ub85c\uc6b0 \uc0c9\uc778\uc774 \ubd84\uc11d\uc5d0 \ubd88\ud544\uc694\ud55c \uacbd\uc6b0\uc5d0\ub294 \uc5b4\ub5bb\uac8c \ud560 \uac83\uc778\uac00?\n", "#### \uadf8 \ub54c\ub294 ignore_index=True \uc635\uc158 \uc801\uc6a9" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 108 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 109 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0 0.778910 0.311274 1.141611-2.813149
1-0.360643-0.794361 2.023458-0.449211
2-1.677890-0.345416 0.718920 0.171574
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 110, "text": [ " a b c d\n", "0 0.778910 0.311274 1.141611 -2.813149\n", "1 -0.360643 -0.794361 2.023458 -0.449211\n", "2 -1.677890 -0.345416 0.718920 0.171574" ] } ], "prompt_number": 110 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bda
0-2.771987 1.315566 1.019023
1-0.435467-1.595407-0.038993
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 111, "text": [ " b d a\n", "0 -2.771987 1.315566 1.019023\n", "1 -0.435467 -1.595407 -0.038993" ] } ], "prompt_number": 111 }, { "cell_type": "code", "collapsed": false, "input": [ "# index \ubd80\ubd84\uc774 \ud558\ub098\ub85c \ud569\uccd0\uc9d0\n", "pd.concat([df1, df2], ignore_index=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0 0.778910 0.311274 1.141611-2.813149
1-0.360643-0.794361 2.023458-0.449211
2-1.677890-0.345416 0.718920 0.171574
3 1.019023-2.771987 NaN 1.315566
4-0.038993-0.435467 NaN-1.595407
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 112, "text": [ " a b c d\n", "0 0.778910 0.311274 1.141611 -2.813149\n", "1 -0.360643 -0.794361 2.023458 -0.449211\n", "2 -1.677890 -0.345416 0.718920 0.171574\n", "3 1.019023 -2.771987 NaN 1.315566\n", "4 -0.038993 -0.435467 NaN -1.595407" ] } ], "prompt_number": 112 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.concat([df1, df2])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0 0.778910 0.311274 1.141611-2.813149
1-0.360643-0.794361 2.023458-0.449211
2-1.677890-0.345416 0.718920 0.171574
0 1.019023-2.771987 NaN 1.315566
1-0.038993-0.435467 NaN-1.595407
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 113, "text": [ " a b c d\n", "0 0.778910 0.311274 1.141611 -2.813149\n", "1 -0.360643 -0.794361 2.023458 -0.449211\n", "2 -1.677890 -0.345416 0.718920 0.171574\n", "0 1.019023 -2.771987 NaN 1.315566\n", "1 -0.038993 -0.435467 NaN -1.595407" ] } ], "prompt_number": 113 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### concat \ud568\uc218 \uc778\uc790\n", "\n", "\uc778\uc790 | \uc124\uba85\n", "--- | ---\n", "objs | \uc774\uc5b4\ubd99\uc77c pandas \uac1d\uccb4\uc758 \uc0ac\uc804\uc774\ub098 \ub9ac\uc2a4\ud2b8. \ud544\uc218 \uc778\uc790\n", "axis | \uc774\uc5b4\ubd99\uc77c \ucd95 \ubc29\ud5a5. \uae30\ubcf8\uac12\uc740 0\n", "join | \uc870\uc778 \ubc29\uc2dd. 'inner'(\ub0b4\ubd80\uc870\uc778, \uad50\uc9d1\ud569)\uc640 'outer'(\uc678\ubd80\uc870\uc778, \ud569\uc9d1\ud569)\uac00 \uc788\uc73c\uba70 \uae30\ubcf8\uac12\uc740 'outer'\n", "join_axes | \ud569\uc9d1\ud569/\uad50\uc9d1\ud569\uc744 \uc218\ud589\ud558\ub294 \ub300\uc2e0 \ub2e4\ub978 n-1 \ucd95\uc73c\ub85c \uc0ac\uc6a9\ud560 \uc0c9\uc778\uc744 \uc9c0\uc815\ud55c\ub2e4.\n", "keys | \uc774\uc5b4\ubd99\uc77c \uac1d\uccb4\ub098 \uc774\uc5b4\ubd99\uc778 \ucd95\uc5d0 \ub300\ud55c \uacc4\uce35 \uc0c9\uc778\uc744 \uc0dd\uc131\ud558\ub294 \ub370 \uc5f0\uad00\ub41c \uac12\uc774\ub2e4. \ub9ac\uc2a4\ud2b8\ub098 \uc784\uc758\uc758 \uac12\uc774 \ub4e4\uc5b4\uc788\ub294 \ubc30\uc5f4, \ud29c\ud50c\uc758 \ubc30\uc5f4 \ub610\ub294 \ubc30\uc5f4\uc758 \ub9ac\uc2a4\ud2b8(levels \uc635\uc158\uc5d0 \ub2e4\ucc28\uc6d0 \ubc30\uc5f4\uc774 \ub118\uc5b4\uc628 \uacbd\uc6b0)\uac00 \ub420 \uc218 \uc788\ub2e4.\n", "levels | \uacc4\uce35 \uc0c9\uc778 \ub808\ubca8\ub85c \uc0ac\uc6a9\ud560 \uc0c9\uc778\uc744 \uc9c0\uc815\ud55c\ub2e4. 
keys\uac00 \ub118\uc5b4\uc628 \uacbd\uc6b0 \uc5ec\ub7ec \uac1c\uc758 \uc0c9\uc778\uc744 \uc9c0\uc815\ud55c\ub2e4.\n", "names | keys\ub098 levels \ud639\uc740 \ub458 \ub2e4 \uc788\uc744 \uacbd\uc6b0, \uc0dd\uc131\ub41c \uacc4\uce35 \ub808\ubca8\uc744 \uc704\ud55c \uc774\ub984\n", "verify_integrity | \uc774\uc5b4\ubd99\uc778 \uac1d\uccb4\uc5d0 \uc911\ubcf5\ub418\ub294 \ucd95\uc774 \uc788\ub294\uc9c0 \uac80\uc0ac\ud558\uace0 \uc788\ub2e4\uba74 \uc608\uc678\ub97c \ubc1c\uc0dd\uc2dc\ud0a8\ub2e4. \uae30\ubcf8\uac12\uc740 False\ub85c, \uc911\ubcf5\uc744 \ud5c8\uc6a9\ud55c\ub2e4.\n", "ignore_index | \uc774\uc5b4\ubd99\uc778 \ucd95\uc758 \uc0c9\uc778\uc744 \uc720\uc9c0\ud558\uc9c0 \uc54a\uace0 range(total_length)\ub85c \uc0c8\ub85c\uc6b4 \uc0c9\uc778\uc744 \uc0dd\uc131\ud55c\ub2e4." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.1.4 \uacb9\uce58\ub294 \ub370\uc774\ud130 \ud569\uce58\uae30\n", "\n", "- \ub370\uc774\ud130\ub97c \ud569\uce60 \ub54c \uba38\uc9c0\ub098 \uc774\uc5b4\ubd99\uc774\uae30\ub85c\ub294 \ubd88\uac00\ub2a5\ud55c \uc0c1\ud669\uc774 \uc788\ub294\ub370, \ub450 \ub370\uc774\ud130\uc14b\uc758 \uc0c9\uc778\uc774 \uc77c\ubd80 \uacb9\uce58\uac70\ub098 \uc804\uccb4\uac00 \uacb9\uce58\ub294 \uacbd\uc6b0\uac00 \uadf8\ub807\ub2e4.\n", "- \ubca1\ud130\ud654\ub41c if-else \uad6c\ubb38\uc744 \ud45c\ud604\ud558\ub294 NumPy\uc758 Where \ud568\uc218\ub97c \ud1b5\ud574 \uc790\uc138\ud788 \uc54c\uc544\ubcf4\uc790" ] }, { "cell_type": "code", "collapsed": false, "input": [ "a = Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],\n", " index=['f', 'e', 'd', 'c', 'b', 'a'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 114 }, { "cell_type": "code", "collapsed": false, "input": [ "b = Series(np.arange(len(a), dtype=np.float64),\n", " index=['f', 'e', 'd', 'c', 'b', 'a'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 115 }, { "cell_type": "code", "collapsed": false, "input": [ "b[-1] = np.nan" ], "language": 
"python", "metadata": {}, "outputs": [], "prompt_number": 116 }, { "cell_type": "code", "collapsed": false, "input": [ "a" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 117, "text": [ "f NaN\n", "e 2.5\n", "d NaN\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 117 }, { "cell_type": "code", "collapsed": false, "input": [ "np.arange(len(a))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 214, "text": [ "array([0, 1, 2, 3, 4, 5])" ] } ], "prompt_number": 214 }, { "cell_type": "code", "collapsed": false, "input": [ "b" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 118, "text": [ "f 0\n", "e 1\n", "d 2\n", "c 3\n", "b 4\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 118 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc624\ub79c\ub9cc\uc5d0 \ub098\uc640\uc11c np.where\uac00 \uc5b4\ub5a4 \uae30\ub2a5\uc778\uc9c0 \uae4c\uba39\uc5c8\uc5c8\ub2e4!\n", "# pd.isnull(a)\uc5d0\uc11c null\uc778 \uac83\uc740 True\ub97c \ub3cc\ub824\uc900\ub2e4.\n", "# \uc0bc\ud56d \uc5f0\uc0b0\uc790\ucc98\ub7fc null\uc778 True\uac00 \uc788\uc73c\uba74 b\uac12\uc744 \ub300\uc785\ud558\uace0\n", "# null\uc774 False\uba74 a\uac12\uc744 \ub300\uc785\ud55c\ub2e4.\n", "np.where(pd.isnull(a), b, a)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 119, "text": [ "array([ 0. , 2.5, 2.
, 3.5, 4.5, nan])" ] } ], "prompt_number": 119 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.isnull(a)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 120, "text": [ "f True\n", "e False\n", "d True\n", "c False\n", "b False\n", "a True\n", "dtype: bool" ] } ], "prompt_number": 120 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Series \uac1d\uccb4\uc758 combine_first\ub77c\ub294 \uba54\uc11c\ub4dc\ub294 \uc704\uc640 \ub3d9\uc77c\ud55c \uc5f0\uc0b0\uc744 \uc81c\uacf5, \ub370\uc774\ud130 \uc815\ub82c \uae30\ub2a5\uae4c\uc9c0 \uc81c\uacf5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "b[:-2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 215, "text": [ "f 0\n", "e 1\n", "d 2\n", "c 3\n", "dtype: float64" ] } ], "prompt_number": 215 }, { "cell_type": "code", "collapsed": false, "input": [ "a[2:]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 216, "text": [ "d NaN\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 216 }, { "cell_type": "code", "collapsed": false, "input": [ "b[:-2].combine_first(a[2:])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 121, "text": [ "a NaN\n", "b 4.5\n", "c 3.0\n", "d 2.0\n", "e 1.0\n", "f 0.0\n", "dtype: float64" ] } ], "prompt_number": 121 }, { "cell_type": "code", "collapsed": false, "input": [ "b[:-2].combine_first" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 122, "text": [ "" ] } ], "prompt_number": 122 }, { "cell_type": "code", "collapsed": false, "input": [ "b[:-2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 123, "text": [ "f 0\n", "e 1\n", "d 2\n", "c 3\n", 
"dtype: float64" ] } ], "prompt_number": 123 }, { "cell_type": "code", "collapsed": false, "input": [ "b.combine_first(a)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 124, "text": [ "f 0\n", "e 1\n", "d 2\n", "c 3\n", "b 4\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 124 }, { "cell_type": "code", "collapsed": false, "input": [ "# a\uac00 NaN\uc77c \ub54c\ub9cc b \uac12\uc744 \ucc38\uc870\ud55c\ub2e4.\n", "# a\ub97c \uba3c\uc800 combine \ud558\ub418 NaN\uc774\ub77c\uba74 b\uac12 \ucc38\uc870\n", "a.combine_first(b)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 125, "text": [ "f 0.0\n", "e 2.5\n", "d 2.0\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 125 }, { "cell_type": "code", "collapsed": false, "input": [ "a" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 126, "text": [ "f NaN\n", "e 2.5\n", "d NaN\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] } ], "prompt_number": 126 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### DataFrame\uc5d0\uc11c combine_first \uba54\uc11c\ub4dc\ub294 \uce7c\ub7fc\uc5d0 \ub300\ud574 \uac19\uc740 \ub3d9\uc791\n", "\n", "- \uc774\ub97c \ud1b5\ud574 \ud638\ucd9c\ud558\ub294 \uac1d\uccb4\uc5d0\uc11c \ub204\ub77d\ub41c \ub370\uc774\ud130\ub97c \uc778\uc790\ub85c \ub118\uae34 \uac1d\uccb4\uc5d0 \uc788\ub294 \uac12\uc73c\ub85c \ucc44\uc6cc \ub123\uae30 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame({'a': [1., np.nan, 5., np.nan],\n", " 'b': [np.nan, 2., np.nan, 6.],\n", " 'c': range(2, 18, 4)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 127 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame({'a': [5., 4., np.nan, 3., 7.],\n", " 'b': [np.nan, 3., 4., 6., 8.]})" ], "language": "python",
"metadata": {}, "outputs": [], "prompt_number": 128 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0 1NaN 2
1NaN 2 6
2 5NaN 10
3NaN 6 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 129, "text": [ " a b c\n", "0 1 NaN 2\n", "1 NaN 2 6\n", "2 5 NaN 10\n", "3 NaN 6 14" ] } ], "prompt_number": 129 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0 5NaN
1 4 3
2NaN 4
3 3 6
4 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 130, "text": [ " a b\n", "0 5 NaN\n", "1 4 3\n", "2 NaN 4\n", "3 3 6\n", "4 7 8" ] } ], "prompt_number": 130 }, { "cell_type": "code", "collapsed": false, "input": [ "df1.combine_first(df2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0 1NaN 2
1 4 2 6
2 5 4 10
3 3 6 14
4 7 8NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 131, "text": [ " a b c\n", "0 1 NaN 2\n", "1 4 2 6\n", "2 5 4 10\n", "3 3 6 14\n", "4 7 8 NaN" ] } ], "prompt_number": 131 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "7.2 \uc7ac\ud615\uc131\uacfc \ud53c\ubc97" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc7ac\ud615\uc131(reshaping, \ud53c\ubc97 \uc5f0\uc0b0): \ud45c\ud615\uc2dd\uc758 \ub370\uc774\ud130\ub97c \uc7ac\ubc30\uce58\ud558\ub294 \ub2e4\uc591\ud55c \uae30\ubcf8 \uc5f0\uc0b0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.2.1 \uacc4\uce35\uc801 \uc0c9\uc778\uc73c\ub85c \uc7ac\ud615\uc131\ud558\uae30\n", "\n", "- stack: \ub370\uc774\ud130\uc758 \uce7c\ub7fc\uc744 \ub85c\uc6b0\ub85c \ud53c\ubc97 \ub610\ub294 \ud68c\uc804\uc2dc\ud0a8\ub2e4.\n", "- unstack: \ub85c\uc6b0\ub97c \uce7c\ub7fc\uc73c\ub85c \ud53c\ubc97\uc2dc\ud0a8\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubb38\uc790\uc5f4\uc774 \ub2f4\uae34 \ubc30\uc5f4\uc744 \ub85c\uc6b0\uc640 \uce7c\ub7fc\uc758 \uc0c9\uc778\uc73c\ub85c\ud558\ub294 \uc791\uc740 DataFrame\n", "data = DataFrame(np.arange(6).reshape((2, 3)),\n", " index=pd.Index(['Ohio', 'Colorado'], name='state'),\n", " columns=pd.Index(['one', 'two', 'three'], name='number'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 132 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio 0 1 2
Colorado 3 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 133, "text": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] } ], "prompt_number": 133 }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubb38\uc790\uc5f4\uc774 \ub2f4\uae34 \ubc30\uc5f4\uc744 \ub85c\uc6b0\uc640 \uce7c\ub7fc\uc758 \uc0c9\uc778\uc73c\ub85c\ud558\ub294 \uc791\uc740 DataFrame\n", "# pd.Index\ub85c index\ub97c \uc124\uc815 \ud6c4\uc5d0 name\uc744 \uc785\ub825\ud558\uae30 \uc704\ud574\uc11c \uc800\ub807\uac8c \ud55c \uac83.\n", "# \uae30\ubcf8\uc801\uc73c\ub85c\ub294 columns\uc5d0 list\ub9cc \ub118\uaca8\ub3c4 \uc0dd\uc131 \ub428\n", "data2 = DataFrame(np.arange(6).reshape((2, 3)),\n", " index=pd.Index(['Ohio', 'Colorado'], name='state'),\n", " columns=['one', 'two', 'three'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 134 }, { "cell_type": "code", "collapsed": false, "input": [ "data2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothree
state
Ohio 0 1 2
Colorado 3 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 135, "text": [ " one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] } ], "prompt_number": 135 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### stack \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud558\uba74 \uce7c\ub7fc\uc774 \ub85c\uc6b0\ub85c \ud53c\ubc97\ub418\uc5b4 \ub2e4\uc74c\uacfc \uac19\uc740 Series \uac1d\uccb4\ub97c \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "result = data.stack()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 136 }, { "cell_type": "code", "collapsed": false, "input": [ "result" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 137, "text": [ "state number\n", "Ohio one 0\n", " two 1\n", " three 2\n", "Colorado one 3\n", " two 4\n", " three 5\n", "dtype: int64" ] } ], "prompt_number": 137 }, { "cell_type": "code", "collapsed": false, "input": [ "type(result)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 138, "text": [ "pandas.core.series.Series" ] } ], "prompt_number": 138 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### unstack \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud558\uba74 \uc55e\uc5d0\uc11c \uc5bb\uc740 \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uac00\uc9c4 Series\ub85c\ubd80\ud130 DataFrame\uc744 \uc5bb\uc744 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubcf4\ud1b5 \uac00\uc7a5 \uc548\ucabd\uc5d0 \uc788\ub294 one, two, three\ubd80\ud130 \uceec\ub7fc\uc73c\ub85c \ub044\uc9d1\uc5b4 \ub0c4\n", "result.unstack()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio 0 1 2
Colorado 3 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 139, "text": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] } ], "prompt_number": 139 }, { "cell_type": "code", "collapsed": false, "input": [ "type( result.unstack() )" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 140, "text": [ "pandas.core.frame.DataFrame" ] } ], "prompt_number": 140 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ubcf4\ud1b5 \uac00\uc7a5 \uc548\ucabd\uc5d0 \uc788\ub294 \uac83\ubd80\ud130 \ub044\uc9d1\uc5b4\ub0b4\ub294\ub370(stack\ub3c4 \ub9c8\ucc2c\uac00\uc9c0), \ub808\ubca8 \uc774\ub984\uc774\ub098 \uc22b\uc790\ub97c \uc804\ub2ec\ud574\uc11c \ub044\uc9d1\uc5b4\ub0bc \ub2e8\uacc4\ub97c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4.\n", "\n", "- \ub808\ubca8\uc774\ub984\uc774 0\uc774\uba74 \uccab\ubc88\uc9f8 index\ub97c \uceec\ub7fc\uc73c\ub85c\n", "- \ub808\ubca8\uc774\ub984\uc774 1\uc774\uba74 \ub450\ubc88\uc9f8 index\ub97c \uceec\ub7fc\uc73c\ub85c\n", "- \uc120\ud0dd\ud55c \uc774\ub984\uc744 \uceec\ub7fc\uc73c\ub85c" ] }, { "cell_type": "code", "collapsed": false, "input": [ "result.unstack(0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
number
one 0 3
two 1 4
three 2 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 141, "text": [ "state Ohio Colorado\n", "number \n", "one 0 3\n", "two 1 4\n", "three 2 5" ] } ], "prompt_number": 141 }, { "cell_type": "code", "collapsed": false, "input": [ "result.unstack('state')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
number
one 0 3
two 1 4
three 2 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 142, "text": [ "state Ohio Colorado\n", "number \n", "one 0 3\n", "two 1 4\n", "three 2 5" ] } ], "prompt_number": 142 }, { "cell_type": "code", "collapsed": false, "input": [ "result.unstack(1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio 0 1 2
Colorado 3 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 143, "text": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] } ], "prompt_number": 143 }, { "cell_type": "code", "collapsed": false, "input": [ "result.unstack('number')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio 0 1 2
Colorado 3 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 217, "text": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] } ], "prompt_number": 217 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud574\ub2f9 \ub808\ubca8\uc774 \uc788\ub294 \ubaa8\ub4e0 \uac12\uc774 \ud558\uc704 \uadf8\ub8f9\uc5d0 \uc18d\ud558\uc9c0 \uc54a\uc744 \uacbd\uc6b0 unstack\uc744 \ud558\uac8c \ub418\uba74 \ub204\ub77d\ub41c \ub370\uc774\ud130\uac00 \uc0dd\uae38 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "s1 = Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 144 }, { "cell_type": "code", "collapsed": false, "input": [ "s2 = Series([4, 5, 6], index=['c', 'd', 'e'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 145 }, { "cell_type": "code", "collapsed": false, "input": [ "data2 = pd.concat([s1, s2], keys=['one', 'two'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 146 }, { "cell_type": "code", "collapsed": false, "input": [ "s1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 147, "text": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "dtype: int64" ] } ], "prompt_number": 147 }, { "cell_type": "code", "collapsed": false, "input": [ "s2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 148, "text": [ "c 4\n", "d 5\n", "e 6\n", "dtype: int64" ] } ], "prompt_number": 148 }, { "cell_type": "code", "collapsed": false, "input": [ "data3 = pd.concat([s1, s2])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 149 }, { "cell_type": "code", "collapsed": false, "input": [ "data3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 150, "text": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "c 4\n", "d 
5\n", "e 6\n", "dtype: int64" ] } ], "prompt_number": 150 }, { "cell_type": "code", "collapsed": false, "input": [ "# index\ub85c s1\uc744 one, s2\ub97c two\ub85c \ud560\ub2f9\n", "data2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 151, "text": [ "one a 0\n", " b 1\n", " c 2\n", " d 3\n", "two c 4\n", " d 5\n", " e 6\n", "dtype: int64" ] } ], "prompt_number": 151 }, { "cell_type": "code", "collapsed": false, "input": [ "data2.unstack()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
one 0 1 2 3NaN
twoNaNNaN 4 5 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 152, "text": [ " a b c d e\n", "one 0 1 2 3 NaN\n", "two NaN NaN 4 5 6" ] } ], "prompt_number": 152 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### stack \uba54\uc11c\ub4dc\ub294 \ub204\ub77d\ub41c \ub370\uc774\ud130\ub97c \uc790\ub3d9\uc73c\ub85c \uac78\ub7ec\ub0b4\uae30 \ub54c\ubb38\uc5d0 \uc5f0\uc0b0\uc744 \uc27d\uac8c \uc6d0\uc0c1\ubcf5\uad6c \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubc14\uafe8\ub2e4 \uc6d0\uc0c1\ubcf5\uad6c... \uadfc\ub370 null\uac12 \ud3ec\ud568\ud558\uc9c0 \uc54a\ub294\ub2e4\ub294 \uac8c point!\n", "data2.unstack().stack()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 153, "text": [ "one a 0\n", " b 1\n", " c 2\n", " d 3\n", "two c 4\n", " d 5\n", " e 6\n", "dtype: float64" ] } ], "prompt_number": 153 }, { "cell_type": "code", "collapsed": false, "input": [ "data2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 154, "text": [ "one a 0\n", " b 1\n", " c 2\n", " d 3\n", "two c 4\n", " d 5\n", " e 6\n", "dtype: int64" ] } ], "prompt_number": 154 }, { "cell_type": "code", "collapsed": false, "input": [ "data2.unstack().stack(dropna=False)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 155, "text": [ "one a 0\n", " b 1\n", " c 2\n", " d 3\n", " e NaN\n", "two a NaN\n", " b NaN\n", " c 4\n", " d 5\n", " e 6\n", "dtype: float64" ] } ], "prompt_number": 155 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### DataFrame\uc744 unstack\ud560 \ub54c, unstack() \ub808\ubca8\uc740 \uacb0\uacfc\uc5d0\uc11c \uac00\uc7a5 \ub0ae\uc740 \ub2e8\uacc4" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame({'left': result, 'right': result + 5},\n", " columns=pd.Index(['left', 'right'], name='side'))" ], "language": 
"python", "metadata": {}, "outputs": [], "prompt_number": 156 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sideleftright
statenumber
Ohioone 0 5
two 1 6
three 2 7
Coloradoone 3 8
two 4 9
three 5 10
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 157, "text": [ "side left right\n", "state number \n", "Ohio one 0 5\n", " two 1 6\n", " three 2 7\n", "Colorado one 3 8\n", " two 4 9\n", " three 5 10" ] } ], "prompt_number": 157 }, { "cell_type": "code", "collapsed": false, "input": [ "result" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 158, "text": [ "state number\n", "Ohio one 0\n", " two 1\n", " three 2\n", "Colorado one 3\n", " two 4\n", " three 5\n", "dtype: int64" ] } ], "prompt_number": 158 }, { "cell_type": "code", "collapsed": false, "input": [ "# unstack \ud560 \ub54c \uacb0\uacfc\uc5d0\uc11c \uac00\uc7a5 \ub0ae\uc740 \ub808\ubca8. \uc989, side\uc758 \ud558\uc704\ub85c \uac14\ub2e4.\n", "df.unstack('state')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sideleftright
stateOhioColoradoOhioColorado
number
one 0 3 5 8
two 1 4 6 9
three 2 5 7 10
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 159, "text": [ "side left right \n", "state Ohio Colorado Ohio Colorado\n", "number \n", "one 0 3 5 8\n", "two 1 4 6 9\n", "three 2 5 7 10" ] } ], "prompt_number": 159 }, { "cell_type": "code", "collapsed": false, "input": [ "# side\ub3c4 number\uc758 \ud558\uc704\ub85c \uac14\ub2e4. \n", "df.unstack('state').stack('side')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
numberside
oneleft 0 3
right 5 8
twoleft 1 4
right 6 9
threeleft 2 5
right 7 10
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 219, "text": [ "state Ohio Colorado\n", "number side \n", "one left 0 3\n", " right 5 8\n", "two left 1 4\n", " right 6 9\n", "three left 2 5\n", " right 7 10" ] } ], "prompt_number": 219 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Why? \ud53c\ubc97\uc744 \ud558\uc9c0?\n", "\n", "- [\ud53c\ubc97\ud14c\uc774\ube14(Pivot Table)](http://www.soongin.com/2012/10/pivot-table.html)\n", "- \ud53c\ubc97\ud14c\uc774\ube14\uc740 \uc5d1\uc140\uc774 \uc81c\uacf5\ud558\ub294 \uac00\uc7a5 \uac15\ub825\ud55c \ub370\uc774\ud130 \ubd84\uc11d \ub3c4\uad6c\uc785\ub2c8\ub2e4. \uc544\ub9c8 \uc800\uc5d0\uac8c \uc5d1\uc140\uc758 \uac00\uc7a5 \ub6f0\uc5b4\ub09c \uae30\ub2a5\uc774 \ubb50\ub0d0\uace0 \ubb3c\uc73c\uc2e0\ub2e4\uba74... \uc8fc\uc800\uc5c6\uc774 \ud53c\ubc97\ud14c\uc774\ube14\uc774\ub77c\uace0 \uc598\uae30\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4. \ud53c\ubc97\ud14c\uc774\ube14\uc740 \ub9ce\uc740 \uc591\uc758 \ub370\uc774\ud130\ub97c \ube60\ub978 \uc2dc\uac04\uc5d0 \ub2e4\uc591\ud558\uac8c \ubd84\uc11d\ud560 \uc218 \uc788\ub294 \ub300\ud654\ud615 \ud14c\uc774\ube14\uc774\ub77c\uace0 \ud560 \uc218 \uc788\ub294\ub370\uc694, \ub370\uc774\ud130\ub97c \ubd84\uc11d\ud558\ub294 \ub370 \uc788\uc5b4\uc11c \uc774\ub9cc\ud55c \uae30\ub2a5\uc744 \uc81c\uacf5\ud558\ub294 \ud504\ub85c\uadf8\ub7a8\ub3c4 \ud754\uce58 \uc54a\uc740 \uac83\uc774 \uc0ac\uc2e4\uc785\ub2c8\ub2e4.\n", "- \uadf8\ub7f0\ub370 \ud53c\ubc97(Pivot)\uc774\ub77c\ub294\uac74 \ubb58\uae4c\uc694? \ud53c\ubc97\uc740 \ub2e8\uc5b4\uc758 \uc758\ubbf8 \uadf8\ub300\ub85c \ud68c\uc804\uc911\uc2ec\ucd95\uc744 \uc758\ubbf8\ud558\ub294\ub370, \uc6d0\ubcf8 \ub370\uc774\ud130\ub97c \uc0ac\uc6a9\uc790\uac00 \uc815\uc758\ud55c \ucd95\uc744 \uc911\uc2ec\uc73c\ub85c \ub2e4\uc591\ud558\uac8c \ubd84\uc11d(\ud68c\uc804)\ud574\ubcfc \uc218 \uc788\ub2e4\ub294 \uc758\ubbf8\uc785\ub2c8\ub2e4. 
\uc5d1\uc140 97\uc5d0\uc11c \ucc98\uc74c \uc18c\uac1c\ub41c \ud53c\ubc97\ud14c\uc774\ube14\uc740 \ub108\ubb34 \ubcf5\uc7a1\ud574\ubcf4\uc5ec\uc11c \uc0ac\uc6a9\uc790\ub4e4\uc774 \uc27d\uac8c \uc811\uadfc\ud558\uc9c0 \ubabb\ud588\ub358 \uac83\uc774 \uc0ac\uc2e4\uc785\ub2c8\ub2e4. \uc5d1\uc140 2007\ubd80\ud130\ub294 \ud074\ub9ad \uba87\ubc88\uc73c\ub85c \ud53c\ubc97\ud14c\uc774\ube14\uc744 \ub9cc\ub4e4\uace0 \ubd84\uc11d\ud560 \uc218 \uc788\uac8c \ub418\uc5c8\ub294\ub370, \uc5ec\ub7ec\ubd84\ub4e4\ub3c4 \uc800\uc640 \ud568\uaed8 \uc27d\uac8c \uc774\ud574\ud558\uc2e4 \uc218 \uc788\uc744\uac81\ub2c8\ub2e4.\n", "- [\uc624\ud53c\uc2a4 \ud301 \uc5d1\uc140\uc758 \ud53c\ubc97\ud14c\uc774\ube14\uc744 \uc774\uc6a9\ud558\uc5ec \ub370\uc774\ud130\ub97c \ubd84\uc11d\ud558\ub294 \ud301](http://www.itworld.co.kr/news/77360)\n", "- [\ud53c\ubc97 \ud14c\uc774\ube14 \ubcf4\uace0\uc11c](http://www.jch74.com/study/eeeee1.htm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.2. \ud53c\ubc84\ud305\uc73c\ub85c \ub370\uc774\ud130 \ub098\uc5f4 \ubc29\uc2dd \ubc14\uafb8\uae30\n", "\n", "- \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub098 CSV \ud30c\uc77c\uc5d0 \uc5ec\ub7ec \uac1c\uc758 \uc2dc\uacc4\uc5f4 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\ub294 \uc77c\ubc18\uc801\uc778 \ubc29\ubc95\uc740 \uc2dc\uac04\uc21c\uc73c\ub85c \ub098\uc5f4\ud558\ub294 \ubc29\ubc95\n", "- csv\ud30c\uc77c \uc77d\uae30: p.222\ucabd \ucc38\uace0\n", "- \uceec\ub7fc \uc774\ub984 \ubcc0\uacbd\n", "- \uceec\ub7fc \ud569\uce58\uae30\n", "- \uc6d0\ud558\ub294 \uceec\ub7fc\ub9cc \ubcf4\uc5ec\uc8fc\uae30: p.169\ucabd frame2.ix['three'] \ubd80\ubd84 \ucc38\uace0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### DataFrame\uc744 \uc0dd\uc131\ud558\ub294 \ucf54\ub4dc\ub294 \ud3b8\uc758\ub97c \uc704\ud574 \uc0dd\ub7b5\ud588\ub2e4.\n", "\n", "\n", "\n", "#### \ucc45\uc744 \ub2e4 \ud55c \ubc88 \ud6d1\uace0 \ub2e4\uc2dc \ub3cc\uc544\uc624\ub2c8 \uc774\uc81c\ub294 \ud560 \uc218 
\uc788\uac8c\ub410\ub2e4." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### names\ub85c \uc9c0\uc815\ud574\uc8fc\uc9c0 \uc54a\uc73c\uba74 pivot \ud560 \ub54c error \ubc1c\uc0dd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " date, item, value\n", " 1959-03-31 00:00:00, realgdp, 2710.349\n", " 1959-03-31 00:00:00, infl, 0.000\n", " 1959-03-31 00:00:00, unemp, 5.800\n", " 1959-06-30 00:00:00, realgdp, 2778.801\n", " 1959-06-30 00:00:00, infl, 2.340\n", " 1959-06-30 00:00:00, unemp, 5.100\n", " 1959-09-30 00:00:00, realgdp, 2775.488\n", " 1959-09-30 00:00:00, infl, 2.740\n", " 1959-09-30 00:00:00, unemp, 5.300\n", " 1959-12-31 00:00:00, realgdp, 2785.204 " ] }, { "cell_type": "code", "collapsed": false, "input": [ "%%writefile ch07/pivot.csv\n", "date, item, value\n", "1959-03-31 00:00:00, realgdp, 2710.349\n", "1959-03-31 00:00:00, infl, 0.000\n", "1959-03-31 00:00:00, unemp, 5.800\n", "1959-06-30 00:00:00, realgdp, 2778.801\n", "1959-06-30 00:00:00, infl, 2.340\n", "1959-06-30 00:00:00, unemp, 5.100\n", "1959-09-30 00:00:00, realgdp, 2775.488\n", "1959-09-30 00:00:00, infl, 2.740\n", "1959-09-30 00:00:00, unemp, 5.300\n", "1959-12-31 00:00:00, realgdp, 2785.204 " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Overwriting ch07/pivot.csv\n" ] } ], "prompt_number": 268 }, { "cell_type": "code", "collapsed": false, "input": [ "# header\ub97c 0\uc73c\ub85c \uc124\uc815\ud558\uaca0\ub2e4\ub294\uac74\ub370.. \uae30\ubcf8\uac12.\n", "# \uacc4\uc18d pivot \ud560 \ub54c \uc5d0\ub7ec\uac00 \ub09c\ub2e4.\n", "# names\ub85c \uaf2d \uc124\uc815\ud574\uc57c \ud558\ub098\ubcf4\ub2e4. 
pivot\uc774 \uc778\uc2dd\ud558\ub294 \uac83\uc740 names\ub85c setting\ub41c \uac12\uc778\ub4ef.\n", "# \uc5f4\uacfc \ud589\uc744 \ubc14\uafd4\uc57c \ud558\uae30 \ub54c\ubb38\uc5d0 \uc774\ubbf8 \uc778\uc2dd\ud558\uace0 \uc788\ub294 \uac12\uc774 \uc5c6\uc73c\uba74 error \ubc49\ub294\uac83 \uac19\ub2e4.\n", "# NOTE: the actual cause is the space after each comma in the header line ('date, item, value'): the columns are read as ' item' and ' value', so pivot('date', 'item', 'value') cannot find them.\n", "# Passing skipinitialspace=True to read_csv makes header=0 work without names=.\n", "ldata = pd.read_csv('ch07/pivot.csv', header=0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 274 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc774\uac83 \ub9d0\uace0\ub294 \ub531\ud788 \ub2e4\ub978\uac8c \uc0dd\uac01\ub098\uc9c0 \uc54a\ub294\ub2e4.\n", "# csv \ud30c\uc77c\uc5d0\uc11c 1\ubc88\uc9f8 \uc904\uc778 header\ub97c \uc9c0\uc6b0\uba74 \uc5b4\ub5a4 \uceec\ub7fc\uc778\uc9c0 \ubaa8\ub974\ub2c8\uae4c \ucc28\ub77c\ub9ac skiprows=1 \uc744 \ud574\uc8fc\ub294\uac8c \ub0ab\uaca0\ub2e4.\n", "# 6\uc7a5 \ucc98\uc74c \ubd80\ubd84\uc5d0 read_csv \uc635\uc158\ub4e4\uc774 \uc788\uc73c\ub2c8 \ucc38\uace0\n", "ldata = pd.read_csv('ch07/pivot.csv', skiprows=1, names=['date', 'item', 'value'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 275 }, { "cell_type": "code", "collapsed": false, "input": [ "ldata[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateitemvalue
0 1959-03-31 00:00:00 realgdp 2710.349
1 1959-03-31 00:00:00 infl 0.000
2 1959-03-31 00:00:00 unemp 5.800
3 1959-06-30 00:00:00 realgdp 2778.801
4 1959-06-30 00:00:00 infl 2.340
5 1959-06-30 00:00:00 unemp 5.100
6 1959-09-30 00:00:00 realgdp 2775.488
7 1959-09-30 00:00:00 infl 2.740
8 1959-09-30 00:00:00 unemp 5.300
9 1959-12-31 00:00:00 realgdp 2785.204
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 276, "text": [ " date item value\n", "0 1959-03-31 00:00:00 realgdp 2710.349\n", "1 1959-03-31 00:00:00 infl 0.000\n", "2 1959-03-31 00:00:00 unemp 5.800\n", "3 1959-06-30 00:00:00 realgdp 2778.801\n", "4 1959-06-30 00:00:00 infl 2.340\n", "5 1959-06-30 00:00:00 unemp 5.100\n", "6 1959-09-30 00:00:00 realgdp 2775.488\n", "7 1959-09-30 00:00:00 infl 2.740\n", "8 1959-09-30 00:00:00 unemp 5.300\n", "9 1959-12-31 00:00:00 realgdp 2785.204" ] } ], "prompt_number": 276 }, { "cell_type": "code", "collapsed": false, "input": [ "type(ldata)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 277, "text": [ "pandas.core.frame.DataFrame" ] } ], "prompt_number": 277 }, { "cell_type": "code", "collapsed": false, "input": [ "# 1\ubc88\uc9f8 \uc778\uc790: \ub85c\uc6b0 \uc0c9\uc778\uc73c\ub85c \uc0ac\uc6a9\ub420 \uce7c\ub7fc \uc774\ub984\n", "# 2\ubc88\uc9f8 \uc778\uc790: \uce7c\ub7fc \uc0c9\uc778\uc73c\ub85c \uc0ac\uc6a9\ub420 \uce7c\ub7fc \uc774\ub984\n", "# 3\ubc88\uc9f8 \uc778\uc790: DataFrame\uc5d0 \ucc44\uc6cc \ub123\uc744 \uac12\uc744 \ub2f4\uace0 \uc788\ub294 \uce7c\ub7fc\n", "pivoted = ldata.pivot('date', 'item', 'value')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 278 }, { "cell_type": "code", "collapsed": false, "input": [ "pivoted.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item infl realgdp unemp
date
1959-03-31 00:00:00 0.00 2710.349 5.8
1959-06-30 00:00:00 2.34 2778.801 5.1
1959-09-30 00:00:00 2.74 2775.488 5.3
1959-12-31 00:00:00 NaN 2785.204 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 279, "text": [ "item infl realgdp unemp\n", "date \n", "1959-03-31 00:00:00 0.00 2710.349 5.8\n", "1959-06-30 00:00:00 2.34 2778.801 5.1\n", "1959-09-30 00:00:00 2.74 2775.488 5.3\n", "1959-12-31 00:00:00 NaN 2785.204 NaN" ] } ], "prompt_number": 279 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud55c \ubc88\uc5d0 2\uac1c\uc758 \uce7c\ub7fc \ubcc0\ud615" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ldata['value2'] = np.random.randn(len(ldata))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 280 }, { "cell_type": "code", "collapsed": false, "input": [ "ldata[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateitemvaluevalue2
0 1959-03-31 00:00:00 realgdp 2710.349-1.758243
1 1959-03-31 00:00:00 infl 0.000-1.163026
2 1959-03-31 00:00:00 unemp 5.800-0.997308
3 1959-06-30 00:00:00 realgdp 2778.801-1.407402
4 1959-06-30 00:00:00 infl 2.340 1.698340
5 1959-06-30 00:00:00 unemp 5.100 0.309916
6 1959-09-30 00:00:00 realgdp 2775.488 0.759469
7 1959-09-30 00:00:00 infl 2.740-0.376500
8 1959-09-30 00:00:00 unemp 5.300-0.288409
9 1959-12-31 00:00:00 realgdp 2785.204-2.492189
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 281, "text": [ " date item value value2\n", "0 1959-03-31 00:00:00 realgdp 2710.349 -1.758243\n", "1 1959-03-31 00:00:00 infl 0.000 -1.163026\n", "2 1959-03-31 00:00:00 unemp 5.800 -0.997308\n", "3 1959-06-30 00:00:00 realgdp 2778.801 -1.407402\n", "4 1959-06-30 00:00:00 infl 2.340 1.698340\n", "5 1959-06-30 00:00:00 unemp 5.100 0.309916\n", "6 1959-09-30 00:00:00 realgdp 2775.488 0.759469\n", "7 1959-09-30 00:00:00 infl 2.740 -0.376500\n", "8 1959-09-30 00:00:00 unemp 5.300 -0.288409\n", "9 1959-12-31 00:00:00 realgdp 2785.204 -2.492189" ] } ], "prompt_number": 281 }, { "cell_type": "code", "collapsed": false, "input": [ "# 3\ubc88\uc9f8 \uc778\uc790 \uc0dd\ub7b5\ud558\uba74 \uacc4\uce35\uc801 \uc0c9\uc778\uc73c\ub85c \ubcf4\uc5ec\uc90c\n", "pivoted = ldata.pivot('date', 'item')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 282 }, { "cell_type": "code", "collapsed": false, "input": [ "pivoted[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuevalue2
item infl realgdp unemp infl realgdp unemp
date
1959-03-31 00:00:00 0.00 2710.349 5.8-1.163026-1.758243-0.997308
1959-06-30 00:00:00 2.34 2778.801 5.1 1.698340-1.407402 0.309916
1959-09-30 00:00:00 2.74 2775.488 5.3-0.376500 0.759469-0.288409
1959-12-31 00:00:00 NaN 2785.204 NaN NaN-2.492189 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 285, "text": [ " value value2 \n", "item infl realgdp unemp infl realgdp unemp\n", "date \n", "1959-03-31 00:00:00 0.00 2710.349 5.8 -1.163026 -1.758243 -0.997308\n", "1959-06-30 00:00:00 2.34 2778.801 5.1 1.698340 -1.407402 0.309916\n", "1959-09-30 00:00:00 2.74 2775.488 5.3 -0.376500 0.759469 -0.288409\n", "1959-12-31 00:00:00 NaN 2785.204 NaN NaN -2.492189 NaN" ] } ], "prompt_number": 285 }, { "cell_type": "code", "collapsed": false, "input": [ "# Wow! Simple sentence!\n", "# \uacc4\uce35\uc801 \uc0c9\uc778\uc73c\ub85c \ub418\uc5b4 \uc788\ub294 \uac83\uc744 ['value']\ub85c \uc881\ud600\uc8fc\uace0 5\uac1c\ub9cc \ubcf4\uc5ec\uc900\ub2e4.\n", "pivoted['value'][:5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item infl realgdp unemp
date
1959-03-31 00:00:00 0.00 2710.349 5.8
1959-06-30 00:00:00 2.34 2778.801 5.1
1959-09-30 00:00:00 2.74 2775.488 5.3
1959-12-31 00:00:00 NaN 2785.204 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 286, "text": [ "item infl realgdp unemp\n", "date \n", "1959-03-31 00:00:00 0.00 2710.349 5.8\n", "1959-06-30 00:00:00 2.34 2778.801 5.1\n", "1959-09-30 00:00:00 2.74 2775.488 5.3\n", "1959-12-31 00:00:00 NaN 2785.204 NaN" ] } ], "prompt_number": 286 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- pivot: set_index\ub97c \uc0ac\uc6a9\ud574\uc11c \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \ub9cc\ub4e4\uace0 unstack \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c \ud615\ud0dc\ub97c \ubcc0\uacbd\ud558\ub294 \ub2e8\ucd95\ud0a4\uac19\uc740 \uba54\uc11c\ub4dc" ] }, { "cell_type": "code", "collapsed": false, "input": [ "unstacked = ldata.set_index(['date', 'item']).unstack('item')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 287 }, { "cell_type": "code", "collapsed": false, "input": [ "unstacked[:7]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuevalue2
item infl realgdp unemp infl realgdp unemp
date
1959-03-31 00:00:00 0.00 2710.349 5.8-1.163026-1.758243-0.997308
1959-06-30 00:00:00 2.34 2778.801 5.1 1.698340-1.407402 0.309916
1959-09-30 00:00:00 2.74 2775.488 5.3-0.376500 0.759469-0.288409
1959-12-31 00:00:00 NaN 2785.204 NaN NaN-2.492189 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 288, "text": [ " value value2 \n", "item infl realgdp unemp infl realgdp unemp\n", "date \n", "1959-03-31 00:00:00 0.00 2710.349 5.8 -1.163026 -1.758243 -0.997308\n", "1959-06-30 00:00:00 2.34 2778.801 5.1 1.698340 -1.407402 0.309916\n", "1959-09-30 00:00:00 2.74 2775.488 5.3 -0.376500 0.759469 -0.288409\n", "1959-12-31 00:00:00 NaN 2785.204 NaN NaN -2.492189 NaN" ] } ], "prompt_number": 288 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc704\uc5d0\uc11c unstack\uc744 \ud558\ub2c8 item \ub85c\uc6b0\uac00 \uceec\ub7fc\uc73c\ub85c \uc774\ub3d9\ud588\ub2e4.\n", "unstacked = ldata.set_index(['date', 'item'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 289 }, { "cell_type": "code", "collapsed": false, "input": [ "# date, item\uc774 \ub85c\uc6b0\n", "# value, value2\uac00 \uc5f4\n", "unstacked[:7]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuevalue2
dateitem
1959-03-31 00:00:00 realgdp 2710.349-1.758243
infl 0.000-1.163026
unemp 5.800-0.997308
1959-06-30 00:00:00 realgdp 2778.801-1.407402
infl 2.340 1.698340
unemp 5.100 0.309916
1959-09-30 00:00:00 realgdp 2775.488 0.759469
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 290, "text": [ " value value2\n", "date item \n", "1959-03-31 00:00:00 realgdp 2710.349 -1.758243\n", " infl 0.000 -1.163026\n", " unemp 5.800 -0.997308\n", "1959-06-30 00:00:00 realgdp 2778.801 -1.407402\n", " infl 2.340 1.698340\n", " unemp 5.100 0.309916\n", "1959-09-30 00:00:00 realgdp 2775.488 0.759469" ] } ], "prompt_number": 290 }, { "cell_type": "code", "collapsed": false, "input": [ "# set_index\uac00 \ub85c\uc6b0 \uc124\uc815\n", "# set_index\ub85c \uc124\uc815\ub418\uc9c0 \uc54a\uc740 \uac83\ub4e4\uc740 \ubaa8\ub450 \uc5f4\ub85c \uc774\ub3d9\n", "unstacked = ldata.set_index(['date'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 291 }, { "cell_type": "code", "collapsed": false, "input": [ "unstacked" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
itemvaluevalue2
date
1959-03-31 00:00:00 realgdp 2710.349-1.758243
1959-03-31 00:00:00 infl 0.000-1.163026
1959-03-31 00:00:00 unemp 5.800-0.997308
1959-06-30 00:00:00 realgdp 2778.801-1.407402
1959-06-30 00:00:00 infl 2.340 1.698340
1959-06-30 00:00:00 unemp 5.100 0.309916
1959-09-30 00:00:00 realgdp 2775.488 0.759469
1959-09-30 00:00:00 infl 2.740-0.376500
1959-09-30 00:00:00 unemp 5.300-0.288409
1959-12-31 00:00:00 realgdp 2785.204-2.492189
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 292, "text": [ " item value value2\n", "date \n", "1959-03-31 00:00:00 realgdp 2710.349 -1.758243\n", "1959-03-31 00:00:00 infl 0.000 -1.163026\n", "1959-03-31 00:00:00 unemp 5.800 -0.997308\n", "1959-06-30 00:00:00 realgdp 2778.801 -1.407402\n", "1959-06-30 00:00:00 infl 2.340 1.698340\n", "1959-06-30 00:00:00 unemp 5.100 0.309916\n", "1959-09-30 00:00:00 realgdp 2775.488 0.759469\n", "1959-09-30 00:00:00 infl 2.740 -0.376500\n", "1959-09-30 00:00:00 unemp 5.300 -0.288409\n", "1959-12-31 00:00:00 realgdp 2785.204 -2.492189" ] } ], "prompt_number": 292 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 7.3 \ub370\uc774\ud130 \ubcc0\ud615\n", "\n", "- \ud544\ud130\ub9c1, \uc815\uc81c \ubc0f \ub2e4\ub978 \ubcc0\ud615 \uc5ed\uc2dc \uc911\uc694\ud55c \uc5f0\uc0b0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.1 \uc911\ubcf5 \uc81c\uac70\ud558\uae30" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,\n", " 'k2': [1, 1, 2, 3, 3, 4, 4]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 392 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2
0 one 1
1 one 1
2 one 2
3 two 3
4 two 3
5 two 4
6 two 4
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 393, "text": [ " k1 k2\n", "0 one 1\n", "1 one 1\n", "2 one 2\n", "3 two 3\n", "4 two 3\n", "5 two 4\n", "6 two 4" ] } ], "prompt_number": 393 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc774 \uad6c\uc870\uac00 \uc798 \uc774\ud574 \uc548\ub418\uba74 \ud0c0\uc774\ud551 \ud574\ubcf4\uc138\uc694.\n", "# \ud30c\uc774\uc36c \uc778\ud130\ud504\ub9ac\ud130\uac00 \ub2f5\uc744 \uc54c\ub824\uc90d\ub2c8\ub2e4.\n", "['one'] * 3 + ['two'] * 4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 394, "text": [ "['one', 'one', 'one', 'two', 'two', 'two', 'two']" ] } ], "prompt_number": 394 }, { "cell_type": "code", "collapsed": false, "input": [ "data.duplicated()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 395, "text": [ "0 False\n", "1 True\n", "2 False\n", "3 False\n", "4 True\n", "5 False\n", "6 True\n", "dtype: bool" ] } ], "prompt_number": 395 }, { "cell_type": "code", "collapsed": false, "input": [ "data2 = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,\n", " 'k2': [1, 1, 2, 3, 3, 3, 4]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 396 }, { "cell_type": "code", "collapsed": false, "input": [ "# 2\uac1c \uc5f4\uc774 \ubaa8\ub450 \uac19\uc544\uc57c \uc911\ubcf5\uc73c\ub85c \uc778\uc815 \ub428\n", "data2.duplicated()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 397, "text": [ "0 False\n", "1 True\n", "2 False\n", "3 False\n", "4 True\n", "5 True\n", "6 False\n", "dtype: bool" ] } ], "prompt_number": 397 }, { "cell_type": "code", "collapsed": false, "input": [ "# duplicated \ubc30\uc5f4\uc774 False\uc778 DataFrame \ubc18\ud658\n", "data.drop_duplicates()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2
0 one 1
2 one 2
3 two 3
5 two 4
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 398, "text": [ " k1 k2\n", "0 one 1\n", "2 one 2\n", "3 two 3\n", "5 two 4" ] } ], "prompt_number": 398 }, { "cell_type": "code", "collapsed": false, "input": [ "data['v1'] = range(7)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 399 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0 one 1 0
1 one 1 1
2 one 2 2
3 two 3 3
4 two 3 4
5 two 4 5
6 two 4 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 400, "text": [ " k1 k2 v1\n", "0 one 1 0\n", "1 one 1 1\n", "2 one 2 2\n", "3 two 3 3\n", "4 two 3 4\n", "5 two 4 5\n", "6 two 4 6" ] } ], "prompt_number": 400 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc911\ubcf5 \uc5ec\ubd80\ub97c k1 \uceec\ub7fc\ub9cc \ubcf8\ub2e4\ub294 \uc774\uc57c\uae30\n", "# one, two\ub9cc \ub0a8\uc744 \uc218 \ubc16\uc5d0 \uc5c6\uc74c\n", "data.drop_duplicates(['k1'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0 one 1 0
3 two 3 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 406, "text": [ " k1 k2 v1\n", "0 one 1 0\n", "3 two 3 3" ] } ], "prompt_number": 406 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop_duplicates(['k1'], take_last=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
2 one 2 2
6 two 4 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 407, "text": [ " k1 k2 v1\n", "2 one 2 2\n", "6 two 4 6" ] } ], "prompt_number": 407 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop_duplicates(['k1'], take_last=False)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0 one 1 0
3 two 3 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 408, "text": [ " k1 k2 v1\n", "0 one 1 0\n", "3 two 3 3" ] } ], "prompt_number": 408 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop_duplicates(['k1', 'k2'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0 one 1 0
2 one 2 2
3 two 3 3
5 two 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 403, "text": [ " k1 k2 v1\n", "0 one 1 0\n", "2 one 2 2\n", "3 two 3 3\n", "5 two 4 5" ] } ], "prompt_number": 403 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- duplicated\uc640 drop_duplicates\ub294 \uae30\ubcf8\uc801\uc73c\ub85c \ucc98\uc74c \ubc1c\uacac\ub41c \uac12\uc744 \uc720\uc9c0\ud55c\ub2e4. take_last = True \uc635\uc158\uc744 \ub118\uae30\uba74 \ub9c8\uc9c0\ub9c9\uc73c\ub85c \ubc1c\uacac\ub41c \uac12\uc744 \ubc18\ud658\n", "- \uc774\uac8c \uc544\uc9c1 \uc798 \uc774\ud574\uac00 \uc548\ub41c\ub2e4. \uba87 \ubc88 \ud14c\uc2a4\ud2b8 \ud574\ubd24\ub294\ub370 \ub2ec\ub77c\uc9c0\ub294 \uac83\ub3c4 \uc5c6\uace0..\n", "- \uc0bd\uc9c8 \ub05d\uc5d0 \uc54c\uc544\ub0c8\ub2e4. \uadf8\ub0e5 \uc601\uc5b4 \uadf8\ub300\ub85c \ud574\uc11d\ud558\uba74 \ub418\ub294\uac70\ub124...-_-; \n", "- \uc911\ubcf5\uc744 \uc81c\uac70\ud558\ub294\ub370 take_last\uac00 False\uba74 \ucc98\uc74c\uac12 \ucd9c\ub825\ud574\uc8fc\uace0\n", "- take_last\uac00 True\uba74 \ub9c8\uc9c0\ub9c9 \uac12 \ucd9c\ub825\ud574 \uc8fc\uace0.." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# take_last=False\ub294 5\ub97c \uc120\ud0dd\n", "# take_last=True\ub294 6\uc744 \uc120\ud0dd\n", "data.drop_duplicates(['k1', 'k2'], take_last=True)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
1 one 1 1
2 one 2 2
4 two 3 4
6 two 4 6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 404, "text": [ " k1 k2 v1\n", "1 one 1 1\n", "2 one 2 2\n", "4 two 3 4\n", "6 two 4 6" ] } ], "prompt_number": 404 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop_duplicates?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 352 }, { "cell_type": "markdown", "metadata": {}, "source": [ " Type: instancemethod\n", " String form:\n", " \n", " File: /Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.py\n", " Definition: data.drop_duplicates(self, cols=None, take_last=False, inplace=False)\n", " Docstring:\n", " Return DataFrame with duplicate rows removed, optionally only\n", " considering certain columns\n", "\n", " Parameters\n", " ----------\n", " cols : column label or sequence of labels, optional\n", " Only consider certain columns for identifying duplicates, by\n", " default use all of the columns\n", " take_last : boolean, default False\n", " Take the last observed row in a row. 
Defaults to the first row\n", " inplace : boolean, default False\n", " Whether to drop duplicates in place or to return a copy\n", "\n", " Returns\n", " -------\n", " deduplicated : DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.2 \ud568\uc218\ub098 \ub9e4\ud551 \uc774\uc6a9\ud574 \ub370\uc774\ud130 \ubcc0\ud615\ud558\uae30\n", "\n", "- DataFrame\uc758 \uce7c\ub7fc\uc774\ub098 Series, \ubc30\uc5f4 \uc548\uc758 \uac12\uc744 \uae30\ubc18\uc73c\ub85c \ub370\uc774\ud130\uc758 \ud615\ud0dc\ub97c \ubcc0\ud615\ud558\uace0 \uc2f6\uc744 \ub54c" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame({'food': ['bacon', 'pulled pork', 'bacon', 'Pastrami',\n", " 'corned beef', 'Bacon', 'pastrami',\n", " 'honey ham', 'nova lox'],\n", " 'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 414 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
foodounces
0 bacon 4.0
1 pulled pork 3.0
2 bacon 12.0
3 Pastrami 6.0
4 corned beef 7.5
5 Bacon 8.0
6 pastrami 3.0
7 honey ham 5.0
8 nova lox 6.0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 415, "text": [ " food ounces\n", "0 bacon 4.0\n", "1 pulled pork 3.0\n", "2 bacon 12.0\n", "3 Pastrami 6.0\n", "4 corned beef 7.5\n", "5 Bacon 8.0\n", "6 pastrami 3.0\n", "7 honey ham 5.0\n", "8 nova lox 6.0" ] } ], "prompt_number": 415 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud574\ub2f9 \uc721\ub958\uac00 \uc5b4\ub5a4 \ub3d9\ubb3c\uc758 \uace0\uae30\uc778\uc9c0 \uc54c\ub824\uc904 \uc218 \uc788\ub294 \uce7c\ub7fc\uc744 \ud558\ub098 \ucd94\uac00\ud55c\ub2e4\uace0 \uac00\uc815\n", "- \uc721\ub958\ubcc4 \ub3d9\ubb3c\uc744 \ub2f4\uace0 \uc788\ub294 \uc0ac\uc804 \ub370\uc774\ud130 \uc791\uc131" ] }, { "cell_type": "code", "collapsed": false, "input": [ "meat_to_animal = {\n", " 'bacon': 'pig',\n", " 'pulled pork': 'pig',\n", " 'pastrami': 'cow',\n", " 'corned beef': 'cow',\n", " 'honey ham': 'pig',\n", " 'nova lox': 'salmon'\n", " }" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 416 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Series\uc758 map \uba54\uc11c\ub4dc\ub294 \uc0ac\uc804\ub958\uc758 \uac1d\uccb4\ub098 \uc5b4\ub5a4 \ud568\uc218\ub97c \ubc1b\uc744 \uc218 \uc788\ub294\ub370, \uc774 \ub370\uc774\ud130\uc5d0\ub294 \uc721\ub958\uc758 \uc774\ub984\uc5d0 \ub300\u2219\uc18c\ubb38\uc790\uac00 \uc11e\uc5ec \uc788\ub294 \uc0ac\uc18c\ud55c \ubb38\uc81c\uac00 \uc788\uc73c\ubbc0\ub85c \ubaa8\ub450 \uc18c\ubb38\uc790\ub85c \ubcc0\uacbd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3\ub2e8\uacc4\ub85c \ud480\uc5b4 \ud5e4\uccd0\uc11c \ud568\uc218\uc758 \uc5ed\ud560 \uc0b4\ud3b4\ubcf4\uae30\n", "\n", "- \ucc98\uc74c\uc5d0\ub294 \ud55c \ubc88\uc5d0 \uc77d\uc744 \uc218 \uc5c6\uc73c\ub2c8 \uc870\uac01\uc870\uac01 \ucf54\ub4dc\ub97c \ub098\ub220\uc11c \uc2e4\ud589\ud574 \ubcf8\ub2e4.\n", "- \ud558\ub098\uc529 \ud558\ub098\uc529 \ud37c\uc990 \ub9de\ucd94\ub4ef\uc774 \uc870\ub9bd\ud558\uc5ec \uc758\ubbf8\ub97c 
\ud655\uc778\ud574 \ubcf8\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uba3c\uc800 data['food']\uc5d0 \uc5b4\ub5a4 \ub370\uc774\ud130\uac00 \uc788\ub294\uc9c0 \ud655\uc778\n", "data['food']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 417, "text": [ "0 bacon\n", "1 pulled pork\n", "2 bacon\n", "3 Pastrami\n", "4 corned beef\n", "5 Bacon\n", "6 pastrami\n", "7 honey ham\n", "8 nova lox\n", "Name: food, dtype: object" ] } ], "prompt_number": 417 }, { "cell_type": "code", "collapsed": false, "input": [ "data['food'].map?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 420 }, { "cell_type": "markdown", "metadata": {}, "source": [ " \uc8fc\uc758: \uc544\ub798\ub294 Series.map\uc774 \uc544\ub2c8\ub77c \ud30c\uc774\uc36c \ub0b4\uc7a5 \ud568\uc218 map\uc758 \ub3c4\uc6c0\ub9d0\uc774\ub2e4. \ub0b4\uc7a5 map\uc740 1\ubc88\uc9f8 \uc778\uc790\ub85c \ud568\uc218\ub97c \ub118\uae30\uace0, 2\ubc88\uc9f8 \uc778\uc790\ub85c \uc5f0\uc18d\ub41c \ud615\ud0dc\uc758(\ub9ac\uc2a4\ud2b8, \ud29c\ud50c \uac19\uc740) \uc790\ub8cc\ud615\uc744 \ub118\uae30\uba74 \ub41c\ub2e4. Series.map\uc740 \ud568\uc218\ub098 \uc0ac\uc804\ub958\uc758 \uac1d\uccb4 \ud558\ub098\ub97c \uc778\uc790\ub85c \ubc1b\ub294\ub2e4.\n", "\n", " Type: builtin_function_or_method\n", " String form: \n", " Namespace: Python builtin\n", " Docstring:\n", " map(function, sequence[, sequence, ...]) -> list\n", "\n", " Return a list of the results of applying the function to the items of\n", " the argument sequence(s). If more than one sequence is given, the\n", " function is called with an argument list consisting of the corresponding\n", " item of each sequence, substituting None for missing values when not all\n", " sequences have the same length. If the function is None, return a list of\n", " the items of the sequence (or a list of tuples if more than one sequence)." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "data['food'].map" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# data['food']\uc5d0 map \ud568\uc218\ub97c \uc801\uc6a9\ud558\ub294\ub370 \uc18c\ubb38\uc790\ub85c \ubaa8\ub450 \ubcc0\uacbd\ud558\ub294 \ud568\uc218 \uc801\uc6a9\n", "data['food'].map(str.lower)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 418, "text": [ "0 bacon\n", "1 pulled pork\n", "2 bacon\n", "3 pastrami\n", "4 corned beef\n", "5 bacon\n", "6 pastrami\n", "7 honey ham\n", "8 nova lox\n", "Name: food, dtype: object" ] } ], "prompt_number": 418 }, { "cell_type": "code", "collapsed": false, "input": [ "# \ub610 map\uc73c\ub85c meat_to_animal dictionary\ub97c \ub118\uae34\ub2e4.\n", "# bacon -> pig\ub85c, pastrami -> cow\ub85c \ubcc0\uacbd\n", "data['food'].map(str.lower).map(meat_to_animal)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 419, "text": [ "0 pig\n", "1 pig\n", "2 pig\n", "3 cow\n", "4 cow\n", "5 pig\n", "6 cow\n", "7 pig\n", "8 salmon\n", "Name: food, dtype: object" ] } ], "prompt_number": 419 }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubcc0\uacbd\ub41c \uac83\ub4e4\uc744 animal \uc5f4\uc744 \uc0c8\ub85c \uc0dd\uc131\ud558\uace0 \uc5ec\uae30\uc5d0 \ub300\uc785\n", "data['animal'] = data['food'].map(str.lower).map(meat_to_animal)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 412 }, { "cell_type": "code", "collapsed": false, "input": [ "# food\uc5f4\uc758 \ub300\u2219\uc18c\ubb38\uc790\ub294 \uc548 \ubc14\ub010 \uac83\uc744 \uc54c \uc218 \uc788\ub2e4.\n", "# \ub300\u2219\uc18c\ubb38\uc790 \ube44\uad50\ud55c\uac74 animal \ud544\ub4dc\uc5d0 \uc801\uc6a9\ud558\uae30 \uc704\ud55c \uac83\uc778 \uac83\uc784\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\n", "data" ], "language": "python", 
"metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
foodouncesanimal
0 bacon 4.0 pig
1 pulled pork 3.0 pig
2 bacon 12.0 pig
3 Pastrami 6.0 cow
4 corned beef 7.5 cow
5 Bacon 8.0 pig
6 pastrami 3.0 cow
7 honey ham 5.0 pig
8 nova lox 6.0 salmon
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 413, "text": [ " food ounces animal\n", "0 bacon 4.0 pig\n", "1 pulled pork 3.0 pig\n", "2 bacon 12.0 pig\n", "3 Pastrami 6.0 cow\n", "4 corned beef 7.5 cow\n", "5 Bacon 8.0 pig\n", "6 pastrami 3.0 cow\n", "7 honey ham 5.0 pig\n", "8 nova lox 6.0 salmon" ] } ], "prompt_number": 413 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ubb3c\ub860 \ud568\uc218\ub97c \ub118\uaca8\uc11c \uac19\uc740 \uc77c\uc744 \uc218\ud589\ud560 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "data['food'].map(lambda x: meat_to_animal[x.lower()])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 421, "text": [ "0 pig\n", "1 pig\n", "2 pig\n", "3 cow\n", "4 cow\n", "5 pig\n", "6 cow\n", "7 pig\n", "8 salmon\n", "Name: food, dtype: object" ] } ], "prompt_number": 421 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- map \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud558\uba74 \ub370\uc774\ud130\uc758 \uc694\uc18c\ubcc4 \ubcc0\ud615 \ubc0f \ub370\uc774\ud130\ub97c \ub2e4\ub4ec\ub294 \uc791\uc5c5 \ud3b8\ub9ac" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.3 \uac12 \uce58\ud658\ud558\uae30\n", "\n", "- fillna \uba54\uc11c\ub4dc: \ub204\ub77d\ub41c \uac12\uc744 \ucc44\uc6b0\ub294 \uc77c\uc740 \uc77c\ubc18\uc801\uc778 \uac12 \uce58\ud658 \uc791\uc5c5\n", "- \uc704\uc5d0\uc11c \uc0b4\ud3b4\ubd24\ub4ef\uc774 map \uba54\uc11c\ub4dc\ub97c \ud55c \uac1d\uccb4 \uc548\uc5d0\uc11c \uac12\uc758 \ubd80\ubd84\uc9d1\ud569\uc744 \ubcc0\uacbd\ud558\ub294 \ub370 \uc0ac\uc6a9\ud588\ub2e4\uba74,\n", "- replace \uba54\uc11c\ub4dc: \uac19\uc740 \uc791\uc5c5\uc5d0 \ub300\ud574\uc11c \uc880 \ub354 \uac04\ub2e8\ud558\uace0 \uc720\uc5f0\ud55c \ubc29\ubc95 \uc81c\uacf5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = Series([1., -999., 2., -999., -1000., 3.])" ], "language": "python", "metadata": {}, 
"outputs": [], "prompt_number": 422 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 423, "text": [ "0 1\n", "1 -999\n", "2 2\n", "3 -999\n", "4 -1000\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 423 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- -999\ub294 \ub204\ub77d\ub41c \ub370\uc774\ud130\ub97c \ub098\ud0c0\ub0b4\uae30 \uc704\ud55c \uac12\n", "- \uc774 \uac12\uc740 replace \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c pandas\uc5d0\uc11c \uc778\uc2dd\ud560 \uc218 \uc788\ub294 NA\uac12\uc73c\ub85c \uce58\ud658\ub41c \uc0c8\ub85c\uc6b4 Series\ub97c \uc0dd\uc131" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.replace(-999, np.nan)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 424, "text": [ "0 1\n", "1 NaN\n", "2 2\n", "3 NaN\n", "4 -1000\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 424 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ub2f9\uc5f0\ud788 \uc5ec\ub7ec \uac1c\uc758 \uac12\ub3c4 \ud55c \ubc88\uc5d0 \uce58\ud658 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc815\ub9d0 \uc9c1\uad00\uc801\uc73c\ub85c \uc798\ub9cc\ub4e0\uac83 \uac19\ub2e4.\n", "# 1\uac1c \uc774\uc0c1\uc744 \ubcc0\uacbd\ud558\ub824\uba74 list\ub85c \ub118\uae30\uba74 \ubaa8\ub450 \uc54c\uc544\uc11c \ubcc0\uacbd\ud574\uc900\ub2e4.\n", "data.replace([-999, -1000], np.nan)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 425, "text": [ "0 1\n", "1 NaN\n", "2 2\n", "3 NaN\n", "4 NaN\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 425 }, { "cell_type": "code", "collapsed": false, "input": [ "# \ubcc0\uacbd\ud560 \ubb38\uc790\uc5f4\ub9cc list\ub85c \ub118\uae30\ub77c\ub294 \ubc95 \uc5c6\uc74c\n", "# \ubcc0\uacbd\ud558\uace0 \uc2f6\uc740 
\ubb38\uc790\uc5f4\ub3c4 list\ub85c \ub118\uae30\uba74 \uc21c\uc11c\uc5d0 \ub9de\uac8c\ub054 \ubcc0\uacbd\ud574 \uc90c\n", "data.replace([-999, -1000], [999, 1000])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 428, "text": [ "0 1\n", "1 999\n", "2 2\n", "3 999\n", "4 1000\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 428 }, { "cell_type": "code", "collapsed": false, "input": [ "data.replace([-999, -1000], [np.nan, 0])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 429, "text": [ "0 1\n", "1 NaN\n", "2 2\n", "3 NaN\n", "4 0\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 429 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc0ac\uc804\uc73c\ub85c \ub118\uaca8\ub3c4 \ub41c\ub2e4.\n", "# \uc815\ub9d0 \ud3b8\ub9ac\ud558\uac8c \ub9cc\ub4e4\uc5c8\ub2e4.\n", "data.replace({-999: np.nan, -1000: 0})" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 430, "text": [ "0 1\n", "1 NaN\n", "2 2\n", "3 NaN\n", "4 0\n", "5 3\n", "dtype: float64" ] } ], "prompt_number": 430 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.4 \ucd95 \uc0c9\uc778 \uc774\ub984 \ubc14\uafb8\uae30\n", "\n", "- Series\uc758 \uac12\ucc98\ub7fc \ucd95 \uc774\ub984 \uc5ed\uc2dc \uc720\uc0ac\ud55c \ubc29\uc2dd\uc73c\ub85c \ud568\uc218\ub098 \uc0c8\ub86d\uac8c \ubc14\uafc0 \uac12\uc73c\ub85c \uc774\uc6a9\ud574\uc11c \ubcc0\ud615\n", "- \uc0c8\ub85c\uc6b4 \uc790\ub8cc \uad6c\uc870\ub97c \ub9cc\ub4e4\uc9c0 \uc54a\uace0 \uadf8 \uc790\ub9ac\uc5d0\uc11c \ubc14\ub85c \ucd95 \uc774\ub984\uc744 \ubcc0\uacbd\ud558\ub294 \uac83\uc774 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame(np.arange(12).reshape((3, 4)),\n", " index = ['Ohio', 'Colorado', 'New York'],\n", " columns=['one', 'two', 'three', 'four'])" ], "language": "python", 
"metadata": {}, "outputs": [], "prompt_number": 431 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio 0 1 2 3
Colorado 4 5 6 7
New York 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 432, "text": [ " one two three four\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "New York 8 9 10 11" ] } ], "prompt_number": 432 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### map \ud568\uc218 \uc0ac\uc6a9\ubc95\n", "\n", "- \uc608\uc804\uc5d0\ub3c4 map, reduce \uc774\ub7f0 \ud568\uc218\ub4e4\uc774 \uc5b4\ub824\uc6e0\ub294\ub370, \uc5ec\uae30\uc5d0\uc11c \ud655\uc2e4\ud788 \uc775\ud788\ub124\n", "- \uadf8\ub0e5 map \ud568\uc218\uc5d0 \ud568\uc218\ub97c \uc778\uc790\ub85c \ub118\uae30\uba74 \ub428.." ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.index.map(str.upper)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 433, "text": [ "array(['OHIO', 'COLORADO', 'NEW YORK'], dtype=object)" ] } ], "prompt_number": 433 }, { "cell_type": "code", "collapsed": false, "input": [ "data.index = data.index.map(str.upper)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 434 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
OHIO 0 1 2 3
COLORADO 4 5 6 7
NEW YORK 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 435, "text": [ " one two three four\n", "OHIO 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] } ], "prompt_number": 435 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- rename \uba54\uc11c\ub4dc: \uc6d0\ub798 \uac1d\uccb4\ub97c \ubcc0\uacbd\ud558\uc9c0 \uc54a\uace0 \uc0c8\ub85c\uc6b4 \uac1d\uccb4\ub97c \uc0dd\uc131" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.rename(index=str.title, columns=str.upper)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ONETWOTHREEFOUR
Ohio 0 1 2 3
Colorado 4 5 6 7
New York 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 436, "text": [ " ONE TWO THREE FOUR\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "New York 8 9 10 11" ] } ], "prompt_number": 436 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc704\uc5d0\uc11c index\ub97c \ub300\ubb38\uc790\ub85c \ubc14\uafd4\uc92c\ub2e4.\n", "# rename\uc744 \uc0ac\uc6a9\ud574\uc11c \uc6d0\ubcf8\uc740 \ubc14\ub00c\uc9c0 \uc54a\uc558\ub2e4.\n", "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
OHIO 0 1 2 3
COLORADO 4 5 6 7
NEW YORK 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 437, "text": [ " one two three four\n", "OHIO 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] } ], "prompt_number": 437 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- rename \uba54\uc11c\ub4dc: \uc0ac\uc804 \ud615\uc2dd\uc758 \uac1d\uccb4\ub97c \uc774\uc6a9\ud574\uc11c \ucd95 \uc774\ub984 \uc911 \uc77c\ubd80\ub9cc \ubcc0\uacbd\ud558\ub294 \uac83\ub3c4 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.rename(index={'OHIO': 'INDIANA'},\n", " columns={'three': 'peekaboo'})" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwopeekaboofour
INDIANA 0 1 2 3
COLORADO 4 5 6 7
NEW YORK 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 438, "text": [ " one two peekaboo four\n", "INDIANA 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] } ], "prompt_number": 438 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
OHIO 0 1 2 3
COLORADO 4 5 6 7
NEW YORK 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 439, "text": [ " one two three four\n", "OHIO 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] } ], "prompt_number": 439 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- rename \uba54\uc11c\ub4dc: DataFrame\uc744 \uc9c1\uc811 \ubcf5\uc0ac\ud574\uc11c index\uc640 columns \uc18d\uc131\uc744 \uac31\uc2e0\ud560 \ud544\uc694\uc5c6\uc774 \ubc14\ub85c \ubcc0\uacbd\ub3c4 \uac00\ub2a5\n", "- \uc6d0\ubcf8 \ub370\uc774\ud130\ub97c \ubc14\ub85c \ubcc0\uacbd\ud558\ub824\uba74 inplace = True \uc635\uc158 \uc124\uc815\n", "- \uc6d0\ubcf8 \ub370\uc774\ud130\ub97c \ubc14\ub85c \ubcc0\uacbd\ud558\uae30 \ub54c\ubb38\uc5d0 \ub9e4\uc6b0 \uc8fc\uc758\ud574\uc11c \uc0ac\uc6a9\ud558\uc790" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \ud56d\uc0c1 DataFrame\uc758 \ucc38\uc870\ub97c \ubc18\ud658\ud55c\ub2e4.\n", "_ = data.rename(index={'OHIO': 'INDIANA'}, inplace=True)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 440 }, { "cell_type": "code", "collapsed": false, "input": [ "# OHIO -> INDIANA\ub85c \ubc14\ub00c\uc5c8\ub2e4.\n", "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
INDIANA 0 1 2 3
COLORADO 4 5 6 7
NEW YORK 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 441, "text": [ " one two three four\n", "INDIANA 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] } ], "prompt_number": 441 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.5 \uac1c\ubcc4\ud654\uc640 \uc591\uc790\ud654\n", "\n", "- \uc5f0\uc18d\uc131 \ub370\uc774\ud130\ub294 \uc885\uc885 \uac1c\ubcc4\ub85c \ubd84\ud560\ud558\uac70\ub098 \ubd84\uc11d\uc744 \uc704\ud574 \uadf8\ub8f9\ubcc4\ub85c \ub098\ub214\n", "- \uc218\uc5c5\uc5d0 \ucc38\uc5ec\ud558\ub294 \ud559\uc0dd \uadf8\ub8f9 \ub370\uc774\ud130\uac00 \uc788\uace0, \ub098\uc774\ub300\uc5d0 \ub530\ub77c \ubd84\ub958\ud55c\ub2e4\uace0 \uac00\uc815" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ages = [20, 22, 25, 27, 21, 23, 27, 31, 61, 45, 41, 32]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 442 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc774 \ub370\uc774\ud130\ub97c pandas\uc758 cut \ud568\uc218\ub97c \uc0ac\uc6a9\ud574\uc11c 18-25, 26-35, 36-60, 61\uc774\uc0c1\uc778 \uadf8\ub8f9\uc73c\ub85c \ub098\ub214" ] }, { "cell_type": "code", "collapsed": false, "input": [ "bins = [18, 25, 35, 60, 100]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 443 }, { "cell_type": "code", "collapsed": false, "input": [ "# 1\ubc88\uc9f8 \uc778\uc790: \ub098\ub20c list\n", "# 2\ubc88\uc9f8 \uc778\uc790: \ub098\ub20c \uae30\uc900\n", "cats = pd.cut(ages, bins)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 444 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.cut?" 
], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 446 }, { "cell_type": "markdown", "metadata": {}, "source": [ " Type: function\n", " String form: \n", " File: /Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/tools/tile.py\n", " Definition: pd.cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False)\n", " Docstring:\n", " Return indices of half-open bins to which each value of `x` belongs.\n", "\n", " Parameters\n", " ----------\n", " x : array-like\n", " Input array to be binned. It has to be 1-dimensional.\n", " bins : int or sequence of scalars\n", " If `bins` is an int, it defines the number of equal-width bins in the\n", " range of `x`. However, in this case, the range of `x` is extended\n", " by .1% on each side to include the min or max values of `x`. If\n", " `bins` is a sequence it defines the bin edges allowing for\n", " non-uniform bin width. No extension of the range of `x` is done in\n", " this case.\n", " right : bool, optional\n", " Indicates whether the bins include the rightmost edge or not. If\n", " right == True (the default), then the bins [1,2,3,4] indicate\n", " (1,2], (2,3], (3,4].\n", " labels : array or boolean, default None\n", " Labels to use for bin edges, or False to return integer bin labels\n", " retbins : bool, optional\n", " Whether to return the bins or not. Can be useful if bins is given\n", " as a scalar.\n", "\n", " Returns\n", " -------\n", " out : Categorical or array of integers if labels is False\n", " bins : ndarray of floats\n", " Returned only if `retbins` is True.\n", "\n", " Notes\n", " -----\n", " The `cut` function can be useful for going from a continuous variable to\n", " a categorical variable. For example, `cut` could convert ages to groups\n", " of age ranges.\n", "\n", " Any NA values will be NA in the result. 
Out of bounds values will be NA in\n", " the resulting Categorical object\n", "\n", "\n", " Examples\n", " --------\n", " >>> cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)\n", " (array([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533],\n", " (6.533, 9.7], (0.191, 3.367]], dtype=object),\n", " array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ]))\n", " >>> cut(np.ones(5), 4, labels=False)\n", " array([2, 2, 2, 2, 2])" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cats" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 445, "text": [ "Categorical: \n", "[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], (18, 25], (25, 35], (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]\n", "Levels (4): Index(['(18, 25]', '(25, 35]', '(35, 60]', '(60, 100]'], dtype=object)" ] } ], "prompt_number": 445 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- pandas\uc5d0\uc11c \ubc18\ud658\ud558\ub294 \uac1d\uccb4\ub294 Categorical \uac1d\uccb4\ub77c\ub294 \ud2b9\uc218\ud55c \uac1d\uccb4\uc778\ub370, \uc774 \uac1d\uccb4\ub294 \uadf8\ub8f9 \uc774\ub984\uc774 \ub2f4\uae34 \ubc30\uc5f4\n", "- \uc774 Categorical \uac1d\uccb4\ub294 labels \uc18d\uc131\uc5d0 \uc788\ub294 ages \ub370\uc774\ud130\uc5d0 \ub300\ud55c \uce74\ud14c\uace0\ub9ac \uc774\ub984\uc744 levels\ub77c\ub294 \ubc30\uc5f4\uc5d0 \ub0b4\ubd80\uc801\uc73c\ub85c \ub2f4\uace0 \uc788\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "cats.labels" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 447, "text": [ "array([0, 0, 0, 1, 0, 0, 1, 1, 3, 2, 2, 1])" ] } ], "prompt_number": 447 }, { "cell_type": "code", "collapsed": false, "input": [ "cats.levels" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 448, "text": [ "Index([u'(18, 25]', u'(25, 35]', u'(35, 60]', u'(60, 100]'], dtype=object)" ] } ], "prompt_number": 448 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.value_counts(cats)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 449, "text": [ "(18, 25] 5\n", "(25, 35] 4\n", "(35, 60] 2\n", "(60, 100] 1\n", "dtype: int64" ] } ], "prompt_number": 449 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uac04\uaca9\uc744 \ub098\ud0c0\ub0b4\ub294 \ud45c\uae30\ubc95\uc740 (\ub85c \uc2dc\uc791\ud574\uc11c ]\ub85c \ub05d\ub09c\ub2e4.\n", "- (: \uad04\ud638\ucabd\uc758 \uac12\uc740 \ud3ec\ud568\ud558\uc9c0 \uc54a\uace0 \n", "- [: \ub300\uad04\ud638 \ucabd\uc758 \uac12\uc740 \ud3ec\ud568\ud558\ub294 \uac04\uaca9\uc744 \ub098\ud0c0\ub0c4\n", "- right=False\ub97c \ub118\uaca8\uc11c \uad04\ud638\uc640 \ub300\uad04\ud638\uc758 \uc704\uce58 \ubcc0\uacbd \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.cut(ages, [18, 26, 36, 61, 100], right=False)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 450, "text": [ "Categorical: \n", "[[18, 26), [18, 26), [18, 26), [26, 36), [18, 26), [18, 26), [26, 36), [26, 36), [61, 100), [36, 61), [36, 61), [26, 36)]\n", "Levels (4): Index(['[18, 26)', '[26, 36)', '[36, 61)', '[61, 100)'], dtype=object)" ] } ], "prompt_number": 450 }, { "cell_type": "code", "collapsed": false, "input": [ "bins" ], "language": 
"python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 451, "text": [ "[18, 25, 35, 60, 100]" ] } ], "prompt_number": 451 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- labels \uc635\uc158\uc73c\ub85c \uadf8\ub8f9\uc758 \uc774\ub984\uc744 \uc9c1\uc811 \ub118\uaca8\uc904 \uc218\ub3c4 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "group_names = ['Youth', 'YoungAdult', 'MiddleAged', 'Senior']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 452 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.cut(ages, bins, labels=group_names)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 453, "text": [ "Categorical: \n", "[Youth, Youth, Youth, YoungAdult, Youth, Youth, YoungAdult, YoungAdult, Senior, MiddleAged, MiddleAged, YoungAdult]\n", "Levels (4): Index(['Youth', 'YoungAdult', 'MiddleAged', 'Senior'], dtype=object)" ] } ], "prompt_number": 453 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- cut \ud568\uc218\uc5d0 \uba85\uc2dc\uc801\uc73c\ub85c \uadf8\ub8f9\uc758 \uacbd\uacc4 \uac12\uc744 \ub118\uae30\uc9c0 \uc54a\uace0 \uadf8\ub8f9\uc758 \uac1c\uc218\ub97c \ub118\uaca8\uc8fc\uba74 \ub370\uc774\ud130 \ub0b4\uc5d0\uc11c \ucd5c\uc18c\uac12\uacfc \ucd5c\ub300\uac12\uc744 \uae30\uc900\uc73c\ub85c \uade0\ub4f1\ud55c \uae38\uc774\uc758 \uadf8\ub8f9\uc744 \uc790\ub3d9\uc73c\ub85c \uacc4\uc0b0" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = np.random.randn(20)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 454 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 455, "text": [ "array([ 0.21900547, 0.8840372 , -0.84813527, 1.79066963, 1.20818628,\n", " 0.50816038, 0.5384968 , -0.46679507, 0.20554364, 0.40605808,\n", " 
1.38356295, -0.0487796 , -1.86564376, 0.0567211 , 0.00651676,\n", " -0.93895591, -0.74396268, 0.2366114 , 0.62541814, -1.38886959])" ] } ], "prompt_number": 455 }, { "cell_type": "code", "collapsed": false, "input": [ "# data, \uadf8\ub8f9\uc758 \uac2f\uc218, \uc815\ud655\ub3c4?\n", "pd.cut(data, 4, precision=2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 458, "text": [ "Categorical: \n", "[(-0.037, 0.88], (0.88, 1.79], (-0.95, -0.037], (0.88, 1.79], (0.88, 1.79], (-0.037, 0.88], (-0.037, 0.88], (-0.95, -0.037], (-0.037, 0.88], (-0.037, 0.88], (0.88, 1.79], (-0.95, -0.037], (-1.87, -0.95], (-0.037, 0.88], (-0.037, 0.88], (-0.95, -0.037], (-0.95, -0.037], (-0.037, 0.88], (-0.037, 0.88], (-1.87, -0.95]]\n", "Levels (4): Index(['(-1.87, -0.95]', '(-0.95, -0.037]',\n", " '(-0.037, 0.88]', '(0.88, 1.79]'], dtype=object)" ] } ], "prompt_number": 458 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- qcut: \ud45c\ubcf8 \ubcc0\uc704\uce58\ub97c \uae30\ubc18\uc73c\ub85c \ub370\uc774\ud130\ub97c \ub098\ub220\uc900\ub2e4. cut\uc744 \uc0ac\uc6a9\ud558\uba74 \ub370\uc774\ud130\uc758 \ubd84\uc0b0\uc5d0 \ub530\ub77c \uac01\uac01\uc758 \uadf8\ub8f9\ub9c8\ub2e4 \ub370\uc774\ud130\uc758 \uac1c\uc218\uac00 \ub2e4\ub974\uac8c \ub098\ub204\uc5b4\uc9c0\ub294 \uacbd\uc6b0\uac00 \ub9ce\ub2e4.\n", "- qcut: \ud45c\ubcf8 \ubcc0\uc704\uce58\ub97c \uc0ac\uc6a9\ud558\uae30 \ub54c\ubb38\uc5d0 \uc801\ub2f9\ud788 \uac19\uc740 \ud06c\uae30\uc758 \uadf8\ub8f9\uc73c\ub85c \ub098\ub20c \uc218 \uc788\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "data = np.random.randn(1000) # Normally distributed" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 459 }, { "cell_type": "code", "collapsed": false, "input": [ "cats = pd.qcut(data, 4) # Cut into quartiles" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 460 }, { "cell_type": "code", "collapsed": false, "input": [ "cats" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 461, "text": [ "Categorical: \n", "[(-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], 
(-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (-0.608, 
0.0816], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.744, 
2.956], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 
0.744], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], 
[-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.744, 
2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], 
[-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (-0.608, 0.0816], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, 
-0.608], [-3.176, -0.608], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.0816, 0.744], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], [-3.176, -0.608], (0.0816, 0.744], (0.744, 2.956], [-3.176, -0.608], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], [-3.176, -0.608], (0.744, 2.956], (-0.608, 0.0816], (0.744, 2.956], (0.0816, 0.744], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (0.0816, 0.744], (-0.608, 0.0816], (-0.608, 0.0816], [-3.176, -0.608], (0.744, 2.956], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956], (0.0816, 0.744], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], [-3.176, -0.608], (-0.608, 0.0816], [-3.176, -0.608], (-0.608, 0.0816], (0.744, 2.956], (0.744, 2.956]]\n", "Levels (4): Index(['[-3.176, -0.608]', '(-0.608, 0.0816]',\n", " '(0.0816, 0.744]', '(0.744, 2.956]'], dtype=object)" ] } ], 
"prompt_number": 461 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.value_counts(cats)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 462, "text": [ "(-0.608, 0.0816] 250\n", "(0.744, 2.956] 250\n", "[-3.176, -0.608] 250\n", "(0.0816, 0.744] 250\n", "dtype: int64" ] } ], "prompt_number": 462 }, { "cell_type": "code", "collapsed": false, "input": [ "data2 = pd.cut(data, 4, precision=2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 463 }, { "cell_type": "code", "collapsed": false, "input": [ "# qcut\uacfc \ube44\uad50\ub97c \uc704\ud574\n", "pd.value_counts(data2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 464, "text": [ "(-0.11, 1.42] 496\n", "(-1.64, -0.11] 375\n", "(1.42, 2.96] 83\n", "(-3.18, -1.64] 46\n", "dtype: int64" ] } ], "prompt_number": 464 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- cut\ud568\uc218\uc640 \uc720\uc0ac\ud558\uac8c \ubcc0\uc704\uce58\ub97c \uc9c1\uc811 \uc9c0\uc815\ud574\uc904 \uc218 \uc788\ub2e4(\ubcc0\uc704\uce58 \uac12\uc740 0\ubd80\ud130 1\uae4c\uc9c0)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cats2 = pd.qcut(data, [0, 0.1, 0.5, 0.9, 1.])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 468 }, { "cell_type": "code", "collapsed": false, "input": [ "cats2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 469, "text": [ "Categorical: \n", "[(-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], 
[-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (1.361, 2.956], (1.361, 2.956], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], 
[-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (-1.259, 
0.0816], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], [-3.176, -1.259], (0.0816, 1.361], (1.361, 2.956], (1.361, 2.956], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (1.361, 2.956], [-3.176, -1.259], (-1.259, 
0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], 
(1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 
1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 
1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], 
(-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], [-3.176, -1.259], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (1.361, 2.956], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], 
(-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (1.361, 2.956], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], (0.0816, 1.361], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], [-3.176, -1.259], [-3.176, -1.259], (-1.259, 0.0816], [-3.176, -1.259], (1.361, 2.956], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (-1.259, 0.0816], (1.361, 2.956], (0.0816, 1.361], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], (0.0816, 1.361], [-3.176, -1.259], (0.0816, 1.361], (-1.259, 0.0816], (-1.259, 0.0816], [-3.176, -1.259], (0.0816, 1.361], (1.361, 2.956], (-1.259, 0.0816], (-1.259, 0.0816], (0.0816, 1.361], (1.361, 2.956], (0.0816, 1.361], (-1.259, 0.0816], [-3.176, -1.259], (-1.259, 0.0816], (1.361, 2.956], [-3.176, -1.259], (-1.259, 0.0816], (-1.259, 0.0816], (-1.259, 0.0816], (1.361, 2.956], (1.361, 2.956]]\n", "Levels (4): Index(['[-3.176, -1.259]', '(-1.259, 0.0816]',\n", " '(0.0816, 1.361]', '(1.361, 2.956]'], dtype=object)" ] } ], "prompt_number": 469 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.value_counts(cats2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 470, "text": [ "(0.0816, 1.361] 400\n", "(-1.259, 0.0816] 400\n", "[-3.176, -1.259] 100\n", "(1.361, 2.956] 100\n", "dtype: int64" ] } ], "prompt_number": 470 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uadf8\ub8f9 \ubd84\uc11d\uacfc \ubcc0\uc704\uce58\ub97c \ub2e4\ub8f0 \ub54c\ub294 cut\uacfc qcut \ud568\uc218 \uac19\uc740 \uc774\uc0b0 \ud568\uc218\uac00 \ud2b9\ud788 \ub354 \uc720\uc6a9\n", "- \uc774 \ub0b4\uc6a9\uc740 \uc218\uc9d1\uacfc \uadf8\ub8f9 \uc5f0\uc0b0\uc5d0 \ub300\ud55c \uc7a5\uc5d0\uc11c \ub2e4\uc2dc \ud55c \ubc88 \uc0b4\ud3b4\ubd04\n", "\n", "#### \ud1b5\uacc4, \ud655\ub960 
\ubaa8\ub974\uba74 \uc6a9\uc5b4\ubd80\ud130 \uc774\ud574\uac00 \uc548\ub428.. \uc774\uc0b0 \ud568\uc218?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.6 \ud2b9\uc774\uac12 \ucc3e\uc544\ub0b4\uace0 \uc81c\uc678\ud558\uae30\n", "\n", "- \ubc30\uc5f4\uc5f0\uc0b0\uc744 \uc218\ud589\ud560 \ub54c\ub294 \ud2b9\uc774\uac12(outlier)\uc744 \uc81c\uc678\ud558\uac70\ub098 \uc801\ub2f9\ud55c \uac12\uc73c\ub85c \ub300\uccb4\ud558\ub294 \uac83 \uc911\uc694\n", "- \uc2e4\uc81c \uc2e4\ubb34\uc5d0\uc11c\ub3c4 outlier\ub294 \uc81c\uac70\ud55c\ub2e4\uace0 \ud55c\ub2e4. \uc774 \ud558\ub098\uc758 \ud2b9\uc774\uac12\uc774 \ub098\uba38\uc9c0 \uac12\ub4e4\uc744 \ubcc0\ud654\uc2dc\ud0ac \uc218\ub3c4 \uc788\uc5b4\uc11c.\n", "- \uc5b4\ub290 \uc815\ub3c4 \uacbd\uacc4\uae4c\uc9c0 outlier\ub85c \ubcfc \uac83\uc778\uc9c0? \uc774\uac8c \ub610 \ubb38\uc81c\uc778\ub4ef.." ] }, { "cell_type": "code", "collapsed": false, "input": [ "np.random.seed?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 476 }, { "cell_type": "raw", "metadata": {}, "source": [ "Type: builtin_function_or_method\n", "String form: \n", "Docstring:\n", "seed(seed=None)\n", "\n", "Seed the generator.\n", "\n", "This method is called when `RandomState` is initialized. It can be\n", "called again to re-seed the generator. For details, see `RandomState`.\n", "\n", "Parameters\n", "----------\n", "seed : int or array_like, optional\n", " Seed for `RandomState`.\n", "\n", "See Also\n", "--------\n", "RandomState" ] }, { "cell_type": "code", "collapsed": false, "input": [ "np.random.seed(12345)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 471 }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame(np.random.randn(1000, 4))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 472 }, { "cell_type": "code", "collapsed": false, "input": [ "data.describe()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
count 1000.000000 1000.000000 1000.000000 1000.000000
mean -0.067684 0.067924 0.025598 -0.002298
std 0.998035 0.992106 1.006835 0.996794
min -3.428254 -3.548824 -3.184377 -3.745356
25% -0.774890 -0.591841 -0.641675 -0.644144
50% -0.116401 0.101143 0.002073 -0.013611
75% 0.616366 0.780282 0.680391 0.654328
max 3.366626 2.653656 3.260383 3.927528
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 473, "text": [ " 0 1 2 3\n", "count 1000.000000 1000.000000 1000.000000 1000.000000\n", "mean -0.067684 0.067924 0.025598 -0.002298\n", "std 0.998035 0.992106 1.006835 0.996794\n", "min -3.428254 -3.548824 -3.184377 -3.745356\n", "25% -0.774890 -0.591841 -0.641675 -0.644144\n", "50% -0.116401 0.101143 0.002073 -0.013611\n", "75% 0.616366 0.780282 0.680391 0.654328\n", "max 3.366626 2.653656 3.260383 3.927528" ] } ], "prompt_number": 473 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud55c \uce7c\ub7fc\uc5d0\uc11c \uc808\ub300\uac12\uc774 3\uc744 \ucd08\uacfc\ud558\ub294 \uac12 \ucc3e\uc544\ub0b4\uae30" ] }, { "cell_type": "code", "collapsed": false, "input": [ "col = data[3]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 474 }, { "cell_type": "code", "collapsed": false, "input": [ "col[np.abs(col) > 3]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 475, "text": [ "97 3.927528\n", "305 -3.399312\n", "400 -3.745356\n", "Name: 3, dtype: float64" ] } ], "prompt_number": 475 }, { "cell_type": "code", "collapsed": false, "input": [ "data[np.abs(data) > 3]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 1000 entries, 0 to 999\n",
        "Data columns (total 4 columns):\n",
        "0    2  non-null values\n",
        "1    1  non-null values\n",
        "2    5  non-null values\n",
        "3    3  non-null values\n",
        "dtypes: float64(4)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 477, "text": [ "\n", "Int64Index: 1000 entries, 0 to 999\n", "Data columns (total 4 columns):\n", "0 2 non-null values\n", "1 1 non-null values\n", "2 5 non-null values\n", "3 3 non-null values\n", "dtypes: float64(4)" ] } ], "prompt_number": 477 }, { "cell_type": "code", "collapsed": false, "input": [ "data[(np.abs(data) > 3)]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 1000 entries, 0 to 999\n",
        "Data columns (total 4 columns):\n",
        "0    2  non-null values\n",
        "1    1  non-null values\n",
        "2    5  non-null values\n",
        "3    3  non-null values\n",
        "dtypes: float64(4)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 478, "text": [ "\n", "Int64Index: 1000 entries, 0 to 999\n", "Data columns (total 4 columns):\n", "0 2 non-null values\n", "1 1 non-null values\n", "2 5 non-null values\n", "3 3 non-null values\n", "dtypes: float64(4)" ] } ], "prompt_number": 478 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc808\ub300\uac12 3\uc744 \ucd08\uacfc\ud558\ub294 \uac12\uc774 \ub4e4\uc5b4\uc788\ub294 \ubaa8\ub4e0 \ub85c\uc6b0\ub97c \uc120\ud0dd\ud558\ub824\uba74 any \uba54\uc11c\ub4dc \uc0ac\uc6a9\n", "data[(np.abs(data) > 3).any(1)]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
5 -0.539741 0.476985 3.248944-1.021228
97 -0.774363 0.552936 0.106061 3.927528
102-0.655054-0.565230 3.176873 0.959533
305-2.315555 0.457246-0.025907-3.399312
324 0.050188 1.951312 3.260383 0.963301
400 0.146326 0.508391-0.196713-3.745356
499-0.293333-0.242459-3.056990 1.918403
523-3.428254-0.296336-0.439938-0.867165
586 0.275144 1.179227-3.184377 1.369891
808-0.362528-3.548824 1.553205-2.186301
900 3.366626-2.372214 0.851010 1.332846
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 479, "text": [ " 0 1 2 3\n", "5 -0.539741 0.476985 3.248944 -1.021228\n", "97 -0.774363 0.552936 0.106061 3.927528\n", "102 -0.655054 -0.565230 3.176873 0.959533\n", "305 -2.315555 0.457246 -0.025907 -3.399312\n", "324 0.050188 1.951312 3.260383 0.963301\n", "400 0.146326 0.508391 -0.196713 -3.745356\n", "499 -0.293333 -0.242459 -3.056990 1.918403\n", "523 -3.428254 -0.296336 -0.439938 -0.867165\n", "586 0.275144 1.179227 -3.184377 1.369891\n", "808 -0.362528 -3.548824 1.553205 -2.186301\n", "900 3.366626 -2.372214 0.851010 1.332846" ] } ], "prompt_number": 479 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc808\ub300\uac12\uc774 3\uc744 \ucd08\uacfc\ud558\ub294 \uac12\uc744 \ubd80\ud638\uc5d0 \ub530\ub77c -3 \ud639\uc740 3\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc720\ub2c8\ubc84\uc124 \ud568\uc218\uc778 np.sign\uc740 \uc8fc\uc5b4\uc9c4 \uac12\uc758 \ubd80\ud638\uc5d0 \ub530\ub77c 1\uc774\ub098 -1(0\uc774\uba74 0)\uc774 \ub2f4\uae34 \ubc30\uc5f4\uc744 \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc774\ub807\uac8c\ud558\uba74 sign\uac12\ub9cc \ucd94\ucd9c\ud560 \uc218 \uc788\ub2e4.\n", "np.sign(data[2])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 505, "text": [ "0 -1\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", "5 1\n", "6 1\n", "7 -1\n", "8 -1\n", "9 -1\n", "10 -1\n", "11 1\n", "12 1\n", "13 1\n", "14 1\n", "...\n", "985 1\n", "986 1\n", "987 1\n", "988 -1\n", "989 1\n", "990 -1\n", "991 1\n", "992 -1\n", "993 -1\n", "994 1\n", "995 -1\n", "996 -1\n", "997 1\n", "998 -1\n", "999 1\n", "Name: 2, Length: 1000, dtype: float64" ] } ], "prompt_number": 505 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc808\ub300\uac12 3\uc744 \ub118\ub294 \uac83\ub4e4\uc744 \uc218\uc815\ud558\ub294\ub370 \uc6d0\ub798 \ub370\uc774\ud130\uc758 sign\uac12\uc744
\uc720\uc9c0\ud558\uba74\uc11c * 3\uc744 \ud55c\ub2e4.\n", "data[np.abs(data) > 3] = np.sign(data) * 3" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 500 }, { "cell_type": "code", "collapsed": false, "input": [ "data[(np.abs(data) >= 3).any(1)]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
5 -0.539741 0.476985 3.000000-1.021228
97 -0.774363 0.552936 0.106061 3.000000
102-0.655054-0.565230 3.000000 0.959533
305-2.315555 0.457246-0.025907-3.000000
324 0.050188 1.951312 3.000000 0.963301
400 0.146326 0.508391-0.196713-3.000000
499-0.293333-0.242459-3.000000 1.918403
523-3.000000-0.296336-0.439938-0.867165
586 0.275144 1.179227-3.000000 1.369891
808-0.362528-3.000000 1.553205-2.186301
900 3.000000-2.372214 0.851010 1.332846
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 502, "text": [ " 0 1 2 3\n", "5 -0.539741 0.476985 3.000000 -1.021228\n", "97 -0.774363 0.552936 0.106061 3.000000\n", "102 -0.655054 -0.565230 3.000000 0.959533\n", "305 -2.315555 0.457246 -0.025907 -3.000000\n", "324 0.050188 1.951312 3.000000 0.963301\n", "400 0.146326 0.508391 -0.196713 -3.000000\n", "499 -0.293333 -0.242459 -3.000000 1.918403\n", "523 -3.000000 -0.296336 -0.439938 -0.867165\n", "586 0.275144 1.179227 -3.000000 1.369891\n", "808 -0.362528 -3.000000 1.553205 -2.186301\n", "900 3.000000 -2.372214 0.851010 1.332846" ] } ], "prompt_number": 502 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### np.any \uc2e4\ud5d8" ] }, { "cell_type": "code", "collapsed": false, "input": [ "(np.abs(data) > 3).any?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 486 }, { "cell_type": "markdown", "metadata": {}, "source": [ " Type: function\n", " String form: \n", " File: /Library/Python/2.7/site-packages/numpy-1.9.0.dev_c50e60d-py2.7-macosx-10.8-x86_64.egg/numpy/core/fromnumeric.py\n", " Definition: any(a, axis=None, out=None, keepdims=False)\n", " Docstring:\n", " Test whether any array element along a given axis evaluates to True.\n", "\n", " Returns single boolean unless `axis` is not ``None``\n", "\n", " Parameters\n", " ----------\n", " a : array_like\n", " Input array or object that can be converted to an array.\n", " axis : None or int or tuple of ints, optional\n", " Axis or axes along which a logical OR reduction is performed.\n", " The default (`axis` = `None`) is perform a logical OR over all\n", " the dimensions of the input array. `axis` may be negative, in\n", " which case it counts from the last to the first axis.\n", "\n", " .. 
versionadded:: 1.7.0\n", "\n", " If this is a tuple of ints, a reduction is performed on multiple\n", " axes, instead of a single axis or all the axes as before.\n", " out : ndarray, optional\n", " Alternate output array in which to place the result. It must have\n", " the same shape as the expected output and its type is preserved\n", " (e.g., if it is of type float, then it will remain so, returning\n", " 1.0 for True and 0.0 for False, regardless of the type of `a`).\n", " See `doc.ufuncs` (Section \"Output arguments\") for details.\n", " keepdims : bool, optional\n", " If this is set to True, the axes which are reduced are left\n", " in the result as dimensions with size one. With this option,\n", " the result will broadcast correctly against the original `arr`.\n", "\n", " Returns\n", " -------\n", " any : bool or ndarray\n", " A new boolean or `ndarray` is returned unless `out` is specified,\n", " in which case a reference to `out` is returned.\n", "\n", " See Also\n", " --------\n", " ndarray.any : equivalent method\n", "\n", " all : Test whether all elements along a given axis evaluate to True.\n", "\n", " Notes\n", " -----\n", " Not a Number (NaN), positive infinity and negative infinity evaluate\n", " to `True` because these are not equal to zero.\n", "\n", " Examples\n", " --------\n", " >>> np.any([[True, False], [True, True]])\n", " True\n", "\n", " >>> np.any([[True, False], [False, False]], axis=0)\n", " array([ True, False], dtype=bool)\n", "\n", " >>> np.any([-1, 0, 5])\n", " True\n", "\n", " >>> np.any(np.nan)\n", " True\n", "\n", " >>> o=np.array([False])\n", " >>> z=np.any([-1, 4, 5], out=o)\n", " >>> z, o\n", " (array([ True], dtype=bool), array([ True], dtype=bool))\n", " >>> # Check now that z is a reference to o\n", " >>> z is o\n", " True\n", " >>> id(z), id(o) # identity of z and o # doctest: +SKIP\n", " (191614240, 191614240)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data[(np.abs(data) > 3).any()]" ], "language": 
"python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.py:1853: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", " \"DataFrame index.\", UserWarning)\n" ] }, { "ename": "IndexingError", "evalue": "Unalignable boolean Series key provided", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mIndexingError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1821\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1822\u001b[0m \u001b[0;31m# either boolean or fancy integer index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1823\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1824\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1825\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_array\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1857\u001b[0m \u001b[0;31m# _check_bool_indexer will throw exception if Series key cannot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1858\u001b[0m \u001b[0;31m# be reindexed to match DataFrame rows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1859\u001b[0;31m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_bool_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1860\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1861\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_check_bool_indexer\u001b[0;34m(ax, key)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1215\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unalignable boolean Series key provided'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1216\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1217\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexingError\u001b[0m: Unalignable boolean Series key provided" ] } ], "prompt_number": 480 }, { "cell_type": "code", "collapsed": false, "input": [ "data[(np.abs(data) > 3).any(0)]" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "IndexingError", "evalue": "Unalignable boolean Series key provided", "output_type": "pyerr", "traceback": [ 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mIndexingError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1821\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1822\u001b[0m \u001b[0;31m# either boolean or fancy integer index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1823\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1824\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1825\u001b[0m 
\u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_getitem_array\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1857\u001b[0m \u001b[0;31m# _check_bool_indexer will throw exception if Series key cannot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1858\u001b[0m \u001b[0;31m# be reindexed to match DataFrame rows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1859\u001b[0;31m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_bool_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1860\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnonzero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1861\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_check_bool_indexer\u001b[0;34m(ax, key)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1214\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1215\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unalignable boolean Series key provided'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1216\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1217\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexingError\u001b[0m: Unalignable boolean Series key provided" ] } ], "prompt_number": 481 }, { "cell_type": "code", "collapsed": false, "input": [ "data[(np.abs(data) > 3).any(2)]" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "No axis named 2", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m 
\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36many\u001b[0;34m(self, axis, bool_only, skipna, level)\u001b[0m\n\u001b[1;32m 4087\u001b[0m skipna=skipna)\n\u001b[1;32m 4088\u001b[0m return self._reduce(nanops.nanany, axis=axis, skipna=skipna,\n\u001b[0;32m-> 4089\u001b[0;31m numeric_only=bool_only, filter_type='bool')\n\u001b[0m\u001b[1;32m 4090\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4091\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbool_only\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_reduce\u001b[0;34m(self, op, axis, skipna, numeric_only, filter_type, **kwds)\u001b[0m\n\u001b[1;32m 4276\u001b[0m def _reduce(self, op, axis=0, skipna=True, numeric_only=None,\n\u001b[1;32m 4277\u001b[0m filter_type=None, **kwds):\n\u001b[0;32m-> 4278\u001b[0;31m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4279\u001b[0m \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4280\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_agg_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m_get_axis_number\u001b[0;34m(self, axis)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'No axis named %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_axis_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: No axis named 2" ] } ], "prompt_number": 485 }, { "cell_type": "code", "collapsed": false, "input": [ "np.any([[True, False], [True, True]])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 490, "text": [ "True" ] } ], "prompt_number": 490 }, { "cell_type": 
"code", "collapsed": false, "input": [ "np.any([[True, False], [True, True]], axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 491, "text": [ "array([ True, True], dtype=bool)" ] } ], "prompt_number": 491 }, { "cell_type": "code", "collapsed": false, "input": [ "np.any([[True, False], [False, False]], axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 493, "text": [ "array([ True, False], dtype=bool)" ] } ], "prompt_number": 493 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.7 \uce58\ud658\uacfc \uc784\uc758 \uc0d8\ud50c\ub9c1\n", "\n", "- numpy.random.permutation \ud568\uc218\ub97c \uc774\uc6a9\ud558\uba74 **\ub85c\uc6b0**\ub97c \uc27d\uac8c \uc784\uc758\uc758 \uc21c\uc11c\ub300\ub85c \uc7ac\ubc30\uce58\n", "- \uc21c\uc11c\ub97c \ubc14\uafb8\uace0 \uc2f6\uc740 \ub9cc\ud07c\uc758 \uae38\uc774\ub97c permutation \ud568\uc218\uc5d0 \ub118\uae30\uba74 \ubc14\ub010 \uc21c\uc11c\uac00 \ub2f4\uae34 \uc815\uc218 \ubc30\uc5f4 \uc0dd\uc131" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame(np.arange(5 * 4).reshape(5, 4))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 506 }, { "cell_type": "code", "collapsed": false, "input": [ "sampler = np.random.permutation(5)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 507 }, { "cell_type": "code", "collapsed": false, "input": [ "sampler" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 508, "text": [ "array([1, 0, 2, 3, 4])" ] } ], "prompt_number": 508 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
3 12 13 14 15
4 16 17 18 19
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 509, "text": [ " 0 1 2 3\n", "0 0 1 2 3\n", "1 4 5 6 7\n", "2 8 9 10 11\n", "3 12 13 14 15\n", "4 16 17 18 19" ] } ], "prompt_number": 509 }, { "cell_type": "code", "collapsed": false, "input": [ "df.take(sampler)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
1 4 5 6 7
0 0 1 2 3
2 8 9 10 11
3 12 13 14 15
4 16 17 18 19
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 512, "text": [ " 0 1 2 3\n", "1 4 5 6 7\n", "0 0 1 2 3\n", "2 8 9 10 11\n", "3 12 13 14 15\n", "4 16 17 18 19" ] } ], "prompt_number": 512 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uce58\ud658\uc5c6\uc774 \uc77c\ubd80\ub9cc \uc784\uc758\ub85c \uc120\ud0dd\ud558\ub824\uba74 permutation \ud568\uc218\uc5d0\uc11c \ubc18\ud658\ub41c \ubc30\uc5f4\uc5d0\uc11c \uc6d0\ud558\ub294 \ud06c\uae30 k\ub9cc\ud07c\ub9cc \ucc98\uc74c\ubd80\ud130 \uc798\ub77c\ub0b4\uba74 \ub41c\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# 1. df\uc758 \ud06c\uae30\ub97c \ub118\uaca8\uc8fc\uc5b4 \uce58\ud658\uc744 \ud55c\ub2e4.\n", "# 2. df.take \ud568\uc218\ub85c df\uc5d0 \ud568\uc218 \uc801\uc6a9\n", "# 3. \uc774 \ub370\uc774\ud130 \uae38\uc774\ub294 5\uac1c\uc774\uc9c0\ub9cc 0,1,2\ub9cc \ubcf4\uace0 \uc2f6\uc744 \ub54c [:3]\uc73c\ub85c \ubcf4\uc5ec\uc8fc\ub294 \uac83 \uc81c\ud55c\n", "df.take(np.random.permutation(len(df))[:3])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
4 16 17 18 19
2 8 9 10 11
0 0 1 2 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 530, "text": [ " 0 1 2 3\n", "4 16 17 18 19\n", "2 8 9 10 11\n", "0 0 1 2 3" ] } ], "prompt_number": 530 }, { "cell_type": "code", "collapsed": false, "input": [ "df.take(np.random.permutation(len(df)))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0 0 1 2 3
3 12 13 14 15
4 16 17 18 19
2 8 9 10 11
1 4 5 6 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 531, "text": [ " 0 1 2 3\n", "0 0 1 2 3\n", "3 12 13 14 15\n", "4 16 17 18 19\n", "2 8 9 10 11\n", "1 4 5 6 7" ] } ], "prompt_number": 531 }, { "cell_type": "code", "collapsed": false, "input": [ "# \ud560 \ub54c\ub9c8\ub2e4 \ubc14\ub01c\n", "# 1. df\uc758 \ud06c\uae30\ub97c \ub118\uaca8\uc8fc\uc5b4 \uce58\ud658\uc744 \ud55c\ub2e4.\n", "np.random.permutation(len(df))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 517, "text": [ "array([0, 3, 2, 4, 1])" ] } ], "prompt_number": 517 }, { "cell_type": "code", "collapsed": false, "input": [ "np.random.permutation(len(df))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 523, "text": [ "array([4, 1, 3, 0, 2])" ] } ], "prompt_number": 523 }, { "cell_type": "code", "collapsed": false, "input": [ "np.random.permutation(5)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 528, "text": [ "array([0, 3, 4, 1, 2])" ] } ], "prompt_number": 528 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uce58\ud658(\ubcf5\uc6d0 \ucd94\ucd9c, sampling with replacement)\uc744 \ud1b5\ud574 \ud45c\ubcf8\uc744 \uc0dd\uc131\ud558\uae30 \uc704\ud55c \uac00\uc7a5 \ube60\ub978 \ubc29\ubc95\uc740 np.random.randint\ub97c \uc0ac\uc6a9\ud574\uc11c \uc0dd\uc131\ud55c \ub09c\uc218\ub97c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95" ] }, { "cell_type": "code", "collapsed": false, "input": [ "bag = np.array([5, 7, -1, 6, 4])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 532 }, { "cell_type": "code", "collapsed": false, "input": [ "# 1.
sampler\uc5d0 \uc758\ud574\uc11c 0, bag\uc758 \ud06c\uae30\ub85c \ub79c\ub364 int\uac00 \uc0dd\uc131\ud558\ub294\ub370 10\uac1c \uc0dd\uc131\n", "sampler = np.random.randint(0, len(bag), size=10)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 537 }, { "cell_type": "code", "collapsed": false, "input": [ "sampler" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 541, "text": [ "array([1, 3, 2, 4, 2, 2, 1, 0, 0, 3])" ] } ], "prompt_number": 541 }, { "cell_type": "code", "collapsed": false, "input": [ "draws = bag.take(sampler)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 539 }, { "cell_type": "code", "collapsed": false, "input": [ "# sampler \uc21c\uc11c\uc5d0 \ub530\ub77c bag\uc5d0 \uc788\ub294 \uc6d0\uc18c\ub4e4\uc744 \ubc30\uc5f4\n", "# sampler[0]: 1\uc774\ub2c8 bag[1]\uc758 7\n", "# sampler[1]: 3\uc774\ub2c8 bag[3]\uc758 6\n", "draws" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 540, "text": [ "array([ 7, 6, -1, 4, -1, -1, 7, 5, 5, 6])" ] } ], "prompt_number": 540 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.3.8 \ud45c\uc2dc\uc790/\ub354\ubbf8 \ubcc0\uc218\n", "\n", "- \ud1b5\uacc4 \ubaa8\ub378\uc774\ub098 \uae30\uacc4 \ud559\uc2b5 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc744 \uc704\ud55c \ub610 \ub2e4\ub978 \ub370\uc774\ud130 \ubcc0\ud615\uc740 \ubd84\ub958 \uac12\uc744 \ub354\ubbf8\ub098 \ud45c\uc2dc \ud589\ub82c\ub85c \ubcc0\ud658\ud558\ub294 \uac83\n", "- \ub9cc\uc57d \uc5b4\ub5a4 DataFrame\uc758 \ud55c \uce7c\ub7fc\uc5d0 k\uac00\uc9c0\uc758 \uac12\uc774 \uc788\ub2e4\uba74 k\uac1c\uc758 \uce7c\ub7fc\uc774 \uc788\ub294 DataFrame\uc774\ub098 \ud589\ub82c\uc744 \ub9cc\ub4e4\uace0 \uac12\uc73c\ub85c\ub294 1\uacfc 0\uc744 \ucc44\uc6cc \ub123\uc744 \uac83\uc774\ub2e4.\n", "- \uc774\ub54c \uc0ac\uc6a9\ud558\ub294 \ud568\uc218\uac00 pandas\uc758 get_dummies 
\ud568\uc218\uc774\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],\n", " 'data1': range(6)})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 542 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 b
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 545, "text": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 b" ] } ], "prompt_number": 545 }, { "cell_type": "code", "collapsed": false, "input": [ "# df['key']\ub300\ub85c key\uac00 b\uc774\uba74 b\uceec\ub7fc\uc5d0 1 \uc138\ud305. \ub098\uba38\uc9c0\ub294 0\n", "pd.get_dummies(df['key'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0 0 1 0
1 0 1 0
2 1 0 0
3 0 0 1
4 1 0 0
5 0 1 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 543, "text": [ " a b c\n", "0 0 1 0\n", "1 0 1 0\n", "2 1 0 0\n", "3 0 0 1\n", "4 1 0 0\n", "5 0 1 0" ] } ], "prompt_number": 543 }, { "cell_type": "code", "collapsed": false, "input": [ "df['key']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 544, "text": [ "0 b\n", "1 b\n", "2 a\n", "3 c\n", "4 a\n", "5 b\n", "Name: key, dtype: object" ] } ], "prompt_number": 544 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud45c\uc2dc\uc6a9 DataFrame \uc548\uc5d0 \uc788\ub294 \uce7c\ub7fc\uc5d0 prefix\ub97c \ucd94\uac00\ud55c \ud6c4 \ub2e4\ub978 \ub370\uc774\ud130\uc640 \ubcd1\ud569\ud558\uace0 \uc2f6\uc744 \ub54c\uac00 \uc788\ub294\ub370, \uc774\ub7f0 \uacbd\uc6b0 get_dummies \ud568\uc218\uc758 prefix \uc778\uc790\ub97c \uc0ac\uc6a9" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dummies = pd.get_dummies(df['key'], prefix='key')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 546 }, { "cell_type": "code", "collapsed": false, "input": [ "dummies" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key_akey_bkey_c
0 0 1 0
1 0 1 0
2 1 0 0
3 0 0 1
4 1 0 0
5 0 1 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 549, "text": [ " key_a key_b key_c\n", "0 0 1 0\n", "1 0 1 0\n", "2 1 0 0\n", "3 0 0 1\n", "4 1 0 0\n", "5 0 1 0" ] } ], "prompt_number": 549 }, { "cell_type": "code", "collapsed": false, "input": [ "df_with_dummy = df[['data1']].join(dummies)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 547 }, { "cell_type": "code", "collapsed": false, "input": [ "df_with_dummy" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key_akey_bkey_c
0 0 0 1 0
1 1 0 1 0
2 2 1 0 0
3 3 0 0 1
4 4 1 0 0
5 5 0 1 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 548, "text": [ " data1 key_a key_b key_c\n", "0 0 0 1 0\n", "1 1 0 1 0\n", "2 2 1 0 0\n", "3 3 0 0 1\n", "4 4 1 0 0\n", "5 5 0 1 0" ] } ], "prompt_number": 548 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- DataFrame\uc758 \ud55c \uceec\ub7fc\uc774 \uc5ec\ub7ec \uce74\ud14c\uace0\ub9ac\uc5d0 \uc18d\ud55c\ub2e4\uba74 \uc77c\uc774 \uc870\uae08 \ubcf5\uc7a1" ] }, { "cell_type": "code", "collapsed": false, "input": [ "mnames = ['movie_id', 'title', 'genres']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 550 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc6d0\ubcf8 pydata\uc5d0\ub294 ch02\uc5d0 movielens\uac00 \ub4e4\uc5b4\uc788\ub2e4. ch07 \ud3f4\ub354\ub85c \uc774\ub3d9 \ud6c4 \ud14c\uc2a4\ud2b8 \ud574\uc57c\ud55c\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "movies = pd.read_table('ch07/movielens/movies.dat', sep='::',\n", " header=None, names=mnames)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 553 }, { "cell_type": "code", "collapsed": false, "input": [ "movies[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitlegenres
0 1 Toy Story (1995) Animation|Children's|Comedy
1 2 Jumanji (1995) Adventure|Children's|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama
4 5 Father of the Bride Part II (1995) Comedy
5 6 Heat (1995) Action|Crime|Thriller
6 7 Sabrina (1995) Comedy|Romance
7 8 Tom and Huck (1995) Adventure|Children's
8 9 Sudden Death (1995) Action
9 10 GoldenEye (1995) Action|Adventure|Thriller
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 554, "text": [ " movie_id title genres\n", "0 1 Toy Story (1995) Animation|Children's|Comedy\n", "1 2 Jumanji (1995) Adventure|Children's|Fantasy\n", "2 3 Grumpier Old Men (1995) Comedy|Romance\n", "3 4 Waiting to Exhale (1995) Comedy|Drama\n", "4 5 Father of the Bride Part II (1995) Comedy\n", "5 6 Heat (1995) Action|Crime|Thriller\n", "6 7 Sabrina (1995) Comedy|Romance\n", "7 8 Tom and Huck (1995) Adventure|Children's\n", "8 9 Sudden Death (1995) Action\n", "9 10 GoldenEye (1995) Action|Adventure|Thriller" ] } ], "prompt_number": 554 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uac01 \uc7a5\ub974\ub9c8\ub2e4 \ud45c\uc2dc\uc6a9 \uac12\uc744 \ucd94\uac00\ud558\ub824\uba74 \uc57d\uac04\uc758 \uc218\uace0\n", "- \uba3c\uc800 \ub370\uc774\ud130 \ubb36\uc74c\uc5d0\uc11c \uc720\uc77c\ud55c \uc7a5\ub974 \ubaa9\ub85d\uc744 \ucd94\ucd9c(set.union \ud2b8\ub9ad \uc0ac\uc6a9)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc774\uac78 Unique \ud558\uac8c \ub9cc\ub4e4\uc5b4\uc57c \ud55c\ub2e4.\n", "movies.genres" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 562, "text": [ "0 Animation|Children's|Comedy\n", "1 Adventure|Children's|Fantasy\n", "2 Comedy|Romance\n", "3 Comedy|Drama\n", "4 Comedy\n", "5 Action|Crime|Thriller\n", "6 Comedy|Romance\n", "7 Adventure|Children's\n", "8 Action\n", "9 Action|Adventure|Thriller\n", "10 Comedy|Drama|Romance\n", "11 Comedy|Horror\n", "12 Animation|Children's\n", "13 Drama\n", "14 Action|Adventure|Romance\n", "...\n", "3868 Horror\n", "3869 Horror\n", "3870 Horror\n", "3871 Horror\n", "3872 Horror\n", "3873 Comedy\n", "3874 Comedy|Drama\n", "3875 Adventure|Animation|Children's\n", "3876 Action|Drama|Thriller\n", "3877 Thriller\n", "3878 Comedy\n", "3879 Drama\n", "3880 Drama\n", "3881 Drama\n", "3882 Drama|Thriller\n", "Name: genres, Length: 3883, dtype: object" ] } ], 
"prompt_number": 562 }, { "cell_type": "code", "collapsed": false, "input": [ "# 1. movies.genres\uc5d0\uc11c x\ub97c \ud558\ub098\uc529 \ucd94\ucd9c\n", "# 2. x.split('|')\ub85c \ub098\ub208\ub2e4. \uadf8\ub7fc 1,2,3~ \uac00 \ub418\uaca0\uc9c0\n", "# 3. \uc5ec\uae30\uc5d0\uc11c \ub2e4\uc2dc \uc911\ubcf5\uc774 \uc5c6\uac8c \ud558\uae30 \uc704\ud574 set \ud568\uc218\ub97c \ud55c \ubc88 \uc801\uc6a9\n", "# 4. \ucd5c\uc885\uc801\uc73c\ub85c \uc7a5\ub974 generator\uac00 \uc0dd\uc131\ub428\n", "genre_iter = (set(x.split('|')) for x in movies.genres)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 555 }, { "cell_type": "code", "collapsed": false, "input": [ "genre_iter" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 557, "text": [ " at 0x10a0efaa0>" ] } ], "prompt_number": 557 }, { "cell_type": "code", "collapsed": false, "input": [ "set.union?" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 563 }, { "cell_type": "markdown", "metadata": {}, "source": [ " Type: method_descriptor\n", " String form: \n", " Namespace: Python builtin\n", " Docstring:\n", " Return the union of sets as a new set.\n", "\n", " (i.e. 
all elements that are in either set.)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# genre_iter\uac00 generator\uc774\uae30 \ub54c\ubb38\uc5d0 \uc55e\uc5d0 *\ub97c \ubd99\uc5ec\uc8fc\uc5b4 \uc5ec\ub7ec\uac1c\ub97c \ubc1b\uc744 \uc218 \uc788\uac8c \ud574\uc8fc\uc5c8\ub2e4.\n", "# \uc774 \ubd80\ubd84\uc740 \ub098\ub3c4 \uc798 \uc774\ud574\uac00 \uc548\ub428..\n", "# generator \ubd80\ubd84\uc744 Python Cookbook \ubcf4\uba74\uc11c \uacf5\ubd80\ud560 \uc608\uc815\n", "# \uc5b4\uca0b\ub4e0 set.union\uc73c\ub85c generator\ub97c 1\uac1c\uc529 \ub118\uaca8\uc11c\n", "# \ubaa8\ub450 \ud569\uce58\uace0 sorted \ud568\uc218\ub85c sorting\n", "genres = sorted(set.union(*genre_iter))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 559 }, { "cell_type": "code", "collapsed": false, "input": [ "genres" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 560, "text": [ "['Action',\n", " 'Adventure',\n", " 'Animation',\n", " \"Children's\",\n", " 'Comedy',\n", " 'Crime',\n", " 'Documentary',\n", " 'Drama',\n", " 'Fantasy',\n", " 'Film-Noir',\n", " 'Horror',\n", " 'Musical',\n", " 'Mystery',\n", " 'Romance',\n", " 'Sci-Fi',\n", " 'Thriller',\n", " 'War',\n", " 'Western']" ] } ], "prompt_number": 560 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud45c\uc2dc\uc6a9 DataFrame\uc744 \uc0dd\uc131\ud558\uae30 \uc704\ud574 0\uc73c\ub85c \ucd08\uae30\ud654\ub41c DataFrame\uc744 \uc0dd\uc131" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dummies = DataFrame(np.zeros((len(movies), len(genres))), columns=genres)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 564 }, { "cell_type": "code", "collapsed": false, "input": [ "dummies" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 3883 entries, 0 to 3882\n",
        "Data columns (total 18 columns):\n",
        "Action         3883  non-null values\n",
        "Adventure      3883  non-null values\n",
        "Animation      3883  non-null values\n",
        "Children's     3883  non-null values\n",
        "Comedy         3883  non-null values\n",
        "Crime          3883  non-null values\n",
        "Documentary    3883  non-null values\n",
        "Drama          3883  non-null values\n",
        "Fantasy        3883  non-null values\n",
        "Film-Noir      3883  non-null values\n",
        "Horror         3883  non-null values\n",
        "Musical        3883  non-null values\n",
        "Mystery        3883  non-null values\n",
        "Romance        3883  non-null values\n",
        "Sci-Fi         3883  non-null values\n",
        "Thriller       3883  non-null values\n",
        "War            3883  non-null values\n",
        "Western        3883  non-null values\n",
        "dtypes: float64(18)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 565, "text": [ "\n", "Int64Index: 3883 entries, 0 to 3882\n", "Data columns (total 18 columns):\n", "Action 3883 non-null values\n", "Adventure 3883 non-null values\n", "Animation 3883 non-null values\n", "Children's 3883 non-null values\n", "Comedy 3883 non-null values\n", "Crime 3883 non-null values\n", "Documentary 3883 non-null values\n", "Drama 3883 non-null values\n", "Fantasy 3883 non-null values\n", "Film-Noir 3883 non-null values\n", "Horror 3883 non-null values\n", "Musical 3883 non-null values\n", "Mystery 3883 non-null values\n", "Romance 3883 non-null values\n", "Sci-Fi 3883 non-null values\n", "Thriller 3883 non-null values\n", "War 3883 non-null values\n", "Western 3883 non-null values\n", "dtypes: float64(18)" ] } ], "prompt_number": 565 }, { "cell_type": "code", "collapsed": false, "input": [ "dummies[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ActionAdventureAnimationChildren'sComedyCrimeDocumentaryDramaFantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 566, "text": [ " Action Adventure Animation Children's Comedy Crime Documentary \\\n", "0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 \n", "5 0 0 0 0 0 0 0 \n", "6 0 0 0 0 0 0 0 \n", "7 0 0 0 0 0 0 0 \n", "8 0 0 0 0 0 0 0 \n", "9 0 0 0 0 0 0 0 \n", "\n", " Drama Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 \n", "5 0 0 0 0 0 0 0 0 \n", "6 0 0 0 0 0 0 0 0 \n", "7 0 0 0 0 0 0 0 0 \n", "8 0 0 0 0 0 0 0 0 \n", "9 0 0 0 0 0 0 0 0 \n", "\n", " Thriller War Western \n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "5 0 0 0 \n", "6 0 0 0 \n", "7 0 0 0 \n", "8 0 0 0 \n", "9 0 0 0 " ] } ], "prompt_number": 566 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uac01 \uc601\ud654\ub97c \uc21c\ud68c\ud558\uba74\uc11c dummies \uce7c\ub7fc\uc758 \uac01 \ud56d\ubaa9\uc744 1\ub85c \uc124\uc815" ] }, { "cell_type": "code", "collapsed": false, "input": [ "for i, gen in enumerate(movies.genres):\n", " dummies.ix[i, gen.split('|')] = 1" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 568 }, { "cell_type": "code", "collapsed": false, "input": [ "dummies[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ActionAdventureAnimationChildren'sComedyCrimeDocumentaryDramaFantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
3 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
5 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0
6 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
7 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 570, "text": [ " Action Adventure Animation Children's Comedy Crime Documentary \\\n", "0 0 0 1 1 1 0 0 \n", "1 0 1 0 1 0 0 0 \n", "2 0 0 0 0 1 0 0 \n", "3 0 0 0 0 1 0 0 \n", "4 0 0 0 0 1 0 0 \n", "5 1 0 0 0 0 1 0 \n", "6 0 0 0 0 1 0 0 \n", "7 0 1 0 1 0 0 0 \n", "8 1 0 0 0 0 0 0 \n", "9 1 1 0 0 0 0 0 \n", "\n", " Drama Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 1 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 1 0 \n", "3 1 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 \n", "5 0 0 0 0 0 0 0 0 \n", "6 0 0 0 0 0 0 1 0 \n", "7 0 0 0 0 0 0 0 0 \n", "8 0 0 0 0 0 0 0 0 \n", "9 0 0 0 0 0 0 0 0 \n", "\n", " Thriller War Western \n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "5 1 0 0 \n", "6 0 0 0 \n", "7 0 0 0 \n", "8 0 0 0 \n", "9 1 0 0 " ] } ], "prompt_number": 570 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc55e\uc5d0\uc11c \ud55c \ub300\ub85c movies DataFrame\uacfc \uc870\ud569\ud558\uba74 \ub428" ] }, { "cell_type": "code", "collapsed": false, "input": [ "movies_windic = movies.join(dummies.add_prefix('Genre_'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 571 }, { "cell_type": "code", "collapsed": false, "input": [ "movies_windic.ix[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 572, "text": [ "movie_id 1\n", "title Toy Story (1995)\n", "genres Animation|Children's|Comedy\n", "Genre_Action 0\n", "Genre_Adventure 0\n", "Genre_Animation 1\n", "Genre_Children's 1\n", "Genre_Comedy 1\n", "Genre_Crime 0\n", "Genre_Documentary 0\n", "Genre_Drama 0\n", "Genre_Fantasy 0\n", "Genre_Film-Noir 0\n", "Genre_Horror 0\n", "Genre_Musical 0\n", "Genre_Mystery 0\n", "Genre_Romance 0\n", "Genre_Sci-Fi 0\n", "Genre_Thriller 0\n", "Genre_War 0\n", "Genre_Western 0\n", "Name: 0, dtype: object" ] } ], "prompt_number": 572 }, { "cell_type": "code", "collapsed": 
false, "input": [ "movies_windic.ix[1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 578, "text": [ "movie_id 2\n", "title Jumanji (1995)\n", "genres Adventure|Children's|Fantasy\n", "Genre_Action 0\n", "Genre_Adventure 1\n", "Genre_Animation 0\n", "Genre_Children's 1\n", "Genre_Comedy 0\n", "Genre_Crime 0\n", "Genre_Documentary 0\n", "Genre_Drama 0\n", "Genre_Fantasy 1\n", "Genre_Film-Noir 0\n", "Genre_Horror 0\n", "Genre_Musical 0\n", "Genre_Mystery 0\n", "Genre_Romance 0\n", "Genre_Sci-Fi 0\n", "Genre_Thriller 0\n", "Genre_War 0\n", "Genre_Western 0\n", "Name: 1, dtype: object" ] } ], "prompt_number": 578 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ub370\uc774\ud130\uac00 \uc774\ubcf4\ub2e4 \ub354 \ud06c\ub2e4\uba74 \ub2e4\uc911 \uba64\ubc84\uc2ed\uc744 \uac16\ub294 \ud45c\uc2dc\uc6a9 \ubcc0\uc218\ub97c \uc774 \ubc29\ubc95\uc73c\ub85c \uc0dd\uc131\ud558\ub294 \uac83\uc740 \uadf8\ub2e4\uc9c0 \ube60\ub978 \ubc29\ubc95\uc740 \uc544\ub2c8\ub2e4. 
\ube60\ub974\uac8c \ucc98\ub9ac\ud558\uace0 \uc2f6\ub2e4\uba74 DataFrame\uc758 \ub0b4\ubd80\ub97c \uc870\uc791\ud558\ub294 \uc800\uc218\uc900\uc758 \ud568\uc218\ub97c \uc791\uc131\ud574\uc11c \uc0ac\uc6a9\ud574\uc57c \ud55c\ub2e4.\n", "\n", "\n", "- get_dummies\ub098 cut \uac19\uc740 \uc774\uc0b0 \ud568\uc218\ub97c \uc798 \uc870\ud569\ud558\uba74 \ud1b5\uacc4 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc5d0\uc11c \uc720\uc6a9\ud558\uac8c \uc0ac\uc6a9" ] }, { "cell_type": "code", "collapsed": false, "input": [ "values = np.random.rand(10)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 579 }, { "cell_type": "code", "collapsed": false, "input": [ "values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 580, "text": [ "array([ 0.17582534, 0.18368382, 0.93694953, 0.108632 , 0.46920334,\n", " 0.77175794, 0.58726166, 0.38948574, 0.76265434, 0.19847269])" ] } ], "prompt_number": 580 }, { "cell_type": "code", "collapsed": false, "input": [ "bins = [0, 0.2, 0.4, 0.6, 0.8, 1]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 581 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.get_dummies(pd.cut(values, bins))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
(0, 0.2](0.2, 0.4](0.4, 0.6](0.6, 0.8](0.8, 1]
0 1 0 0 0 0
1 1 0 0 0 0
2 0 0 0 0 1
3 1 0 0 0 0
4 0 0 1 0 0
5 0 0 0 1 0
6 0 0 1 0 0
7 0 1 0 0 0
8 0 0 0 1 0
9 1 0 0 0 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 582, "text": [ " (0, 0.2] (0.2, 0.4] (0.4, 0.6] (0.6, 0.8] (0.8, 1]\n", "0 1 0 0 0 0\n", "1 1 0 0 0 0\n", "2 0 0 0 0 1\n", "3 1 0 0 0 0\n", "4 0 0 1 0 0\n", "5 0 0 0 1 0\n", "6 0 0 1 0 0\n", "7 0 1 0 0 0\n", "8 0 0 0 1 0\n", "9 1 0 0 0 0" ] } ], "prompt_number": 582 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.cut(values, bins)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 583, "text": [ "Categorical: \n", "[(0, 0.2], (0, 0.2], (0.8, 1], (0, 0.2], (0.4, 0.6], (0.6, 0.8], (0.4, 0.6], (0.2, 0.4], (0.6, 0.8], (0, 0.2]]\n", "Levels (5): Index(['(0, 0.2]', '(0.2, 0.4]', '(0.4, 0.6]', '(0.6, 0.8]',\n", " '(0.8, 1]'], dtype=object)" ] } ], "prompt_number": 583 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 7.4 \ubb38\uc790\uc5f4 \ub2e4\ub8e8\uae30\n", "\n", "- \ud30c\uc774\uc36c \ub300\uc911\uc801 \uc778\uae30 \uc6d0\uc778: \ubb38\uc790\uc5f4\uc774\ub098 \ud14d\uc2a4\ud2b8 \ucc98\ub9ac\uc758 \uc6a9\uc774\ud568\uc774 \ud55c\ubaab\n", "- \ud14d\uc2a4\ud2b8 \uc5f0\uc0b0\uc740 \ub300\ubd80\ubd84 \ubb38\uc790\uc5f4 \uac1d\uccb4\uc758 \ub0b4\uc7a5 \uba54\uc11c\ub4dc\ub97c \ud1b5\ud574 \uac04\ub2e8\ud558\uac8c \ucc98\ub9ac\n", "- \uc880 \ub354 \ubcf5\uc7a1\ud55c \ud328\ud134 \ub9e4\uce6d\uc774\ub098 \ud14d\uc2a4\ud2b8 \uc870\uc791\uc740 \uc815\uaddc\ud45c\ud604\uc2dd \ud544\uc694\n", "- pandas\ub294 \ubc30\uc5f4 \ub370\uc774\ud130 \uc804\uccb4\uc5d0 \uc27d\uac8c \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \uc801\uc6a9\ud558\uace0 \ucd94\uac00\uc801\uc73c\ub85c \ub204\ub77d\ub41c \ub370\uc774\ud130\ub97c \ud3b8\ub9ac\ud558\uac8c \ucc98\ub9ac\ud560 \uc218 \uc788\ub294 \uae30\ub2a5 \ud3ec\ud568\n", "\n", "### 7.4.1 \ubb38\uc790\uc5f4 \uac1d\uccb4 \uba54\uc11c\ub4dc\n", "\n", "- \ub0b4\uc7a5 \ubb38\uc790\uc5f4 \uba54\uc11c\ub4dc\ub9cc\uc73c\ub85c\ub3c4 \uc660\ub9cc\ud55c \uc560\ud50c\ub9ac\ucf00\uc774\uc158\ub4e4\uc740 \ud574\uacb0 
\uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "val = 'a,b, guido'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 584 }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc27c\ud45c\ub85c \uad6c\ubd84\ub41c \ubb38\uc790\uc5f4 \ubd84\ub9ac\n", "val.split(',')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 589, "text": [ "['a', 'b', ' guido']" ] } ], "prompt_number": 589 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- strip \uba54\uc11c\ub4dc: \uacf5\ubc31\ubb38\uc790(\uc904\ubc14\uafc8\ubb38\uc790 \ud3ec\ud568) \uc81c\uac70" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pieces = [x.strip() for x in val.split(',')]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 586 }, { "cell_type": "code", "collapsed": false, "input": [ "pieces" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 587, "text": [ "['a', 'b', 'guido']" ] } ], "prompt_number": 587 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ub354\ud558\uae30 \uc5f0\uc0b0\uc744 \ud1b5\ud574 '::' \ubb38\uc790\uc5f4\uacfc \ud569\uce60 \uc218 \uc788\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "first, second, third = pieces" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 590 }, { "cell_type": "code", "collapsed": false, "input": [ "first" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 591, "text": [ "'a'" ] } ], "prompt_number": 591 }, { "cell_type": "code", "collapsed": false, "input": [ "second" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 592, "text": [ "'b'" ] } ], "prompt_number": 592 }, { "cell_type": "code", "collapsed": false, "input": [ "third" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 593, "text": [ "'guido'" ] } ], "prompt_number": 593 }, { "cell_type": "code", "collapsed": false, "input": [ "first + '::' + second + '::' + third" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 595, "text": [ "'a::b::guido'" ] } ], "prompt_number": 595 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- +\ub85c \ubb38\uc790\uc5f4\uc744 \ud569\uce58\ub294\uac8c \uadc0\ucc2e\ub2e4. +\ub97c \uc4f0\ub2e4\uac00 \uc624\ub958\uac00 \ub0a0 \uc218\ub3c4 \uc788\uace0..\n", "- \uadf8\ub798\uc11c join \uba54\uc11c\ub4dc\ub97c \ub9ce\uc774 \uc0ac\uc6a9\ud55c\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "# '\ud569\uce60\ubb38\uc790\uc5f4'.join(\ud569\uce60\ub9ac\uc2a4\ud2b8)\n", "'::'.join(pieces)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 597, "text": [ "'a::b::guido'" ] } ], "prompt_number": 597 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc77c\uce58\ud558\ub294 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \uc704\uce58\ub97c \ucc3e\ub294 \ubc29\ubc95\ub3c4 \uc874\uc7ac\n", "- index\ub098 find\ub97c \uc0ac\uc6a9\ud558\ub294 \uac83\ub3c4 \uac00\ub2a5\ud558\uc9c0\ub9cc \ud30c\uc774\uc36c\uc758 in \uc608\uc57d\uc5b4\ub97c \uc0ac\uc6a9\ud558\uba74 \uc77c\uce58\ud558\ub294 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc744 \uc27d\uac8c \ucc3e\uc744 \uc218 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "'guido' in val" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 598, "text": [ "True" ] } ], "prompt_number": 598 }, { "cell_type": "code", "collapsed": false, "input": [ "val.index(',')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 599, "text": [ "1" ] } ], "prompt_number": 599 }, { "cell_type": "code", "collapsed": false, "input": [ "val.find(':')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 600, "text": [ "-1" ] } ], "prompt_number": 600 }, { "cell_type": "code", "collapsed": false, "input": [ "val" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 601, "text": [ "'a,b, guido'" ] } ], "prompt_number": 601 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### find, index\uc758 \ucc28\uc774\uc810\n", "\n", "- index\uc758 \uacbd\uc6b0 \ubb38\uc790\uc5f4\uc744 \ucc3e\uc9c0 \ubabb\ud558\uba74 \uc608\uc678\ub97c \ubc1c\uc0dd\n", "- find\uc758 
\uacbd\uc6b0 -1\uc744 \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "val.index(':')" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "substring not found", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mval\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m':'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mValueError\u001b[0m: substring not found" ] } ], "prompt_number": 602 }, { "cell_type": "code", "collapsed": false, "input": [ "# count\ub294 \ud2b9\uc815 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc774 \uba87 \uac74 \ubc1c\uacac\ub418\uc5c8\ub294\uc9c0 \ubc18\ud658\n", "val.count(',')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 603, "text": [ "2" ] } ], "prompt_number": 603 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- replace\ub294 \ucc3e\uc544\ub0b8 \ud328\ud134\uc744 \ub2e4\ub978 \ubb38\uc790\uc5f4\ub85c \uce58\ud658\n", "- \ub300\uccb4\ud560 \ubb38\uc790\uc5f4\ub85c \ube44\uc5b4\uc788\ub294 \ubb38\uc790\uc5f4\uc744 \ub118\uaca8\uc11c \ud328\ud134\uc744 \uc0ad\uc81c\ud558\uae30 \uc704\ud55c \ubc29\ubc95\uc73c\ub85c \uc790\uc8fc \uc0ac\uc6a9" ] }, { "cell_type": "code", "collapsed": false, "input": [ "val.replace(',', '::')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 604, "text": [ "'a::b:: guido'" ] } ], "prompt_number": 604 }, { "cell_type": "code", "collapsed": false, "input": [ "val.replace(',', '')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 
605, "text": [ "'ab guido'" ] } ], "prompt_number": 605 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \ud30c\uc774\uc36c \ub0b4\uc7a5 \ubb38\uc790\uc5f4 \uba54\uc11c\ub4dc\n", "\n", "\uc778\uc790 | \uc124\uba85\n", "--- | ---\n", "count | \ubb38\uc790\uc5f4\uc5d0\uc11c \uacb9\uce58\uc9c0 \uc54a\ub294 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \uac1c\uc218\ub97c \ubc18\ud658\n", "endswith, startswith | \ubb38\uc790\uc5f4\uc774 \uc8fc\uc5b4\uc9c4 \uc811\ubbf8\uc0ac(\uc811\ub450\uc0ac)\ub85c \ub05d\ub0a0 \uacbd\uc6b0 True\ub97c \ubc18\ud658\n", "join | \ubb38\uc790\uc5f4\uc744 \uad6c\ubd84\uc790\ub85c \ud558\uc5ec \ub2e4\ub978 \ubb38\uc790\uc5f4\uc744 \uc21c\uc11c\ub300\ub85c \uc774\uc5b4\ubd99\uc778\ub2e4.\n", "index | \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \uccab \ubc88\uc9f8 \uae00\uc790\uc758 \uc704\uce58\ub97c \ubc18\ud658\ud55c\ub2e4. \ubd80\ubd84 \ubb38\uc790\uc5f4\uc774 \uc5c6\ub2e4\uba74 ValueError \uc608\uc678\uac00 \ubc1c\uc0dd\ud55c\ub2e4.\n", "find | \uccab \ubc88\uc9f8 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \uccab \ubc88\uc9f8 \uae00\uc790\uc758 \uc704\uce58\ub97c \ubc18\ud658\ud55c\ub2e4. index\uc640 \uc720\uc0ac\ud558\uc9c0\ub9cc \ubd80\ubd84 \ubb38\uc790\uc5f4\uc774 \uc5c6\uc744 \uacbd\uc6b0 -1\uc744 \ubc18\ud658\ud55c\ub2e4.\n", "rfind | \ub9c8\uc9c0\ub9c9 \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \uccab \ubc88\uc9f8 \uae00\uc790\uc758 \uc704\uce58\ub97c \ubc18\ud658\ud55c\ub2e4. \ubd80\ubd84 \ubb38\uc790\uc5f4\uc774 \uc5c6\ub2e4\uba74 -1\uc744 \ubc18\ud658\ud55c\ub2e4.\n", "replace | \ubb38\uc790\uc5f4\uc744 \ub2e4\ub978 \ubb38\uc790\uc5f4\ub85c \uce58\ud658\ud55c\ub2e4.\n", "strip, rstrip, lstrip | \uac1c\ud589\ubb38\uc790\ub97c \ud3ec\ud568\ud55c \uacf5\ubc31\ubb38\uc790\ub97c \uc81c\uac70\ud55c\ub2e4. 
lstrip\uc740 \ubb38\uc790\uc5f4\uc758 \uc2dc\uc791 \ubd80\ubd84\uc5d0 \uc788\ub294 \uacf5\ubc31 \ubb38\uc790\ub9cc\uc744 \uc81c\uac70\ud558\uace0, rstrip\uc740 \ubb38\uc790\uc5f4\uc758 \ub9c8\uc9c0\ub9c9 \ubd80\ubd84\uc5d0 \uc788\ub294 \uacf5\ubc31\ubb38\uc790\ub9cc\uc744 \uc81c\uac70\ud55c\ub2e4.\n", "split | \ubb38\uc790\uc5f4\uc744 \uad6c\ubd84\uc790\ub97c \uae30\uc900\uc73c\ub85c \ubd80\ubd84 \ubb38\uc790\uc5f4\uc758 \ub9ac\uc2a4\ud2b8\ub85c \ubd84\ub9ac\ud55c\ub2e4.\n", "lower, upper | \uac01\uac01 \uc54c\ud30c\ubcb3 \ubb38\uc790\ub97c \uc18c\ubb38\uc790 \ud639\uc740 \ub300\ubb38\uc790\ub85c \ubcc0\ud658\ud55c\ub2e4.\n", "ljust, rjust | \ubb38\uc790\uc5f4\uc744 \uac01\uac01 \uc67c\ucabd \ud639\uc740 \uc624\ub978\ucabd\uc73c\ub85c \uc815\ub82c\ud558\uace0 \uc8fc\uc5b4\uc9c4 \uae38\uc774\uc5d0\uc11c \ubb38\uc790\uc5f4\uc758 \uae38\uc774\ub97c \uc81c\uc678\ud55c \uacf3\uc740 \uacf5\ubc31\ubb38\uc790\ub97c \ucc44\uc6cc \ub123\uc5b4 \uc8fc\uc5b4\uc9c4 \uae38\uc774\ub97c \uac00\uc9c0\ub294 \ubb38\uc790\uc5f4\uc744 \ubc18\ud658\ud55c\ub2e4." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.4.2 \uc815\uaddc\ud45c\ud604\uc2dd\n", "\n", "- \ud14d\uc2a4\ud2b8\uc5d0\uc11c \ubb38\uc790\uc5f4 \ud328\ud134\uc744 \ucc3e\ub294 \uc720\uc5f0\ud55c \ubc29\ubc95\n", "- regex\ub77c \ubd88\ub9ac\ub294 \ub2e8\uc77c \ud45c\ud604\uc2dd\uc740 \uc815\uaddc\ud45c\ud604\uc5b8\uc5b4\ub85c \uad6c\uc131\ub41c \ubb38\uc790\uc5f4\n", "- \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \uc791\uc131\ud558\ub294 \ubc29\ubc95\uc740 **\uadf8 \uc790\uccb4\ub85c \ud558\ub098\uc758 \ub3c5\ub9bd\ub41c \uc7a5**\uc73c\ub85c \uad6c\uc131\ud560 \uc218 \uc788\ub294 \ub9cc\ud07c, \uc774 \ucc45\uc5d0\uc11c \ub2e4\ub8e8\ub294 \ubc94\uc704\ub97c \ubc97\uc5b4\ub09c\ub2e4.
\uc778\ud130\ub137\uc5d0\ub294 \uc81c\ub4dc\uc1fc\uc758 '\uc815\uaddc\ud45c\ud604\uc2dd \uc5b4\ub835\uac8c \ubc30\uc6b0\uae30(Learn Regex The Hard Way)' \uac19\uc740 \ud6cc\ub96d\ud55c \ud29c\ud1a0\ub9ac\uc5bc\uacfc \ub808\ud37c\ub7f0\uc2a4\uac00 \uc788\ub2e4.\n", "- re \ubaa8\ub4c8 \ud568\uc218\ub294 \ud328\ud134 \ub9e4\uce6d, \uce58\ud658, \ubd84\ub9ac\uc758 3\uac00\uc9c0\n", "- 3\uac00\uc9c0\ub294 \uc11c\ub85c \uc5f0\uad00\ub418\uc5b4 \uc788\uc73c\uba70, \uc815\uaddc\ud45c\ud604\uc2dd\uc740 \ud14d\uc2a4\ud2b8 \uc548\uc5d0 \uc874\uc7ac\ud558\ub294 \ud328\ud134\uc744 \ud45c\ud604\ud558\uace0 \uc774\ub97c \uc5ec\ub7ec\uac00\uc9c0 \ub2e4\uc591\ud55c \ubaa9\uc801\uc73c\ub85c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub3c4\ub85d \ub418\uc5b4 \uc788\ub2e4.\n", "- \uc5ec\ub7ec\uac00\uc9c0 \uacf5\ubc31\ubb38\uc790(\ud0ed, \uc2a4\ud398\uc774\uc2a4, \uac1c\ud589\ubb38\uc790)\uac00 \ud3ec\ud568\ub41c \ubb38\uc790\uc5f4\uc744 \ub098\ub204\uace0 \uc2f6\ub2e4\uba74 \ud558\ub098 \uc774\uc0c1\uc758 \uacf5\ubc31\ubb38\uc790\ub97c \uc758\ubbf8\ud558\ub294 \\s+\ub97c \uc0ac\uc6a9\ud574\uc11c \ubb38\uc790\uc5f4\uc744 \ubd84\ub9ac\ud560 \uc218 \uc788\ub2e4.\n", "\n", "#### \uc815\uaddc\ud45c\ud604\uc2dd \ub3c4\uc11c\n", "\n", "- [Yes 24 \uc815\uaddc\ud45c\ud604\uc2dd \ub3c4\uc11c \uac80\uc0c9](http://www.yes24.com/SearchCorner/Search?mstr_query=&query=%c1%a4%b1%d4+%c7%a5%c7%f6%bd%c4&disp_no=&title_yn=y&author_yn=y&company_yn=y&domain=all)\n", "- [\uc815\uaddc \ud45c\ud604\uc2dd \uc644\uc804 \ud574\ubd80\uc640 \uc2e4\uc2b5(\uac1c\uc815\ud310)](http://www.hanbit.co.kr/book/look.html?isbn=89-7914-226-9): \ub09c \uac1c\uc778\uc801\uc73c\ub85c \uc774 \ucc45\uc73c\ub85c \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \uacf5\ubd80\ud588\ub2e4. pcre\ub85c \uc124\uba85\ud574 \uc8fc\ub294\ub370 \uad49\uc7a5\ud788 \uc7ac\ubc0c\uac8c \ubd24\ub358 \uae30\uc5b5\uc774 \ub09c\ub2e4. \uc5d4\uc9c4\uc740 \uc5b4\ub5a4\uac8c \uc788\ub294\uc9c0? \ucd5c\uc801\ud654\ub294 \uc5b4\ub5bb\uac8c \ud558\ub294\uc9c0?
\ub4f1 3\ud68c\ub3c5 \ud588\uc9c0. \uc9c0\uae08\uc740 \uc808\ud310\ub41c\ub4ef.. \uc608\uc804\uc5d0 \uc804\uc790 \ub3c4\uc11c\uad00\uc5d0\uc11c pdf \uad6c\ud55c\uac83 \uac19\uc740\ub370.. \uae30\uc5b5\uc774 \uc798..\n", "- [\uc815\uaddc \ud45c\ud604\uc2dd \uc644\uc804 \ud574\ubd80\uc640 \uc2e4\uc2b5 \ub9ac\ubdf0](http://www.benjaminlog.com/80)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import re" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "text = 'foo bar\\t baz \\tqux'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "text" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "'foo bar\\t baz \\tqux'" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "re.split('\\s+', text)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "['foo', 'bar', 'baz', 'qux']" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- re.split('\\s+', text)\ub97c \uc0ac\uc6a9\ud558\uba74 \uba3c\uc800 \uc815\uaddc\ud45c\ud604\uc2dd\uc774 \ucef4\ud30c\uc77c\ub418\uace0 \uadf8 \ub2e4\uc74c\uc5d0 split \uba54\uc11c\ub4dc\uac00 \uc2e4\ud589\n", "- re.compile\uc744 \ud1b5\ud574 \uc9c1\uc811 \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \ucef4\ud30c\uc77c\ud558\uace0 \uadf8\ub807\uac8c \uc5bb\uc740 \uc815\uaddc\ud45c\ud604\uc2dd \uac1d\uccb4\ub97c \uc7ac\uc0ac\uc6a9\ud558\ub294 \uac83\ub3c4 \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "regex = re.compile('\\s+')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "regex.split(text)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": 
{}, "output_type": "pyout", "prompt_number": 7, "text": [ "['foo', 'bar', 'baz', 'qux']" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc815\uaddc\ud45c\ud604\uc2dd\uc5d0 \ub9e4\uce6d\ub418\ub294 \ubaa8\ub4e0 \ud328\ud134\uc758 \ubaa9\ub85d\uc744 \uc5bb\uace0 \uc2f6\ub2e4\uba74 findall \uba54\uc11c\ub4dc" ] }, { "cell_type": "code", "collapsed": false, "input": [ "regex.findall(text)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "[' ', '\\t ', ' \\t']" ] } ], "prompt_number": 8 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc815\uaddc\ud45c\ud604\uc2dd \uc548\uc5d0\uc11c \\ \ubb38\uc790\uac00 \uc774\uc2a4\ucf00\uc774\uc2a4\ub418\ub294 \uac83\uc744 \ud53c\ud558\ub824\uba74 raw \ubb38\uc790\uc5f4 \ud45c\uae30\ubc95\uc73c\ub85c \ubb38\uc81c\ub97c \ud68c\ud53c\ud560 \uc218 \uc788\ub2e4.\n", "- r'C:\\x'\uc740 'C:\\\\\\\\x'\uc640 \ub3d9\uc77c" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uac19\uc740 \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \ub2e4\ub978 \ubb38\uc790\uc5f4\uc5d0\ub3c4 \uc801\uc6a9\ud574\uc57c \ud55c\ub2e4\uba74 re.compile\uc744 \uc774\uc6a9\ud574\uc11c \uc815\uaddc\ud45c\ud604\uc2dd \uac1d\uccb4\ub97c \ub9cc\ub4e4\uc5b4 \uc0ac\uc6a9\ud558\ub294 \ubc29\ubc95 \ucd94\ucc9c. CPU \uc0ac\uc6a9\ub7c9 \uc808\uc57d\n", "- match\uc640 search\ub294 findall \uba54\uc11c\ub4dc\uc640 \uad00\ub828\n", "- findall: \ubb38\uc790\uc5f4\uc5d0\uc11c \uc77c\uce58\ud558\ub294 \ubaa8\ub4e0 \ubd80\ubd84\uc758 \ubb38\uc790\uc5f4\uc744 \ucc3e\uc544\uc90c\n", "- search: \ud328\ud134\uacfc \uc77c\uce58\ud558\ub294 \uccab \ubc88\uc9f8 \uc874\uc7ac\ub97c \ubc18\ud658\n", "- match: \uc774\ubcf4\ub2e4 \ub354 \uc5c4\uaca9\ud574\uc11c \ubb38\uc790\uc5f4\uc758 \uc2dc\uc791 \ubd80\ubd84\uc5d0\uc11c \uc77c\uce58\ud558\ub294 \uac83\ub9cc \ucc3e\uc544\uc900\ub2e4."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ucc45\uc5d0 \ub9c8\uc9c0\ub9c9 re.I\ub97c \uc801\uc6a9\ud558\ub294 \ubd80\ubd84\uc774 \uc0dd\ub7b5\ub418\uc5b4 \uc81c\ub300\ub85c \ub3d9\uc791\ud558\uc9c0 \uc54a\ub294\ub2e4. regex \ubcc0\uc218\ub97c \ucd94\uac00\ud574 \uc8fc\uae30 \ubc14\ub780\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "text = \"\"\"Dave dave@google.com\n", "Steve steve@gmail.com\n", "Rob rob@gmail.com\n", "Ryan ryan@yahoo.com\n", "\"\"\"\n", "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'\n", "\n", "# \uc774 \ubd80\ubd84\uc774 \uc0dd\ub7b5\ub418\uc5b4 \uc788\uc74c\n", "regex = re.compile(pattern, re.I) # re.IGNORECASE\ub294 \uc815\uaddc\ud45c\ud604\uc2dd\uc774 \ub300\u2219\uc18c\ubb38\uc790\ub97c \uac00\ub9ac\uc9c0 \uc54a\ub3c4\ub85d \ud55c\ub2e4." ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "regex.findall(text)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ "['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- search\ub294 \ud14d\uc2a4\ud2b8\uc5d0\uc11c \uccab \ubc88\uc9f8 \uc774\uba54\uc77c \uc8fc\uc18c\ub9cc \ucc3e\uc544\uc900\ub2e4. \uc774 \uc815\uaddc\ud45c\ud604\uc2dd\uc5d0 \ub300\ud55c match \uac1d\uccb4\ub294 \uadf8 \ud328\ud134\uc774 \ubb38\uc790\uc5f4 \uc548\uc5d0\uc11c \uc704\uce58\ud558\ub294 \uc2dc\uc791\uc810\uacfc \ub05d\uc810\ub9cc\uc744 \uc54c\ub824\uc900\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "m = regex.search(text)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "m" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "<_sre.SRE_Match at 0x109dbe308>" ] } ], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "text[m.start():m.end()]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 21, "text": [ "'dave@google.com'" ] } ], "prompt_number": 21 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- regex.match\ub294 None\uc744 \ubc18\ud658. \uc65c\ub0d0\ud558\uba74 \uadf8 \ud328\ud134\uc774 \ubb38\uc790\uc5f4\uc758 \uc2dc\uc791\uc810\uc5d0\uc11c\ubd80\ud130 \uc77c\uce58\ud558\ub294\uc9c0 \uac80\uc0ac\ud558\uae30 \ub54c\ubb38" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# \uc774 \ubb38\uc11c\uc758 \ucc98\uc74c\uc740 Dave\ub85c \uc2dc\uc791\ud558\uae30 \ub54c\ubb38\uc5d0 regex\uc640 \uc77c\uce58\ud558\uc9c0 \uc54a\uc74c\n", "print regex.match(text)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "None\n" ] } ], "prompt_number": 30 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- sub \uba54\uc11c\ub4dc\ub294 \ucc3e\uc740 \ud328\ud134\uc744 \uc8fc\uc5b4\uc9c4 \ubb38\uc790\uc5f4\ub85c \uce58\ud658\ud558\uc5ec \uc0c8\ub85c\uc6b4 \ubb38\uc790\uc5f4 \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "print regex.sub('REDACTED', text)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Dave REDACTED\n", "Steve REDACTED\n", "Rob REDACTED\n", "Ryan REDACTED\n", "\n" ] } ], "prompt_number": 31 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc774\uba54\uc77c 
\uc8fc\uc18c\ub97c \ucc3e\uc544\uc11c \ub3d9\uc2dc\uc5d0 \uac01 \uc774\uba54\uc77c \uc8fc\uc18c\ub97c \uc0ac\uc6a9\uc790 \uc774\ub984, \ub3c4\uba54\uc778 \uc774\ub984, \ub3c4\uba54\uc778 \uc811\ubbf8\uc0ac\uc758 3\uac00\uc9c0 \ucef4\ud3ec\ub10c\ud2b8\ub85c \ub098\ub220\uc57c \ud55c\ub2e4\uba74 \uac01 \ud328\ud134\uc744 \uad04\ud638\ub85c \ubb36\uc5b4\uc900\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 81 }, { "cell_type": "code", "collapsed": false, "input": [ "regex = re.compile(pattern, flags=re.IGNORECASE)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 59 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc774\ub807\uac8c \ub9cc\ub4e0 regex \uac1d\uccb4\uc758 match \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud558\uba74 groups \uba54\uc11c\ub4dc\ub97c \ud1b5\ud574 \uac01 \ud328\ud134 \ucef4\ud3ec\ub10c\ud2b8\uc758 \ud29c\ud50c\uc744 \uc5bb\uc744 \uc218 \uc788\uc74c" ] }, { "cell_type": "code", "collapsed": false, "input": [ "m = regex.match('wesm@bright.net')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 60 }, { "cell_type": "code", "collapsed": false, "input": [ "type(m)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 61, "text": [ "_sre.SRE_Match" ] } ], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [ "m.groups()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 46, "text": [ "('wesm', 'bright', 'net')" ] } ], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "m.group(0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 47, "text": [ "'wesm@bright.net'" ] } ], "prompt_number": 47 }, { "cell_type": 
"code", "collapsed": false, "input": [ "m.group(1)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 48, "text": [ "'wesm'" ] } ], "prompt_number": 48 }, { "cell_type": "code", "collapsed": false, "input": [ "m.group(2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 49, "text": [ "'bright'" ] } ], "prompt_number": 49 }, { "cell_type": "code", "collapsed": false, "input": [ "m.group(3)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 50, "text": [ "'net'" ] } ], "prompt_number": 50 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ud328\ud134\uc5d0 \uadf8\ub8f9\uc774 \uc788\ub2e4\uba74 findall \uba54\uc11c\ub4dc\ub294 \ud29c\ud50c\uc758 \ubaa9\ub85d\uc744 \ubc18\ud658" ] }, { "cell_type": "code", "collapsed": false, "input": [ "regex.findall(text)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 51, "text": [ "[('dave', 'google', 'com'),\n", " ('steve', 'gmail', 'com'),\n", " ('rob', 'gmail', 'com'),\n", " ('ryan', 'yahoo', 'com')]" ] } ], "prompt_number": 51 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- sub \uc5ed\uc2dc \ub9c8\ucc2c\uac00\uc9c0\ub85c \\1, \\2 \uac19\uc740 \ud2b9\uc218\ud55c \uae30\ud638\ub97c \uc0ac\uc6a9\ud574\uc11c \uac01 \ud328\ud134\uc758 \uadf8\ub8f9\uc5d0 \uc811\uadfc \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "print regex.sub(r'Username: \\1, Domain: \\2, Suffix: \\3', text)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Dave Username: dave, Domain: google, Suffix: com\n", "Steve Username: steve, Domain: gmail, Suffix: com\n", "Rob Username: rob, Domain: gmail, Suffix: com\n", "Ryan Username: ryan, Domain: yahoo, Suffix: com\n", "\n" ] } ], "prompt_number": 52 }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "- \ud30c\uc774\uc36c\uc5d0\uc11c \ud560 \uc218 \uc788\ub294 \uc815\uaddc\ud45c\ud604\uc2dd\uc740 \ub9ce\uc774 \uc788\uc9c0\ub9cc \uc774 \ucc45\uc758 \ubc94\uc704\ub97c \ub118\uae30 \ub54c\ubb34\u315c\u3134\uc5d0 \uc0dd\ub7b5\n", "- \ud55c \uac00\uc9c0 \ub354 \uc18c\uac1c\ud558\uc790\uba74 \uc704\uc5d0\uc11c \uc0b4\ud3b4\ubcf8 \uc774\uba54\uc77c \uc8fc\uc18c \uc815\uaddc\ud45c\ud604\uc2dd\uc758 \ub9e4\uce58 \uadf8\ub8f9\uc5d0 \ub2e4\uc74c\ucc98\ub7fc \uc774\ub984\uc744 \uc904 \uc218\ub3c4 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "regex = re.compile(r'''\n", "(?P[A-Z0-9._%+-]+)\n", "@\n", "(?P[A-Z0-9.-]+)\n", "\\.\n", "(?P[A-Z]{2,4})''', flags=re.IGNORECASE | re.VERBOSE)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 82 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc774 \uc815\uaddc\ud45c\ud604\uc2dd\uc73c\ub85c \uc0dd\uc131\ud55c match \uac1d\uccb4\ub294 \uadf8\ub8f9 \uc774\ub984\uc774 \uc9c0\uc815\ub418\uc5b4 \ud3b8\ub9ac\ud55c \uc0ac\uc804\ud615\uc744 \ub9cc\ub4e4\uc5b4 \uc900\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "m = regex.match('wesm@bright.net')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 83 }, { "cell_type": "code", "collapsed": false, "input": [ "m.groupdict()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 84, "text": [ "{'domain': 'bright', 'suffix': 'net', 'username': 'wesm'}" ] } ], "prompt_number": 84 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \uc815\uaddc\ud45c\ud604\uc2dd \uba54\uc11c\ub4dc\n", "\n", "\uc778\uc790 | \uc124\uba85\n", "--- | ---\n", "findall, finditer | \ubb38\uc790\uc5f4\uc5d0\uc11c \uacb9\uce58\uc9c0 \uc54a\ub294, \ubc1c\uacac\ub41c \ubaa8\ub4e0 \ud328\ud134\uc744 \ubc18\ud658\ud55c\ub2e4. 
findall \uba54\uc11c\ub4dc\ub294 \ubaa8\ub4e0 \ud328\ud134\uc758 \ub9ac\uc2a4\ud2b8\ub97c \ubc18\ud658\ud558\uace0, finditer \uba54\uc11c\ub4dc\ub294 \uc774\ud130\ub808\uc774\ud130\ub97c \ud1b5\ud574 \ud558\ub098\uc529 \ubc18\ud658\ud55c\ub2e4.\n", "match | \ubb38\uc790\uc5f4\uc758 \uc2dc\uc791\uc810\ubd80\ud130 \ud328\ud134\uc744 \ucc3e\uace0 \uc120\ud0dd\uc801\uc73c\ub85c \ud328\ud134 \ucef4\ud3ec\ub10c\ud2b8\ub97c \uadf8\ub8f9\uc73c\ub85c \ub098\ub208\ub2e4. \uc77c\uce58\ud558\ub294 \ud328\ud134\uc774 \uc788\ub2e4\uba74 match \uac1d\uccb4\ub97c \ubc18\ud658\ud558\uace0 \uadf8\ub807\uc9c0 \uc54a\uc744 \uacbd\uc6b0 None\uc744 \ubc18\ud658\ud55c\ub2e4.\n", "search | \ubb38\uc790\uc5f4\uc5d0\uc11c \ud328\ud134\uacfc \uc77c\uce58\ud558\ub294 \ub0b4\uc6a9\uc744 \uac80\uc0c9\ud558\uace0 match \uac1d\uccb4\ub97c \ubc18\ud658\ud55c\ub2e4. match \uba54\uc11c\ub4dc\uc640\ub294 \ub2ec\ub9ac \uc2dc\uc791\ubd80\ud130 \uc77c\uce58\ud558\ub294 \ub0b4\uc6a9\ub9cc \ucc3e\uc9c0 \uc54a\uace0 \ubb38\uc790\uc5f4 \uc5b4\ub514\ub4e0 \uc77c\uce58\ud558\ub294 \ub0b4\uc6a9\uc774 \uc788\ub2e4\uba74 \ubc18\ud658\ud55c\ub2e4.\n", "split | \ubb38\uc790\uc5f4\uc5d0\uc11c \ud328\ud134\uacfc \uc77c\uce58\ud558\ub294 \ubd80\ubd84\uc744 \ucabc\uac20\ub2e4.\n", "sub, subn | \ubb38\uc790\uc5f4\uc5d0\uc11c \uc77c\uce58\ud558\ub294 \ubaa8\ub4e0 \ud328\ud134(sub) \ud639\uc740 \ucc98\uc74c n\uac1c\uc758 \ud328\ud134(subn)\uc744 \ub300\uccb4 \ud45c\ud604\uc73c\ub85c \uce58\ud658\ud55c\ub2e4. \ub300\uccb4 \ud45c\ud604 \ubb38\uc790\uc5f4\uc740 \\1, \\2, ... \uac19\uc740 \uae30\ud638\ub97c \uc0ac\uc6a9\ud574\uc11c \ub9e4\uce58 \uadf8\ub8f9\uc758 \uc694\uc18c\ub97c \ucc38\uc870\ud55c\ub2e4."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7.4.3 pandas\uc758 \ubca1\ud130\ud654\ub41c \ubb38\uc790\uc5f4 \ud568\uc218\n", "\n", "- \ub370\uc774\ud130\ub97c \ubd84\uc11d\ud558\ub824\uba74 \uba3c\uc800 \ub4a4\uc8fd\ubc15\uc8fd\uc778 \ub370\uc774\ud130\uc758 \ubb38\uc790\uc5f4\uc744 \ub2e4\ub4ec\uace0 \uc815\uaddc\ud654\ud558\ub294 \uc815\ub9ac \uc791\uc5c5 \ud544\uc694\n", "- \ubb38\uc790\uc5f4\uc744 \ub2f4\uace0 \uc788\ub294 \uce7c\ub7fc\uc5d0 \ub204\ub77d\ub41c \uac12\uc774 \uc788\ub2e4\uba74 \uc77c\uc744 \ub354 \ubcf5\uc7a1\ud558\uac8c \ub9cc\ub4e0\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = {'Dave': 'dave@google.com', \n", " 'Steve': 'steve@gmail.com',\n", " 'Rob': 'rob@gmail.com', \n", " 'Wes': np.nan}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 85 }, { "cell_type": "code", "collapsed": false, "input": [ "data = Series(data)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 86 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 87, "text": [ "Dave dave@google.com\n", "Rob rob@gmail.com\n", "Steve steve@gmail.com\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 87 }, { "cell_type": "code", "collapsed": false, "input": [ "data.isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 88, "text": [ "Dave False\n", "Rob False\n", "Steve False\n", "Wes True\n", "dtype: bool" ] } ], "prompt_number": 88 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ubb38\uc790\uc5f4\uacfc \uc815\uaddc\ud45c\ud604\uc2dd \uba54\uc11c\ub4dc\ub294 data.map\uc744 \uc0ac\uc6a9\ud574\uc11c \uac01 \uac12\uc5d0 \uc801\uc6a9(lambda \ud639\uc740 \ub2e4\ub978 \ud568\uc218\ub97c \ub118\uaca8\uc11c) \ud560 \uc218 \uc788\uc9c0\ub9cc NA \uac12\uc744 \ub9cc\ub098\uba74 
\uc2e4\ud328\ud55c\ub2e4. \uc774\ub7f0 \ubb38\uc81c\uc5d0 \ub300\ucc98\ud558\uae30 \uc704\ud574 Series\uc5d0\ub294 NA\uac12\uc744 \uac74\ub108\ub6f0\uac8c \ud558\ub294 \uac04\uacb0\ud55c \ubb38\uc790\uc5f4 \ucc98\ub9ac \uba54\uc11c\ub4dc \uc874\uc7ac\n", "- \uc774\ub294 Series\uc758 str \uc18d\uc131\uc744 \uc774\uc6a9\ud558\ub294\ub370, \uc608\ub97c \ub4e4\uba74 \uac01 \uc774\uba54\uc77c \uc8fc\uc18c\uac00 'gmail'\uc744 \ud3ec\ud568\ud558\uace0 \uc788\ub294\uc9c0 str.contains\ub97c \uc774\uc6a9\ud574\uc11c \uac80\uc0ac \uac00\ub2a5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.str.contains('gmail')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 89, "text": [ "Dave False\n", "Rob True\n", "Steve True\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 89 }, { "cell_type": "code", "collapsed": false, "input": [ "pattern" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 90, "text": [ "'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\\\.([A-Z]{2,4})'" ] } ], "prompt_number": 90 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- IGNORECASE \uac19\uc740 re \uc635\uc158\uacfc \ud568\uaed8 \uc815\uaddc\ud45c\ud604\uc2dd\ub3c4 \uc0ac\uc6a9\ud560 \uc218 \uc788\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "data.str.findall(pattern, flags=re.IGNORECASE)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 91, "text": [ "Dave [(dave, google, com)]\n", "Rob [(rob, gmail, com)]\n", "Steve [(steve, gmail, com)]\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 91 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ubca1\ud130\ud654\ub41c \uc694\uc18c\ub97c \uaebc\ub0b4\uc624\ub294 \uba87 \uac00\uc9c0 \ubc29\ubc95\uc774 \uc788\ub294\ub370, str.get\uc744 \uc774\uc6a9\ud558\uac70\ub098 str \uc18d\uc131\uc758 \uc0c9\uc778\uc744 \uc774\uc6a9\ud558\ub294 \ubc29\ubc95" ] }, { "cell_type": "code", "collapsed": false, "input": [ "matches = data.str.match(pattern, flags=re.IGNORECASE)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 92 }, { "cell_type": "code", "collapsed": false, "input": [ "matches" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 93, "text": [ "Dave (dave, google, com)\n", "Rob (rob, gmail, com)\n", "Steve (steve, gmail, com)\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 93 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str.get(1)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 94, "text": [ "Dave google\n", "Rob gmail\n", "Steve gmail\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 94 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 95, "text": [ "Dave dave\n", "Rob rob\n", "Steve steve\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 95 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str.get(2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, 
"output_type": "pyout", "prompt_number": 97, "text": [ "Dave com\n", "Rob com\n", "Steve com\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 97 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str.get(0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 98, "text": [ "Dave dave\n", "Rob rob\n", "Steve steve\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 98 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str[1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 99, "text": [ "Dave google\n", "Rob gmail\n", "Steve gmail\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 99 }, { "cell_type": "code", "collapsed": false, "input": [ "matches.str[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 100, "text": [ "Dave com\n", "Rob com\n", "Steve com\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 100 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ubb38\uc790\uc5f4\uc744 \uc798\ub77c\ub0bc \uc218 \uc788\ub2e4." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "data.str[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 101, "text": [ "Dave dave@\n", "Rob rob@g\n", "Steve steve\n", "Wes NaN\n", "dtype: object" ] } ], "prompt_number": 101 }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### [\ubca1\ud130\ud654\ub41c \ubb38\uc790\uc5f4 \uba54\uc11c\ub4dc](http://pandas.pydata.org/pandas-docs/stable/basics.html#vectorized-string-methods)\n", "\n", "\uba54\uc11c\ub4dc | \uc124\uba85\n", "--- | ---\n", "cat | \uc120\ud0dd\uc801\uc778 \uad6c\ubd84\uc790\uc640 \ud568\uaed8 \uc694\uc18c\ubcc4\ub85c \ubb38\uc790\uc5f4\uc744 \uc774\uc5b4 \ubd99\uc778\ub2e4.\n", "contains | \ubb38\uc790\uc5f4\uc774 \ud328\ud134\uc774\ub098 \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \ud3ec\ud568\ud558\ub294\uc9c0\ub97c \ub098\ud0c0\ub0b4\ub294 \ubd88\ub9ac\uc5b8 \ubc30\uc5f4\uc744 \ubc18\ud658\ud55c\ub2e4.\n", "count | \uc77c\uce58\ud558\ub294 \ud328\ud134\uc758 \uac1c\uc218\ub97c \ubc18\ud658\ud55c\ub2e4.\n", "endswith, startswith | \uac01 \uc694\uc18c\uc5d0 \ub300\ud574 x.endswith(pattern), x.startswith(pattern)\uc640 \ub3d9\uc77c\ud55c \ub3d9\uc791\uc744 \ud55c\ub2e4.\n", "findall | \uac01 \ubb38\uc790\uc5f4\uc5d0 \ub300\ud574 \uc77c\uce58\ud558\ub294 \ud328\ud134/\uc815\uaddc\ud45c\ud604\uc2dd\uc758 \uc804\uccb4 \ubaa9\ub85d\uc744 \uad6c\ud55c\ub2e4.\n", "get | i\ubc88\uc9f8 \uc694\uc18c\ub97c \ubc18\ud658\ud55c\ub2e4.\n", "join | Series\uc758 \uac01 \uc694\uc18c\ub97c \uc8fc\uc5b4\uc9c4 \uad6c\ubd84\uc790\ub85c \uc5f0\uacb0\ud55c\ub2e4.\n", "len | \uac01 \ubb38\uc790\uc5f4\uc758 \uae38\uc774\ub97c \uad6c\ud55c\ub2e4.\n", "lower, upper | \ub300\u2219\uc18c\ubb38\uc790\ub85c \ubcc0\ud658\ud55c\ub2e4. 
\uac01 \uc694\uc18c\uc5d0 \ub300\ud55c x.lower(), x.upper()\uc640 \uac19\ub2e4.\n", "match | \uc8fc\uc5b4\uc9c4 \uc815\uaddc\ud45c\ud604\uc2dd\uc73c\ub85c \uac01 \uc694\uc18c\uc5d0 \ub300\ud55c re.match\ub97c \uc218\ud589\ud558\uc5ec \uc77c\uce58\ud558\ub294 \uadf8\ub8f9\uc744 \ub9ac\uc2a4\ud2b8\ub85c \ubc18\ud658\ud55c\ub2e4.\n", "pad | \ubb38\uc790\uc5f4\uc758 \uc88c, \uc6b0 \ud639\uc740 \uc591\ucabd\uc5d0 \uacf5\ubc31\uc744 \ucd94\uac00\ud55c\ub2e4.\n", "center | pad(side='both')\uc640 \ub3d9\uc77c\ud558\ub2e4.\n", "repeat | \uac12\uc744 \ubcf5\uc0ac\ud55c\ub2e4. \uc608\ub97c \ub4e4\uc5b4 s.str.repeat(3)\ub294 \uac01 \ubb38\uc790\uc5f4\uc5d0 \ub300\ud55c x*3\uacfc \ub3d9\uc77c\ud558\ub2e4.\n", "replace | \ud328\ud134/\uc815\uaddc\ud45c\ud604\uc2dd\uacfc \uc77c\uce58\ud558\ub294 \ub0b4\uc6a9\uc744 \ub2e4\ub978 \ubb38\uc790\uc5f4\ub85c \uce58\ud658\ud55c\ub2e4.\n", "slice | Series \uc548\uc5d0 \uc788\ub294 \uac01 \ubb38\uc790\uc5f4\uc744 \uc790\ub978\ub2e4.\n", "split | \uc815\uaddc\ud45c\ud604\uc2dd \ud639\uc740 \uad6c\ubd84\uc790\ub85c \ubb38\uc790\uc5f4\uc744 \ub098\ub208\ub2e4.\n", "strip, rstrip, lstrip | \uac01 \uc694\uc18c\uc758 \uac1c\ud589\ubb38\uc790\ub97c \ud3ec\ud568\ud55c \uacf5\ubc31\ubb38\uc790\ub97c \uc81c\uac70\ud55c\ub2e4." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 7.5 \uc608\uc81c: \ubbf8\uad6d \ub18d\ubb34\ubd80 \uc74c\uc2dd \ub370\uc774\ud130\ubca0\uc774\uc2a4\n", "\n", "- US Department of Argiculture\ub294 \uc74c\uc2dd\uc758 \uc601\uc591\uc18c \uc815\ubcf4 \ub370\uc774\ud130\ubca0\uc774\uc2a4 \uc81c\uacf5\n", "- \uc601\uad6d \ud574\ucee4\uc778 Ashley Williams\ub294 \uc774 \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub97c \ub2e4\uc74c\uacfc \uac19\uc740 JSON \ud615\uc2dd\uc73c\ub85c [\uc6f9\uc0ac\uc774\ud2b8](http://ashleyw.co.uk/project/food-nutrient-database)\uc5d0 \uc81c\uacf5\n", "\n", "####\uc624 \uc774\uc81c \uc2e4\uc804!!\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " {\n", " \"id\": 21441,\n", " \"description\": \"KENTUCKY FRIED CHICKEN, Fried Chicken, EXTRA CRISPY, Wing, meat and skin with breading\",\n", " \"tags\": [\"KFC\"], \n", " \"manufacturer\": \"Kentucky Fried Chicken\",\n", " \"group\": \"Fast Foods\",\n", " \"portions\": [\n", " {\n", " \"amount\": 1,\n", " \"unit\": \"wing, with skin\",\n", " \"grams\": 68.0\n", " },\n", "\n", " ...\n", " ],\n", " \"nutrients\": [\n", " {\n", " \"value\": 20.8,\n", " \"units\": \"g\",\n", " \"description\": \"Protein\",\n", " \"group\": \"Composition\"\n", " },\n", "\n", " ...\n", " ]\n", " }" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uac01 \uc74c\uc2dd\uc5d0\ub294 \uc22b\uc790\ub85c \ub41c \uace0\uc720 ID\uc640 \uc601\uc591\uc18c \ubc0f \uc81c\uacf5\ub7c9\uc744 \ub2f4\uace0 \uc788\ub294 2\uac00\uc9c0 \ub9ac\uc2a4\ud2b8\n", "- \uc774 \ub370\uc774\ud130 \ud615\uc2dd\uc740 \ubd84\uc11d\ud558\uae30\uc5d0 \ud3b8\ud558\uc9c0 \uc54a\uc73c\ubbc0\ub85c \uc880 \ub354 \ub098\uc740 \ud615\ud0dc\ub85c \ub370\uc774\ud130 \ud615\uc2dd \ubcc0\uacbd" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import json" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 106 }, { "cell_type": "code", "collapsed": false, "input": [ "db = 
json.load(open('ch07/foods-2011-10-03.json'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 107 }, { "cell_type": "code", "collapsed": false, "input": [ "len(db)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 108, "text": [ "6636" ] } ], "prompt_number": 108 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- db\uc5d0 \uc788\ub294 \uac01 \uc5d4\ud2b8\ub9ac\ub294 \ud55c \uac00\uc9c0 \uc74c\uc2dd\uc5d0 \ub300\ud55c \ubaa8\ub4e0 \uc815\ubcf4\ub97c \ub2f4\uace0 \uc788\ub294 \uc0ac\uc804\ud615\n", "- 'nutrients'(\uc601\uc591\uc18c) \ud544\ub4dc\ub294 \uc0ac\uc804\uc758 \ub9ac\uc2a4\ud2b8\uc774\uba70 \uac01 \ud56d\ubaa9\uc740 \ud55c \uac00\uc9c0 \uc601\uc591\uc18c\uc5d0 \ub300\ud55c \uc815\ubcf4\ub97c \ub2f4\uace0 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "db[0].keys()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 109, "text": [ "[u'portions',\n", " u'description',\n", " u'tags',\n", " u'nutrients',\n", " u'group',\n", " u'id',\n", " u'manufacturer']" ] } ], "prompt_number": 109 }, { "cell_type": "code", "collapsed": false, "input": [ "db[0]['nutrients'][0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 110, "text": [ "{u'description': u'Protein',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 25.18}" ] } ], "prompt_number": 110 }, { "cell_type": "code", "collapsed": false, "input": [ "db[0]['nutrients'][1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 111, "text": [ "{u'description': u'Total lipid (fat)',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 29.2}" ] } ], "prompt_number": 111 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients = DataFrame(db[0]['nutrients'])" ], 
"language": "python", "metadata": {}, "outputs": [], "prompt_number": 112 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients[:7]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
descriptiongroupunitsvalue
0 Protein Composition g 25.18
1 Total lipid (fat) Composition g 29.20
2 Carbohydrate, by difference Composition g 3.06
3 Ash Other g 3.28
4 Energy Energy kcal 376.00
5 Water Composition g 39.28
6 Energy Energy kJ 1573.00
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 113, "text": [ " description group units value\n", "0 Protein Composition g 25.18\n", "1 Total lipid (fat) Composition g 29.20\n", "2 Carbohydrate, by difference Composition g 3.06\n", "3 Ash Other g 3.28\n", "4 Energy Energy kcal 376.00\n", "5 Water Composition g 39.28\n", "6 Energy Energy kJ 1573.00" ] } ], "prompt_number": 113 }, { "cell_type": "code", "collapsed": false, "input": [ "len(nutrients)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 114, "text": [ "162" ] } ], "prompt_number": 114 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc0ac\uc804\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame\uc73c\ub85c \ubc14\uafc0 \ub54c \ucd94\ucd9c\ud560 \ud544\ub4dc \ubaa9\ub85d\uc744 \uc9c0\uc815\ud574\uc904 \uc218 \uc788\ub2e4. \uc6b0\ub9ac\ub294 \uc74c\uc2dd\uc758 \uc774\ub984\uacfc \uadf8\ub8f9, id \uadf8\ub9ac\uace0 \uc81c\uc870\uc0ac\ub97c \ucd94\ucd9c\ud560 \uac83\uc774\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "info_keys = ['description', 'group', 'id', 'manufacturer']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 118 }, { "cell_type": "code", "collapsed": false, "input": [ "info = DataFrame(db, columns=info_keys)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 119 }, { "cell_type": "code", "collapsed": false, "input": [ "info[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
descriptiongroupidmanufacturer
0 Cheese, caraway Dairy and Egg Products 1008
1 Cheese, cheddar Dairy and Egg Products 1009
2 Cheese, edam Dairy and Egg Products 1018
3 Cheese, feta Dairy and Egg Products 1019
4 Cheese, mozzarella, part skim milk Dairy and Egg Products 1028
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 124, "text": [ " description group id manufacturer\n", "0 Cheese, caraway Dairy and Egg Products 1008 \n", "1 Cheese, cheddar Dairy and Egg Products 1009 \n", "2 Cheese, edam Dairy and Egg Products 1018 \n", "3 Cheese, feta Dairy and Egg Products 1019 \n", "4 Cheese, mozzarella, part skim milk Dairy and Egg Products 1028 " ] } ], "prompt_number": 124 }, { "cell_type": "code", "collapsed": false, "input": [ "len(info)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 125, "text": [ "6636" ] } ], "prompt_number": 125 }, { "cell_type": "code", "collapsed": false, "input": [ "info" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 6636 entries, 0 to 6635\n",
        "Data columns (total 4 columns):\n",
        "description     6636  non-null values\n",
        "group           6636  non-null values\n",
        "id              6636  non-null values\n",
        "manufacturer    5195  non-null values\n",
        "dtypes: int64(1), object(3)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 126, "text": [ "\n", "Int64Index: 6636 entries, 0 to 6635\n", "Data columns (total 4 columns):\n", "description 6636 non-null values\n", "group 6636 non-null values\n", "id 6636 non-null values\n", "manufacturer 5195 non-null values\n", "dtypes: int64(1), object(3)" ] } ], "prompt_number": 126 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- value_counts \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c \uc74c\uc2dd \uadf8\ub8f9\uc758 \ubd84\ud3ec \ud655\uc778" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.value_counts(info.group)[:10]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 127, "text": [ "Vegetables and Vegetable Products 812\n", "Beef Products 618\n", "Baked Products 496\n", "Breakfast Cereals 403\n", "Legumes and Legume Products 365\n", "Fast Foods 365\n", "Lamb, Veal, and Game Products 345\n", "Sweets 341\n", "Fruits and Fruit Juices 328\n", "Pork Products 328\n", "dtype: int64" ] } ], "prompt_number": 127 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \ubaa8\ub4e0 \uc601\uc591\uc18c \uc815\ubcf4\ub97c \ubd84\uc11d\n", "- \uba3c\uc800 \uc880 \ub354 \uc27d\uac8c \ubd84\uc11d\uc744 \ud558\uae30 \uc704\ud574 \uac01 \uc74c\uc2dc\uadf8\uc774 \uc601\uc591\uc18c \uc815\ubcf4\ub97c \uac70\ub300\ud55c \ud14c\uc774\ube14 \ud558\ub098\uc5d0 \ub2f4\uc544\ubcf4\uc790.\n", "- \uadf8\ub7ec\ub824\uba74 \uc0ac\uc804\uc5d0 \uba87 \uac00\uc9c0 \uacfc\uc815\uc744 \uac70\uccd0\uc57c \ud568\n", "- \uba3c\uc800 \uc74c\uc2dd\uc758 \uc601\uc591\uc18c \ub9ac\uc2a4\ud2b8\ub97c \ud558\ub098\uc758 DataFrame\uc73c\ub85c \ubcc0\ud658\ud558\uace0, \uc74c\uc2dd\uc758 id\ub97c \uc704\ud55c \uce7c\ub7fc\uc744 \ud558\ub098 \ucd94\uac00\n", "- \uadf8\ub9ac\uace0 \uc774 DataFrame\uc744 \ub9ac\uc2a4\ud2b8\uc5d0 \ucd94\uac00\ud558\ub09f. 
\ub9c8\uc9c0\ub9c9\uc73c\ub85c \uc774 \ub9ac\uc2a4\ud2b8\ub97c concat \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud574\uc11c \ud558\ub098\ub85c \ud569\uce5c\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients = []\n", "\n", "for rec in db:\n", " fnuts = DataFrame(rec['nutrients'])\n", " fnuts['id'] = rec['id']\n", " nutrients.append(fnuts)\n", " \n", "nutrients = pd.concat(nutrients, ignore_index=True)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 128 }, { "cell_type": "code", "collapsed": false, "input": [ "db[0]['nutrients']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 133, "text": [ "[{u'description': u'Protein',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 25.18},\n", " {u'description': u'Total lipid (fat)',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 29.2},\n", " {u'description': u'Carbohydrate, by difference',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 3.06},\n", " {u'description': u'Ash', u'group': u'Other', u'units': u'g', u'value': 3.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': u'kcal',\n", " u'value': 376.0},\n", " {u'description': u'Water',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 39.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': u'kJ',\n", " u'value': 1573.0},\n", " {u'description': u'Fiber, total dietary',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 0.0},\n", " {u'description': u'Calcium, Ca',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 673.0},\n", " {u'description': u'Iron, Fe',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.64},\n", " {u'description': u'Magnesium, Mg',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 22.0},\n", " {u'description': u'Phosphorus, P',\n", 
" u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 490.0},\n", " {u'description': u'Potassium, K',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Sodium, Na',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 690.0},\n", " {u'description': u'Zinc, Zn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 2.94},\n", " {u'description': u'Copper, Cu',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.024},\n", " {u'description': u'Manganese, Mn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.021},\n", " {u'description': u'Selenium, Se',\n", " u'group': u'Elements',\n", " u'units': u'mcg',\n", " u'value': 14.5},\n", " {u'description': u'Vitamin A, IU',\n", " u'group': u'Vitamins',\n", " u'units': u'IU',\n", " u'value': 1054.0},\n", " {u'description': u'Retinol',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 262.0},\n", " {u'description': u'Vitamin A, RAE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_RAE',\n", " u'value': 271.0},\n", " {u'description': u'Vitamin C, total ascorbic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.0},\n", " {u'description': u'Thiamin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.031},\n", " {u'description': u'Riboflavin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.45},\n", " {u'description': u'Niacin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.18},\n", " {u'description': u'Pantothenic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.19},\n", " {u'description': u'Vitamin B-6',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.074},\n", " {u'description': u'Folate, total',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Vitamin B-12',\n", " u'group': 
u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.27},\n", " {u'description': u'Folic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.0},\n", " {u'description': u'Folate, food',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Folate, DFE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_DFE',\n", " u'value': 18.0},\n", " {u'description': u'Cholesterol',\n", " u'group': u'Other',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Fatty acids, total saturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 18.584},\n", " {u'description': u'Fatty acids, total monounsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 8.275},\n", " {u'description': u'Fatty acids, total polyunsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 0.83},\n", " {u'description': u'Tryptophan',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.324},\n", " {u'description': u'Threonine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.896},\n", " {u'description': u'Isoleucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.563},\n", " {u'description': u'Leucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.412},\n", " {u'description': u'Lysine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.095},\n", " {u'description': u'Methionine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.659},\n", " {u'description': u'Cystine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.126},\n", " {u'description': u'Phenylalanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.326},\n", " {u'description': u'Tyrosine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.216},\n", " {u'description': u'Valine',\n", " u'group': 
u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.682},\n", " {u'description': u'Arginine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.952},\n", " {u'description': u'Histidine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.884},\n", " {u'description': u'Alanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.711},\n", " {u'description': u'Aspartic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.618},\n", " {u'description': u'Glutamic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 6.16},\n", " {u'description': u'Glycine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.439},\n", " {u'description': u'Proline',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.838},\n", " {u'description': u'Serine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.472},\n", " {u'description': u'Protein',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 25.18},\n", " {u'description': u'Total lipid (fat)',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 29.2},\n", " {u'description': u'Carbohydrate, by difference',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 3.06},\n", " {u'description': u'Ash', u'group': u'Other', u'units': u'g', u'value': 3.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': u'kcal',\n", " u'value': 376.0},\n", " {u'description': u'Water',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 39.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': u'kJ',\n", " u'value': 1573.0},\n", " {u'description': u'Fiber, total dietary',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 0.0},\n", " {u'description': u'Calcium, Ca',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 
673.0},\n", " {u'description': u'Iron, Fe',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.64},\n", " {u'description': u'Magnesium, Mg',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 22.0},\n", " {u'description': u'Phosphorus, P',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 490.0},\n", " {u'description': u'Potassium, K',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Sodium, Na',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 690.0},\n", " {u'description': u'Zinc, Zn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 2.94},\n", " {u'description': u'Copper, Cu',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.024},\n", " {u'description': u'Manganese, Mn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.021},\n", " {u'description': u'Selenium, Se',\n", " u'group': u'Elements',\n", " u'units': u'mcg',\n", " u'value': 14.5},\n", " {u'description': u'Vitamin A, IU',\n", " u'group': u'Vitamins',\n", " u'units': u'IU',\n", " u'value': 1054.0},\n", " {u'description': u'Retinol',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 262.0},\n", " {u'description': u'Vitamin A, RAE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_RAE',\n", " u'value': 271.0},\n", " {u'description': u'Vitamin C, total ascorbic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.0},\n", " {u'description': u'Thiamin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.031},\n", " {u'description': u'Riboflavin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.45},\n", " {u'description': u'Niacin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.18},\n", " {u'description': u'Pantothenic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.19},\n", " {u'description': 
u'Vitamin B-6',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.074},\n", " {u'description': u'Folate, total',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Vitamin B-12',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.27},\n", " {u'description': u'Folic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.0},\n", " {u'description': u'Folate, food',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Folate, DFE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_DFE',\n", " u'value': 18.0},\n", " {u'description': u'Tryptophan',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.324},\n", " {u'description': u'Threonine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.896},\n", " {u'description': u'Isoleucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.563},\n", " {u'description': u'Leucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.412},\n", " {u'description': u'Lysine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.095},\n", " {u'description': u'Methionine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.659},\n", " {u'description': u'Cystine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.126},\n", " {u'description': u'Phenylalanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.326},\n", " {u'description': u'Tyrosine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.216},\n", " {u'description': u'Valine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.682},\n", " {u'description': u'Arginine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.952},\n", " {u'description': u'Histidine',\n", " u'group': u'Amino 
Acids',\n", " u'units': u'g',\n", " u'value': 0.884},\n", " {u'description': u'Alanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.711},\n", " {u'description': u'Aspartic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.618},\n", " {u'description': u'Glutamic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 6.16},\n", " {u'description': u'Glycine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.439},\n", " {u'description': u'Proline',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.838},\n", " {u'description': u'Serine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.472},\n", " {u'description': u'Cholesterol',\n", " u'group': u'Other',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Fatty acids, total saturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 18.584},\n", " {u'description': u'Fatty acids, total monounsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 8.275},\n", " {u'description': u'Fatty acids, total polyunsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 0.83},\n", " {u'description': u'Protein',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 25.18},\n", " {u'description': u'Total lipid (fat)',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 29.2},\n", " {u'description': u'Carbohydrate, by difference',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 3.06},\n", " {u'description': u'Ash', u'group': u'Other', u'units': u'g', u'value': 3.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': u'kcal',\n", " u'value': 376.0},\n", " {u'description': u'Water',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 39.28},\n", " {u'description': u'Energy',\n", " u'group': u'Energy',\n", " u'units': 
u'kJ',\n", " u'value': 1573.0},\n", " {u'description': u'Fiber, total dietary',\n", " u'group': u'Composition',\n", " u'units': u'g',\n", " u'value': 0.0},\n", " {u'description': u'Calcium, Ca',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 673.0},\n", " {u'description': u'Iron, Fe',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.64},\n", " {u'description': u'Magnesium, Mg',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 22.0},\n", " {u'description': u'Phosphorus, P',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 490.0},\n", " {u'description': u'Potassium, K',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Sodium, Na',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 690.0},\n", " {u'description': u'Zinc, Zn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 2.94},\n", " {u'description': u'Copper, Cu',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.024},\n", " {u'description': u'Manganese, Mn',\n", " u'group': u'Elements',\n", " u'units': u'mg',\n", " u'value': 0.021},\n", " {u'description': u'Selenium, Se',\n", " u'group': u'Elements',\n", " u'units': u'mcg',\n", " u'value': 14.5},\n", " {u'description': u'Vitamin A, IU',\n", " u'group': u'Vitamins',\n", " u'units': u'IU',\n", " u'value': 1054.0},\n", " {u'description': u'Retinol',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 262.0},\n", " {u'description': u'Vitamin A, RAE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_RAE',\n", " u'value': 271.0},\n", " {u'description': u'Vitamin C, total ascorbic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.0},\n", " {u'description': u'Thiamin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.031},\n", " {u'description': u'Riboflavin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " 
u'value': 0.45},\n", " {u'description': u'Niacin',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.18},\n", " {u'description': u'Pantothenic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.19},\n", " {u'description': u'Vitamin B-6',\n", " u'group': u'Vitamins',\n", " u'units': u'mg',\n", " u'value': 0.074},\n", " {u'description': u'Folate, total',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Vitamin B-12',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.27},\n", " {u'description': u'Folic acid',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 0.0},\n", " {u'description': u'Folate, food',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg',\n", " u'value': 18.0},\n", " {u'description': u'Folate, DFE',\n", " u'group': u'Vitamins',\n", " u'units': u'mcg_DFE',\n", " u'value': 18.0},\n", " {u'description': u'Tryptophan',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.324},\n", " {u'description': u'Threonine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.896},\n", " {u'description': u'Isoleucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.563},\n", " {u'description': u'Leucine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.412},\n", " {u'description': u'Lysine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.095},\n", " {u'description': u'Methionine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.659},\n", " {u'description': u'Cystine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.126},\n", " {u'description': u'Phenylalanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.326},\n", " {u'description': u'Tyrosine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.216},\n", " {u'description': 
u'Valine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.682},\n", " {u'description': u'Arginine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.952},\n", " {u'description': u'Histidine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.884},\n", " {u'description': u'Alanine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.711},\n", " {u'description': u'Aspartic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.618},\n", " {u'description': u'Glutamic acid',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 6.16},\n", " {u'description': u'Glycine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 0.439},\n", " {u'description': u'Proline',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 2.838},\n", " {u'description': u'Serine',\n", " u'group': u'Amino Acids',\n", " u'units': u'g',\n", " u'value': 1.472},\n", " {u'description': u'Cholesterol',\n", " u'group': u'Other',\n", " u'units': u'mg',\n", " u'value': 93.0},\n", " {u'description': u'Fatty acids, total saturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 18.584},\n", " {u'description': u'Fatty acids, total monounsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 8.275},\n", " {u'description': u'Fatty acids, total polyunsaturated',\n", " u'group': u'Other',\n", " u'units': u'g',\n", " u'value': 0.83}]" ] } ], "prompt_number": 133 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 389355 entries, 0 to 389354\n",
        "Data columns (total 5 columns):\n",
        "description    389355  non-null values\n",
        "group          389355  non-null values\n",
        "units          389355  non-null values\n",
        "value          389355  non-null values\n",
        "id             389355  non-null values\n",
        "dtypes: float64(1), int64(1), object(3)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 129, "text": [ "\n", "Int64Index: 389355 entries, 0 to 389354\n", "Data columns (total 5 columns):\n", "description 389355 non-null values\n", "group 389355 non-null values\n", "units 389355 non-null values\n", "value 389355 non-null values\n", "id 389355 non-null values\n", "dtypes: float64(1), int64(1), object(3)" ] } ], "prompt_number": 129 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
descriptiongroupunitsvalueid
0 Protein Composition g 25.18 1008
1 Total lipid (fat) Composition g 29.20 1008
2 Carbohydrate, by difference Composition g 3.06 1008
3 Ash Other g 3.28 1008
4 Energy Energy kcal 376.00 1008
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 130, "text": [ " description group units value id\n", "0 Protein Composition g 25.18 1008\n", "1 Total lipid (fat) Composition g 29.20 1008\n", "2 Carbohydrate, by difference Composition g 3.06 1008\n", "3 Ash Other g 3.28 1008\n", "4 Energy Energy kcal 376.00 1008" ] } ], "prompt_number": 130 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- DataFrame\uc5d0\ub294 \uc911\ubcf5\ub41c \ub370\uc774\ud130\uac00 \uc788\uc73c\ubbc0\ub85c \uc81c\uac70" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients.duplicated().sum()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 134, "text": [ "14179" ] } ], "prompt_number": 134 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients = nutrients.drop_duplicates()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 135 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients.duplicated().sum()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 136, "text": [ "0" ] } ], "prompt_number": 136 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 375176 entries, 0 to 389354\n",
        "Data columns (total 5 columns):\n",
        "description    375176  non-null values\n",
        "group          375176  non-null values\n",
        "units          375176  non-null values\n",
        "value          375176  non-null values\n",
        "id             375176  non-null values\n",
        "dtypes: float64(1), int64(1), object(3)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 138, "text": [ "\n", "Int64Index: 375176 entries, 0 to 389354\n", "Data columns (total 5 columns):\n", "description 375176 non-null values\n", "group 375176 non-null values\n", "units 375176 non-null values\n", "value 375176 non-null values\n", "id 375176 non-null values\n", "dtypes: float64(1), int64(1), object(3)" ] } ], "prompt_number": 138 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 'group'\uacfc 'description'\uc740 \ub450 DataFrame \uac1d\uccb4 \ubaa8\ub450\uc5d0 \uc788\uc73c\ubbc0\ub85c \ubb50\uac00 \ubb54\uc9c0 \uc27d\uac8c \uc54c\uc544\ubcfc \uc218 \uc788\ub3c4\ub85d \uc774\ub984\uc744 \ubc14\uafd4\uc8fc\uc790." ] }, { "cell_type": "code", "collapsed": false, "input": [ "col_mapping = {'description': 'food',\n", " 'group' : 'fgroup'}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 139 }, { "cell_type": "code", "collapsed": false, "input": [ "info = info.rename(columns=col_mapping, copy=False)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 140 }, { "cell_type": "code", "collapsed": false, "input": [ "info" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 6636 entries, 0 to 6635\n",
        "Data columns (total 4 columns):\n",
        "food            6636  non-null values\n",
        "fgroup          6636  non-null values\n",
        "id              6636  non-null values\n",
        "manufacturer    5195  non-null values\n",
        "dtypes: int64(1), object(3)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 141, "text": [ "\n", "Int64Index: 6636 entries, 0 to 6635\n", "Data columns (total 4 columns):\n", "food 6636 non-null values\n", "fgroup 6636 non-null values\n", "id 6636 non-null values\n", "manufacturer 5195 non-null values\n", "dtypes: int64(1), object(3)" ] } ], "prompt_number": 141 }, { "cell_type": "code", "collapsed": false, "input": [ "col_mapping = {'description': 'nutrient',\n", " 'group': 'nutgroup'}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 142 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients = nutrients.rename(columns=col_mapping, copy=False)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 143 }, { "cell_type": "code", "collapsed": false, "input": [ "nutrients" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 375176 entries, 0 to 389354\n",
        "Data columns (total 5 columns):\n",
        "nutrient    375176  non-null values\n",
        "nutgroup    375176  non-null values\n",
        "units       375176  non-null values\n",
        "value       375176  non-null values\n",
        "id          375176  non-null values\n",
        "dtypes: float64(1), int64(1), object(3)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 145, "text": [ "\n", "Int64Index: 375176 entries, 0 to 389354\n", "Data columns (total 5 columns):\n", "nutrient 375176 non-null values\n", "nutgroup 375176 non-null values\n", "units 375176 non-null values\n", "value 375176 non-null values\n", "id 375176 non-null values\n", "dtypes: float64(1), int64(1), object(3)" ] } ], "prompt_number": 145 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc5ec\uae30\uae4c\uc9c0 \ud588\uc73c\uba74 info \uac1d\uccb4\ub97c nutrients \uac1d\uccb4\ub85c \ubcd1\ud569\ud55c\ub2e4.\n", "- info\ub294 6636\uac1c, nutrients\ub294 375176\uac1c\uc778\ub370 info\uc5d0\ub294 unique \ud55c id\uac12\ub9cc \ub4e4\uc5b4\uac00 \uc788\uace0 nutrients\uc5d0\ub294 \ubaa8\ub4e0 \uc601\uc591\uc18c\uac00 \ub4e4\uc5b4\uac00 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "ndata = pd.merge(nutrients, info, on='id', how='outer')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 147 }, { "cell_type": "code", "collapsed": false, "input": [ "ndata" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "Int64Index: 375176 entries, 0 to 375175\n",
        "Data columns (total 8 columns):\n",
        "nutrient        375176  non-null values\n",
        "nutgroup        375176  non-null values\n",
        "units           375176  non-null values\n",
        "value           375176  non-null values\n",
        "id              375176  non-null values\n",
        "food            375176  non-null values\n",
        "fgroup          375176  non-null values\n",
        "manufacturer    293054  non-null values\n",
        "dtypes: float64(1), int64(1), object(6)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 148, "text": [ "\n", "Int64Index: 375176 entries, 0 to 375175\n", "Data columns (total 8 columns):\n", "nutrient 375176 non-null values\n", "nutgroup 375176 non-null values\n", "units 375176 non-null values\n", "value 375176 non-null values\n", "id 375176 non-null values\n", "food 375176 non-null values\n", "fgroup 375176 non-null values\n", "manufacturer 293054 non-null values\n", "dtypes: float64(1), int64(1), object(6)" ] } ], "prompt_number": 148 }, { "cell_type": "code", "collapsed": false, "input": [ "ndata[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nutrientnutgroupunitsvalueidfoodfgroupmanufacturer
0 Protein Composition g 25.18 1008 Cheese, caraway Dairy and Egg Products
1 Total lipid (fat) Composition g 29.20 1008 Cheese, caraway Dairy and Egg Products
2 Carbohydrate, by difference Composition g 3.06 1008 Cheese, caraway Dairy and Egg Products
3 Ash Other g 3.28 1008 Cheese, caraway Dairy and Egg Products
4 Energy Energy kcal 376.00 1008 Cheese, caraway Dairy and Egg Products
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 149, "text": [ " nutrient nutgroup units value id \\\n", "0 Protein Composition g 25.18 1008 \n", "1 Total lipid (fat) Composition g 29.20 1008 \n", "2 Carbohydrate, by difference Composition g 3.06 1008 \n", "3 Ash Other g 3.28 1008 \n", "4 Energy Energy kcal 376.00 1008 \n", "\n", " food fgroup manufacturer \n", "0 Cheese, caraway Dairy and Egg Products \n", "1 Cheese, caraway Dairy and Egg Products \n", "2 Cheese, caraway Dairy and Egg Products \n", "3 Cheese, caraway Dairy and Egg Products \n", "4 Cheese, caraway Dairy and Egg Products " ] } ], "prompt_number": 149 }, { "cell_type": "code", "collapsed": false, "input": [ "ndata.ix[30000]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 150, "text": [ "nutrient Glycine\n", "nutgroup Amino Acids\n", "units g\n", "value 0.04\n", "id 6158\n", "food Soup, tomato bisque, canned, condensed\n", "fgroup Soups, Sauces, and Gravies\n", "manufacturer \n", "Name: 30000, dtype: object" ] } ], "prompt_number": 150 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc774 \ub370\uc774\ud130\ub97c \ud544\uc694\ud55c \ud06c\uae30\ub85c \uc798\ub77c\ub0b4\uace0 \uc218\uc9d1\ud558\uba70 \uc2dc\uac01\ud654\ud558\ub294 \ub3c4\uad6c\ub294 \ub2e4\uc74c 2\uc7a5\uc744 \ud1b5\ud574\uc11c \uc790\uc138\ud788 \uc0b4\ud3b4\ubd04\n", "- \uadf8\ub7f0 \ub2e4\uc74c\uc5d0 \ub2e4\uc2dc \uc774 \ub370\uc774\ud130\ub97c \uc0b4\ud3b4\ubcf4\ub294 \uac83\ub3c4 \uc88b\ub2e4.\n", "- \uc608\ub97c \ub4e4\uba74 \uc74c\uc2dd \uadf8\ub8f9\uacfc \uc601\uc591\uc18c\uc758 \uc885\ub958\ubcc4 \uc911\uac04 \uac12\uc744 \uadf8\ub798\ud504\ub85c \uadf8\ub824\ub0bc \uc218 \uc788\ub2e4.\n", "\n", "#### \uc5ec\uae30\uae4c\uc9c0\ub294 \uc774\ud574\ud588\ub294\ub370 \ubc11\uc73c\ub85c\ub294 \uc880 \uc774\ud574\uac00 \uc548\ub428(\uccab\ubc88\uc9f8 \uc77d\uc744 \ub54c) \ub4a4\uc5d0 2\uc7a5\uc744 \ubcf4\uace0 \ub2e4\uc2dc 
\ub3cc\uc544\uc640\uc57c \ud560\ub4ef" ] }, { "cell_type": "code", "collapsed": false, "input": [ "result = ndata.groupby(['nutrient', 'fgroup'])['value'].quantile(0.5)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 151 }, { "cell_type": "code", "collapsed": false, "input": [ "result" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 152, "text": [ "nutrient fgroup \n", "Adjusted Protein Sweets 12.900\n", " Vegetables and Vegetable Products 2.180\n", "Alanine Baby Foods 0.085\n", " Baked Products 0.248\n", " Beef Products 1.550\n", " Beverages 0.003\n", " Breakfast Cereals 0.311\n", " Cereal Grains and Pasta 0.373\n", " Dairy and Egg Products 0.271\n", " Ethnic Foods 1.290\n", " Fast Foods 0.514\n", " Fats and Oils 0.000\n", " Finfish and Shellfish Products 1.218\n", " Fruits and Fruit Juices 0.027\n", " Lamb, Veal, and Game Products 1.408\n", "...\n", "Zinc, Zn Finfish and Shellfish Products 0.67\n", " Fruits and Fruit Juices 0.10\n", " Lamb, Veal, and Game Products 3.94\n", " Legumes and Legume Products 1.14\n", " Meals, Entrees, and Sidedishes 0.63\n", " Nut and Seed Products 3.29\n", " Pork Products 2.32\n", " Poultry Products 2.50\n", " Restaurant Foods 0.80\n", " Sausages and Luncheon Meats 2.13\n", " Snacks 1.47\n", " Soups, Sauces, and Gravies 0.20\n", " Spices and Herbs 2.75\n", " Sweets 0.36\n", " Vegetables and Vegetable Products 0.33\n", "Length: 2246, dtype: float64" ] } ], "prompt_number": 152 }, { "cell_type": "code", "collapsed": false, "input": [ "result['Zinc, Zn'].order().plot(kind='barh')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 153, "text": [ "" ] }, { "metadata": {}, "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAiIAAAD7CAYAAABXGv/kAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXdYVMcaxt8VC6BgN5qoLAFCh12aFJVmQBHEgl1kiSHR\nxJYoxnhvFKIxGhtoNDGxoNjBa4wmsVFUFK8KqCQaC8V4FQu91/3uH7gnLOziYkDYdX7PMw87c86Z\n+b7Dcpjzzcw7PCIiMBgMBoPBYLQC7VrbAAaDwWAwGK8vrCPCYDAYDAaj1WAdEQaDwWAwGK0G64gw\nGAwGg8FoNVhHhMFgMBgMRqvBOiIMBoPBYDBajfatbQCDoQwIBAJcv369tc1gMBgMpcLS0hLXrl1r\n9BwWEWFwqKmpQSgUwtLSEiNHjsTvv//+0nWdPn0ajo6OcHd3lyrPzMyEhoYGhEIh7OzssHLlyn9k\nM5/PR25ubpOvu3//Pvbv36/w+devXwcRqWxatmxZq9vA/GP+vY7+qbJvRKTYCxwxGM/p0qUL9/nQ\noUM0YcKEl65rzJgxFBcX16A8IyODzMzMiIiouLiYLC0tKTU1Veqcqqoqhdvh8/mUk5PTZPvi4uLI\n29tb4fMBsMQSSywpTdLS6t7k52JLALy4m8EiIowGEBGys7Ohrq7Ola1ZswYmJiYwMDDA8uXLufI9\ne/bAzMwMBgYGmDVrFgDgyy+/xOnTpzFz5kwsWrRIbjudO3fGkCFDkJCQgF27dmH8+PEYNmwYPD09\nkZ+fD19fX/D5fNja2iI1NRUAkJubCw8PD+jp6WHu3Lmo/Z7XRlrMzc25uteuXYvQ0FAAwL179+Dm\n5gYDAwPY2NggPT0dixcvxvnz5yEUChEeHo4HDx5gxIgREAgEsLS0xL1792TdGRVOAW3ABuYf8+91\n9K9lfCsqyoPS0OLdIYbSoKamRgKBgHR0dKhPnz708OFDIiI6efIkjR8/nqqrq6miooKcnZ3p0aNH\ndPPmTRoyZAiVl5dTTU0NTZ48mS5dukRERC4uLpSUlNSgjboRkaysLDI2Nqb09HTauXMnde/enTIy\nMoiIKDw8nObMmUM1NTW0Z88eEggERES0bt06rnzbtm3E4/EoJydHql4iorVr11JoaCgREdnZ2dHm\nzZuJiKiiooJKS0spPj5eKiKybNky2rZtGxHVRmTKysqk7AZAAKlwimsDNjD/mH+vo38t5Rv+8f+E\n5kARO9hkVQaHhoYGUlJSAACHDx/GuHHjkJiYiFOnTuHKlSuwsbEBAJSUlCAmJgb5+flIS0uDvb09\nAKC8vBxxcXEYNGgQAKD2O9iQtLQ0CIVC9OrVCwsXLoSuri7Onj0LNzc38Pl8AMAvv/yCr7/+Gu3a\ntcPEiRPx8ccfo6ioCCdOnMCqVavQrl07TJ06FbNnz5brDxGhuLgYt27dQlBQEACgY8eOMm2ztbXF\n4sWLkZ2djcDAQPTp00dGjSIA/OefuwEQAHB5no9//lNZ85KytmJPc+clZW3FnubOS8raij3NnZeU\ntRV7mjPv0oL1P8/F1+ZdXFxaPB8fH4+IiAgA4J7nL6Sle0MM5aHuHBGxWExdu3al4uJiWrBgAYWE\nhDQ4f9OmTSQSiWTWpUhEpC4RERE0e/ZsLu/h4UFXr14lotoIRdeuXamwsJDeffddrrysrIzU1dUp\nJyeHsrKyyMDAgLv+888/p9DQUCoqKiItLS2qqKiQak/WHJGsrCxas2YN6evrU0pKitQxgEVElDsx\n/5Q7qbJ/LeUbZD6bXzWK2MHmiDBkcuHCBRgYGKBz587w9PREVFQU/vrrLwDAw4cP8ezZM7i7u+PU\nqVO4desWgNr5G5Jzmkrt9/VvvL29ERkZiZqaGkRFRUFPTw9aWloYMWIEIiMjIRaLsX//flRUVAAA\n+vbtC7FYjIcPHyI3NxdHjx4FAHTp0gWmpqb48ccfIRaLUVFRg
bKyMmhra+PZs2dcexkZGejbty8W\nLlwId3d33Lx586X8YDAYDEbTYEMzDI6ysjIIhUKIxWLo6Ohg/fr1AIB3330XH3zwAVxdXaGhoYGu\nXbtiz549MDY2xoYNGzBmzBioqalBQ0MDW7ZswcCBAxtth8fjySyrW+7v7w+RSAQ9PT307t0bu3bt\nAgAEBARg0qRJMDAwgJeXF3R0dLhrVqxYAS8vL2hra3MhQwCIjIxEUFAQNmzYgG7duiE6OhoWFhbQ\n0dGBUChEQEAAKioqEBkZCQ0NDRgbG2PChAmyLG/C3WQwGIzWQ0ure2uboDA8qv8qymAwGsDj8RpE\nbRgMBoPROIo8O5ViaKZLly7NXmdThLBKS0vRq1cvFBUVSZWPHj0ahw4datG2XxYXFxckJSU1KK+p\nqcGqVatgbW0NCwsLmJqa/mNRsaYiEU6ztrZGcHAwqqqqXroueX6+iIKCAnz33Xcv3a6qIZlspqow\n/5QbVfZPlX1TFKUYmpEVym+OOhV9w9XU1ISnpyeOHDmC6dOnA6j9R3bhwgUcOHDgpdpuaeoPdUj4\n17/+hadPnyIxMREdO3ZEYWEhVqxY0eL21EVTUxMpKSmoqqrC6NGjcfLkSXh7e3PHq6ur0b69Yl9N\neX6+iLy8PGzZsoXTPlG0LQaD0fJoaXVHYWHLvqwx2g5KERGRxbFjx2Bvbw+hUIiPPvoIT58+BQCE\nhITgww8/xNChQ6Gnp4dTp07hiy++gJmZGWbNmiXV+fj2229hZGTEiVw1xuTJk6U6HUeOHMHw4cOh\nrq4uV+xrzJgxsLa2hpubG6Kioprk3/Lly2FnZwdbW1upjoKLiwtCQ0NhY2MDZ2dnbrlteXk5Jk2a\nBF1dXUyaNAmVlZUNOlqlpaXYtm0bNm3axC1j1dbWxjfffPNCm7t06YIvvvgChoaG8PPzw59//glX\nV1dYWVnh3LlzAGqjLcHBwTAxMYG+vj527NjRqI8dOnSAm5sbEhIScPbsWbi6umLcuHGwsLBAZWUl\nAgMDoaurCxMTE+6tQZafdW2UEB0djcDAQADAkydPMGbMGOjp6UEoFCIxMRGLFy/mlhF/9tlnKC0t\nxZgxYyAUCmFubo6EhAQZFhNLLLH0ClJ9Ma66c75UDVX2TWFacNVOs1F3WamEvLw87vPq1atpwYIF\nRFQrTGVpaUmFhYUUHx9PXbp0oYiICBKLxeTu7s4t/eTz+fTJJ5+QWCymb775RmrpqCwqKirojTfe\noNzcXCIi8vT0pF9++UWu2BcRcecWFBTQwIEDubwisuSSc6urq8nHx4eOHTtGRLXLYgMDA6m6upr2\n7NlDgYGBRER0+PBhGjduHJWXl9OZM2eIx+M1WD57/fp1EgqFCrVb32Yejyd1H52cnLh7LFkGu3Xr\nVvr0009JLBZTcXExCYXCBstmif7+febl5ZGLiwvFxsZSXFwctWvXjpKTk4mI6OjRozR27FgqKyuj\nhIQE6tevH5WXlzfqZ93vSXR0NLe0eMKECfT5559TVVUVicViKigooMzMTKllxDt27KB///vfRFS7\ndLmoqEjKZgCEVl/mxxJLr0tCo88phvKgyO9SaSMiz549Q1BQEMzNzbFjxw5uuSWPx8OoUaOgpaUF\nBwcHVFRUYNKkSeDxeBg0aBASExO5Ovz9/cHj8SASiXDq1KlG2+vYsSNGjRqFqKgoZGdn49q1a/D0\n9JQS+xo0aBAePXqE2NhYAMCBAwfg7u4OJycnFBQU4O7duwr7d/XqVS46kJycLLWcdOrUqVBTU4Or\nqyvnz2+//YYpU6agU6dOcHd3f+HKFQCIiIiAUCjEwIED8fDhw0Ztbt++vdR9dHNz4+6xxIZTp07h\n+PHjsLKywuDBg1FQUIBLly41aFeyOsfX1xc+Pj5wdnYGULvDrVAoBFAraDZ16lSoq6vDyckJ3bt3\nx507d3DixAmF/Kz9/tcSG
xuLOXPmoH379uDxeNDW1pY6Lmk7KioKS5cuRWZmppx5SSIAIc9TGKRF\ng+KVPK9q/tTPM/+UKy89dyIsLEwqHx8frzJ5yee2Yk9z+CMSiSASiRASEgKFaPHuUDMgKyISEBBA\n27dvp5qaGjpy5Ai5uLgQEVFISAitXbtW5rUhISG0bt06IqqNSkjepJ8+fUrvvPPOC+2IjY0lV1dX\n+v777+nDDz8kIpIr9pWWlkbm5uZc5EMgENDZs2e5thuLiIjFYjI0NOQ2g5s/fz4nV15XKOzZs2fE\n5/OJiCgoKIiio6O5Our6J6GkpIR69uzZ4G3fzMyM7t+/36jN9e+jrHs8btw42rVrl1y/6p9fl/oC\nYx988IGUPyYmJnTjxo1G/ezatStXvnXrVi5a1KtXL06uXoIsYbX8/HzasmULmZmZcREoCYCqR0Ti\n2oANzD/mnyShwfNBVVFl34ga/i5lobQRkYcPH0JfXx/l5eWcxgQA1PotH8lxIsLevXtRU1OD3bt3\nw9PTE0Dt3I8lS5bIvNbFxQV37tzB5s2bMXnyZACQK/aVlZWF3r17o0ePHrhw4YLcrZDd3d2RlZUl\nVVZRUYGioiLw+Xw8fPiQE+dqjBEjRuDAgQOoqKhAbGws7t+/3+AcTU1NzJgxA7Nnz+aEwGpqarh5\nFo8ePVLIZnl4enpi165dnFDYnTt3UFpa2qQ6JHh7e+PAgQMoLy/HxYsXkZ+fDyMjo0b9tLKyQmJi\nIkpKSnDw4EGufNiwYdi0aROqqqpQU1ODwsJCaGlpSQma/fXXX+jSpQtmzZqFqVOn4saNGy9lt/Li\n0toGtDAurW1AC+PS2ga0KKo8j0KVfVMUpVg1U1paigEDBnD5BQsWYMmSJZg/fz43FHP27FkADVdR\n1F/pIMnzeDx07doVpqam6NKlCzcxMy0tDV27dpVpB4/Hw/jx4xEVFcUNJ9QX+9LW1sbevXvh5OQE\nHR0dGBsbw8zMDMOGDWtQn1gsRlpaGnr06CFVrq6ujsWLF8POzg49evSAl5eX3Hsj8cfLywuHDh2C\nsbEx7Ozs4OjoKPP8lStXYvXq1bC3t4eamho6deoEkUiEfv36YeDAgXJtlncf635+//33kZmZCTs7\nO3Tp0gV9+vTBkSNH5Npcv6xuuWSVkrGxMTQ1NbFv3z506NChUT+XLFmCuXPnol27dnB2dkZ2djYA\nIDw8HEFBQTA0NETXrl3x/fffY9CgQfDz84OVlRWGDRsGMzMzrFmzBh07dgSfz8ePP/4o627LvKcM\nBqN5USYxLkYz0NJhGWVj2rRplJ2d/Y/r4fF43ARaIqI1a9Y0GML5/fffpc7JzMykffv2/eO2ZSFv\njxexWEyhoaFkb29P5ubmZGtry+2A2xLtZWRkkLq6OgkEArK1taWvvvrqH7Wjo6Pzwom/smjqvVb1\nPxVVDw8z/5QbVfZPlX0jUuzZqRQRkVdJZGRks9TTsWNHHDlyBJ9//jl69uwpMwpgamqKtWvXcvmM\njAzs27ePG/Z5FRw4cADnzp3DuXPn0KFDBzx69Aiampot2qa+vj5SUlJQUlICJycnjBo1CmZmZtzx\npuqIvAwvc6+ZjghDVWG6HYzWRGnniLR1OnTogA8++AAbNmxocEwkEuHw4cNcXktLCwCwePFinD9/\nHkKhEOHh4VLXlJSUYNiwYbCysoKXlxdiYmIAAJmZmTAxMcHHH38MExMTzJw5k1MqTU5OhrGxMUxN\nTbltmetTXl4OHR0ddOjQAQDw5ptvolu3bgBqV8EIBAIYGhpiwoQJKC8vBwAkJSXB3t4eRkZG8PDw\n4IZAFGmvLp07d8aQIUOQkJCAXbt2Yfz48Rg2bBg8PT2Rn58PX19f8Pl82NraIjU1FUDtxnoeHh7Q\n09PD3LlzuTk/mZmZMDc35+peu3YtQkNDAQD37t2Dm5sbDAwMOM2Y+vf6wYMHGDFiBAQCASw
tLXHv\n3j0ZFhNLLKlkqq/b0dZQ5XkUquyborCOSAvy0UcfYe/evSgsLJQql/dmvXr1agwZMgQpKSmYN2+e\n1DF1dXUcOXIEycnJ+P777yESibhjf/75J8aOHYvff/8dmZmZ3HLakJAQfPnll7h27RoePHggs82x\nY8ciKSkJRkZGWLJkCTdxNjs7G8HBwTh37hxu376Nt99+G0ePHkVVVRUCAwMRHR2NP//8E6NHj8b2\n7dsVbq8ujx8/RkxMDDw9PUFEiImJwbZt2xATE4Pdu3dDR0cH6enpmD9/PqdoGxERASMjI9y9exeW\nlpZyd/utO+dk6tSp8PPzw927d3Hx4kX069evwb3evn07/Pz8cO3aNSQlJaF///4vtJ/BYDAY/xzW\nEWlBtLS0MH36dGzcuFGh86mRFT9qamoIDw+Ho6MjfHx8kJuby6nJvvXWW3B3d+cmaSYmJqKyshLJ\nycnw8/NDhw4d4O/vL7Perl274tq1a/juu+9QUFAAMzMz3LhxA5cuXcKjR4/g7OwMoVCIY8eO4dy5\nc7hz5w7u378PHx8fCIVCfP/997hw4YLC7QHgFE39/f2xcOFC6OrqAgDc3NzA5/MB1OqIiEQitGvX\nDhMnTkRGRgaKiopw4sQJrnzq1Kno1KlTo/ezuLgYt27dQlBQEIDaITMNDY0G99rW1hZhYWFYvXo1\ncnNzoa6uLrde1SS+tQ1oYeJb24AWJr61DWhRVHk/FlX2TVHYHJEWZv78+bCysuLkxoHa6IZk+Wxp\naSn3uTHi4+Nx/vx5nDx5Ep07d0afPn24oRLJUApQ+4+2pKSkyXa6urrC1dUVvXv3xsGDB+Hg4AAz\nMzPExcVJnZeamooePXpw0vISFPFBgp6eXoPreTwe+vXrJ1Umr2Mmq7zuPQWAnJwcqc5EY508ABg5\nciSsra2xZ88eODk5ISoqCgKBoN5ZIgD855+7ARDg72WT8c9/Kmv+Whuzp7nzzL/G87XPGMkwgeSf\nY1vJX7t2rU3Zw/Ly8/Hx8dzQvOTF8oW04GTZ15q6ol2LFi2igQMHcqJku3btog8++ICIiL777jvi\n8XhERHT16lUaNGiQzPrqyrnv3buXeDwe3b9/v8HqlLVr13Krc0aNGkVRUVFUWVlJM2bMkLmKJTk5\nmRP7Ki8vJy8vL1q3bh09e/aMevXqRYmJiUREVFxcTHfu3KHKykri8/kUHR1NYrGYKisr6Y8//lC4\nPXmraXbu3Ckls79x40aaN28eVVdX0759+8jKyoqIiNavX0/z5s2jmpoa2rFjB/F4PG7VjJ6eHv3v\nf/+jnJwcMjEx4e63vb09ffvtt1RTU0Pl5eVUWlpKSUlJUvc6PT2d+/zhhx/S3r17pewDQGh1kSeW\nWGqpBJnPHQbjn6LId4sNzbQQdeeBLFiwgJvQCdSKdRUVFcHExASPHz/m5MQtLS2ho6Mjc7Lq6NGj\nkZ+fD2NjYyQkJMDExERmW3XzISEhWLp0KQQCAQYMGCBzbsrTp0+5VSuGhobo168fZs+ejV69euHQ\noUOYOXMmjIyM4OjoiNu3b6NDhw746aefsH79ehgaGnKbyCnanix7JWV1y/39/ZGZmQk9PT2sX7+e\nE60LCAjAzZs3YWBggOTkZOjo6HDXrFixAl5eXvD19ZWaABYZGYno6Gi88847cHJywpMnT2BhYcHd\n67CwMBw6dAhmZmawtbVFaWkpJkyYINN2BoPBYDQvvOc9FgaD0Qhs6S5DlWnry3frDhupGqrsG1D7\n7HxRN4NFRJQcNTU1CIVCWFpaYuTIkfj999+bpd76y2HrsmDBAlhbWzeI2ryI69ev47fffpN7/I8/\n/oCHhwcEAgGMjY0RFBSEsrKyJrXRFBrzURZEpLIpLi6u1W1g/rWef225E8JQfdhkVSVHU1OTm/gZ\nFRWF5cuXS+2zAjRNIOxF5OXl4aeffkJaWlqTr01JSUF
SUhJGjBjR4NiTJ08wcuRIHDx4EIMGDQIA\n7N69G4WFhdDQ0Hhh3c3pozxYVITRmjQWtVDlN2pAtf1TZd8UhUVEVAQiQnZ2NrdSJD4+Hq6urhg3\nbhwsLCwgFosRHBwMExMT6OvrY8eOHQCA4uJimUJpdUlPT4eVlRWuXr0KZ2dnZGVlQSgUIiEhAdu2\nbYOdnR2sra2xaNEiLoJx+vRpDB06FJaWlnBxcUFVVRWWLl2KgwcPQigUcnv7SNi8eTNEIhHXCQGA\n6dOn44033kBJSQkCAwNhYmICIyMj/PLLLwBqNUXqiqCVlpbKPC8zMxNDhw6FlZUV/Pz8kJyc3MBH\nJmjGUltPbV10jMF4aYih1KipqZFAICAdHR3q06cPtwImLi6O2rVrR8nJyUREtHXrVvr0009JLBZT\ncXExCYVCqqiooOrqaiosLCQiovv371P//v2J6O/VLX/++ScJhUK6ceMGEdXu0VJ31UvdfV4++ugj\n2rRpExEROTs7U1paGhERFRQUEBFRREQEzZkzR6YfY8eOpZ9//lnmsc8//5zCw8OJiOjx48dkZ2dH\nRLUrbbp3787tjSPvvNLSUiovLyciokuXLpG1tbWUj0RES5cupW3bthERUVVVFZWVlUnZAIAAUuEU\n1wZsYP41niDz74NI9fcrUWX/VNk3osa/txLY0IySo6GhwQ3NHD58GOPGjeNWsQgEAgiFQgC1cu2p\nqamIjY0FABQWFuLSpUsYOnQowsPD8euvv6KkpERKKO3p06cYPXo0jhw5AiMjIwBA7ffqb9LT0zF3\n7lykpKSgrKyMOz548GDMmDEDAQEB3H4uRNTg+rrIO3bq1ClUVFRg586dAGqHh9LT0wFIi6DJOi8j\nIwN9+/bF0qVLERMTg5qaGty5c6dBG3Z2dli8eDGys7MRGBiIPn36yLWTwWAwGM0HG5pRIcaOHYtb\nt26htLQUQO2+MXX517/+hZSUFKSkpCAtLQ1Dhw6VEkpLSUmBhoaGlFCajo4Ozp8/L7fNhQsXIiAg\nAH/88QfmzZvHDc2sWLEC4eHhuHnzJszMzLj9b+RhamqKpKQkucc3b97M2Z6ZmYm3335bpgha/fN0\ndXVx8OBBZGdnIyEhAWfOnJEpvjZy5EicPn0aampqcHJy4gSUpBEBCHmewiCtZhmv5Hm84Liy5/GC\n48qQr5OLj2+gyFk3X/+4sudV2T+JCFhbseef5uPj4yESiSASiRASEgKFaNmgDKOlqSucdv78ebKx\nsSGi2nCft7c3d+yHH34gNzc3evr0KRER3b59m0pKSl4olFZSUkKDBw+mffv2EVFDQTIDAwPKyMig\n3NxcGjRoEFfXvXv3iIhILBaTqakpPXjwgA4fPkwTJ06U6ceTJ09IR0eH/vvf/3Jle/bsocePH9OS\nJUtoypQp3BCSZLipvgiavPNWrFhBoaGhJBaL6euvv+YE5Or6IhlGImKCZiy11QTZDwEGow2jyPeW\nRUSUnLKyMm757jfffIP169cDaCgQ9v7778Pe3h52dnYwNzfHrFmzUFNT80KhNE1NTRw/fhwbNmzA\n8ePHG9S7fPlyeHt7w9PTE66urlz5okWLYGFhAQcHB0ybNg39+/eHq6srioqKZE5W7dOnD44fP45/\n//vfEAgEMDExQUJCArS1tfHFF19AS0sLFhYWMDMzw7Jly2T6KO+8gIAAJCQkwNzcHJWVlZyAnKQO\nAEzQrEH0QNWIb20DWpT60QNVQ5X9U2XfFIUJmjEYCsCW7jJam8aW78bHq7Yolir7p8q+AYoJmrGO\niArw7bffIioqCnl5eVBTU8PWrVthZ2cn9/ytW7dCU1Oz0R1yWwuRSAQfHx+MGzfuheVdunRBcXFx\nk+p/mWsAxf6YGAwGgyGNIs9OtmpGyUlMTMTmzZuRlJQETU1N5ObmvnAn3A8//PAVWdd06g+3NFbe\n1CiFWCz+R5ENFhV
htDRtXWqdwWgJ2BwRJae8vBxvvfUWNDU1AQA9evTgVpLw+XyEhobCyMgINjY2\n3JLXkJAQrFu3DgBw7949uLm5wcDAANbW1sjIyAAArFmzBiYmJjAwMMDy5csBAESEwMBAWFlZwdzc\nHIcOHWpgz48//ihT4EwkEuGzzz6Do6MjbGxscObMGe6a2bNng8/nw9PTE/n5+XJ7z431qmXZm5mZ\nCWNjY3zwwQcwNzfH//73PwC1q4f09fXh7u6OnJwcAMC+ffvg4OAAS0tLbrmxDAtUOMW1ARuYfy8r\nWqbq8wxU2T9V9k1RWEdEyXFxcYFYLIaOjg7mzp0rpQjK4/FQUFCAW7duYeLEidiwYQNXLnm7nzp1\nKvz8/HD37l0kJiaib9++OHXqFK5cuYLU1FT88ccfiImJQVZWFuLj41FdXY3k5GSkpqZi+PDhDewZ\nN24cLl++jKSkJJSUlGD79u3csevXryM2Nhbh4eFYuXIlACA5ORnJycm4ceMGNmzYgBMnTsiMPBAR\ngoODIRQKuSQ5T569AHD79m2MHDkSqampGDhwIEpKStCrVy/cvn0bpqamiIiIAAB8+eWXiImJwfXr\n17F169Zm+M0wGAwGQxFYR0TJ4fF4iI2NRXR0NDQ0NODk5IRff/2VO+7v7w8ejweRSIRTp05JXVtU\nVIRbt24hKCgIANCxY0doaGhw/9htbGwwaNAgPHr0CLGxsTA2Nsbly5exYMECpKamQltbu4E96enp\nmDZtGkxNTfHbb7/h5s2bnJ3jx4+Huro6HBwcOJn13377DX5+ftDW1oaJiYmUxHt9P9euXctphKSk\npHAREnn2AkDPnj3h6+srVU9AQADU1NQwffp0nDhxAgBgY2ODyZMnIzo6Gp07d5Zzt0VgOiLKmscL\njretfFN1HCRlL3t9W8+rsn9MRwRgC9NVjJ07d9KUKVOIiIjP51NSUhIRET19+pTeeecdIiIKCQmh\ndevWUVFREWlpaVFFRYVUHQsWLKCQkBCZ9ZeVldGuXbto0KBBtHnz5gbHnZ2d6dSpU0REFBYWRiKR\niIiIRCIRRUdHc+dJ9E+++uorWrt2LVfu4uJChw8fblBv/evr1iHP3vqaJ0RE7dq142Tpr1y5QsOG\nDeOOXbhwgWbNmsVJw9cFAKHVdSRYUv2EBt89BkOZUeQ7zSIiSs6dO3dw9+5dALU70F66dAk9e/YE\nABAR9u7di5qaGuzevRuenp5cORGhS5cuMDU1xY8//gixWIyKigqUlZXB09MTUVFR+OuvvwAADx8+\nxLNnz7hSkD4JAAAgAElEQVThjunTp2PevHmctHxdHj16BAMDA+Tl5WH//v0vnOA5YsQIHDlyBIWF\nhbh16xYuXbrU5Hsgz15ZEBEiIyNRU1ODyMhIjBgxAkSEzMxMODo6Yv369cjKynrhhF/VI761DWhh\n4lvbgBalfvRA1VBl/1TZN0Vhq2aUnOLiYsyZMwf5+fkoLS2FtbU1fvjhBwC1wxDa2towNTVFly5d\nOBGxunNEIiMjERQUhA0bNqBbt26Ijo7Gu+++iw8++ACurq7Q0NBA165dERkZiXv37iE4OBhqamp4\n8803ERYW1sAeicCZpqYm3N3d8eTJE+5Y3U6J5LNkvoeFhQUMDQ0xYsQIub7KWzUjy949e/bIXGnT\nuXNnPH36FIaGhtDR0cGyZctQXV0Nf39/FBQUQEtLCyEhIejUqZPCvwMGg8FgvDxMR0SF0dXVRVJS\nEnr06NHapig9bOku41XAlu8yVA1FdETY0IwKk5mZCWdnZ1hbWyM4OPiFG8/JQyKJfv/+fezfv/8f\n26ShoQGhUAg7Oztu9czLwufzkZvb9Af3y/giGdJiiaWWSqwTwngdYR0RFaZLly5ITU3FpUuXcPPm\nTZw8efKl6pFEAzIyMrBv3z6Z51RXVytcn76+PlJSUhAXF4dDhw7h999/f+m6XjZS0
ZgvjbXFEkuK\nJG3tVxuFVPV5Bqrsnyr7piisI/Ia0KFDB7i5uSEhIQH5+fnw9fUFn8+Hra0tUlNTAUiLnAGAmZkZ\nN/lTwuLFi3H+/HkIhUKEhYVh165dGD9+PIYNGwYPDw8EBATg6NGj3PlTp07Fzz//LNeuzp07Y8iQ\nIUhISJCqSyJsJsvO3NxceHh4QE9PD3PnzgVRbcgvMzMT5ubmXN1r165FaGgoAGnRNomwW11fwsPD\n8eDBA4wYMQICgQCWlpZSeix/Qyqc4tqADarj38sKkzEYryOsI/IakJ+fj+PHj8PT0xO7d++Gjo4O\n0tPTMX/+fEyfPh2A/ImgdVm9ejWGDBmClJQUzJ8/H0SEmJgYbNu2DbGxsZgxYwYnEFZQUIDExER4\ne3vLtevx48eIiYmBp6enVF0xMTFy7YyIiICRkRHu3r0LS0vLBp2luvZLfKgr2nbx4kX069dPypd5\n8+Zh+/bt8PPzw7Vr15CUlIT+/fs3+T4rNy6tbUAL49LaBrQoqrxpGqDa/qmyb4rCOiIqTFlZGYRC\nIXx9feHj4wNnZ2f88ssvEIlEaNeuHSZOnIiMjAwUFRUpVJ8k+lAXNzc38Pl8AMDQoUNx9+5dZGdn\nY//+/fDz80O7dg2/YmlpaRAKhfD398fChQuhq6vboC55dp44cYIrnzp1aqOrW4gIxcXFMkXb6vti\na2uLsLAwrF69Grm5uVBXV5dRowiqK2jG8s2bx0sLQrE8yytzPj6eCZox6iAR/KqLh4cHXb16lYiI\nqqqqqGvXrlRYWEirVq2ir776ijvvrbfeovv370vVExcXR97e3tw5ERERNHv2bKn6V69eTevXr6dB\ngwbRrVu3GrQvS2RMVl3y7Hz33Xe58rKyMlJXV6ecnBzKysoiAwMD7vrPP/+cQkND5Yq21feFiCgr\nK4vWrFlD+vr6lJKSInUMAAGkwimuDdigSv6hwXe8JYmLi3ul7b1qVNk/VfaNSLG/BRYRec3w9vbm\nBL2ioqKgp6cHLS0tODg4ICEhAUSE3377DY8ePWpwrZaWlpRQWO13TBqRSISwsDDweDwYGRkpbFf9\nuuTZOWLECERGRkIsFmP//v2c8Fjfvn0hFovx8OFD5ObmcnNV5Im2aWtrS/mSkZGBvn37YuHChXB3\nd+ek6RkMBoPRsjBBMxVG1jwPf39/iEQi6OnpoXfv3ti1axcAwMHBAQMGDICpqSmcnZ1hYmLSoB5L\nS0vo6OhAKBQiICAA3bt3b9BGnz59YGJigjFjxjTJrrpzOhqzMyAgAJMmTYKBgQG8vLygo6PDXbNi\nxQp4eXlBW1tbatxVlmibhYWFlC8VFRWIjIyEhoYGjI2NMWHChMZurQri0toGtDAurW1Ai6Lq8wxU\n2T9V9k1RmKAZo1kpKiqChYUFbty4AS0trdY2p9mQ1XliMOTBhMkYjFp4PCZoxmgiampqEAqFLyWC\nFhwcjIEDB2LhwoVyOyERERHo3bs3hEIh3Nzc8J///Oelba2/ZLcpnD17FomJiU26hqj1Ba9aKsXF\nxbW6Dark36vuhNSdOKiKqLJ/quyborChGYYUmpqaSElJQVVVFUaPHo2TJ082ugRXQnV1NczMzDBt\n2jR8/PHHcs/j8XiYPHkyNm7ciIyMDAwdOhRjxoyRijjU1NRATU2tWfyRR1xcHDc3RlFYVITBIh0M\nRvPDIiIMmSgqgvbBBx/AyckJAQEBUtf/8ssvcHR0lCm/TlQbptPV1UW3bt1w+/ZtiEQifPrppxg0\naBAWL16M69evw9raGnw+H2PGjEF+fj4AIDk5GcbGxjA1NeU0S4DaSMucOXO4vLe3N86ePQsAOHHi\nBMzMzGBqaop3330X9+/fx9atW7FhwwYIhUIkJCTg9OnTGDp0KCwtLeHs7CznrhBLr3lqq0Jlqj7P\nQJX9U2XfFIVFRBgykYigLV26lBMXO3LkCPbv3
4/p06cjJSUFAHDu3DmcP39eakLpkSNHsGHDBvz2\n22/o2rWr3DaSkpJQWlrKra5JTk7GmTNnoKWlhbFjx2LRokUYN24c5s6di9DQUGzYsAEhISH48ssv\nMXr0aMycOVNulEIy+fXZs2eYNm0aTp48CWtra+Tn56Nbt26YOXMmtLS08OmnnwKofRhERETg7bff\nRmFhYXPeSgaDwWA0AuuIMKSQiKBpa2vD19cXzs7OWLVqFb7++mtOXOzjjz/mRNC8vb3Ru3dvAAAR\nITY2FlevXsXp06e5zfLqQkQ4ePAgEhIS8M4772DHjh0AajsOfn5+0NLSQmVlJa5cuYLDhw+Dx+Mh\nMDAQM2fORGVlJZKTk3H06FHweDz4+/vj8uXLcn0hIly6dAnW1tawtrYGAHTr1k3quITBgwdjxowZ\nCAgIwOTJk+XUKALAf/65GwAB/l6NEf/8p7Lmw6Ba/tTPN5d/z3PPx/Ulb7OtnQ8LC4NAIGgz9jD/\nFM/XnSPSFuxpDn8k0WqJQOULIQajDk0RQQsJCaG1a9dy50VERJCPjw+ZmZlx59cnIiKC5syZ06Bc\nJBJRdHQ0ERFVVFRQ//79SSwWExHRf//7X7KysqKKigp66623uPK4uDhOHO3AgQMUFBTE1efo6Ejx\n8fF07Ngx8vDwaNBefduJiK5fv07BwcGkr69PlZWVUscAEEAqnOLagA3K4F/bfGSquiiWKvunyr4R\nKfY3w+aIMF6IPHGx2u/Y3xARdHR0EB0djenTp8sUBSOiBtfVp2PHjrCzs0N0dDSqq6uxa9cuuLi4\noGPHjrC2tsbhw4dRVVWFPXv2cNfY29vj0qVLqKysxO+//47Lly+Dx+PB3t4eSUlJuHr1KgBwc1bq\ni7OlpaXBwsICq1evRqdOnfDkyZOXvl/KiUtrG9DCuLS2AS2Kqs8zUGX/VNk3RWEdEYYU8kTQMjMz\noaenh/Xr13NzQeqLkEnyhoaG2Lt3L8aPH4+MjIwG9Tc2r0PCsmXLsHr1aujr6yMrKwtLly4FUDtB\ndunSpRAIBBgwYAB3jY6ODnx8fCAQCBAaGsr9cffq1Qt79uzB9OnTYWpqyg27jBo1ClevXuUmqy5a\ntAgWFhZwcHDAtGnT5Gx6x2PpNU9aWt3BYDCaFyZoxmAogCKiPMpMfHy8Sr+ZMf+UG1X2T5V9AxR7\ndiplROTbb7+Fs7MzLCwsIBQKG52wqOrImhDaWHlzEBISgnXr1rVY/fWJiIhAu3btEBMTw5X99NNP\naNeu3UsLoh09ehS3bt1qLhMZDAaD8ZIo3aqZxMREbN68GUlJSdDU1ERubi638dnriCLDHK+qzZZs\nz9zcHAcOHIC7uzsAYP/+/RAIBC9d55EjR+Dj4wNjY+Mm2cFQPVRBpEyV36gB1fZPlX1TFKWLiJSX\nl+Ott96CpqYmAKBHjx7o168fAGD58uWws7ODra0tVqxYwV3j4uKCpKQkAEB2djZ0dXUBAA8ePMCI\nESMgEAhgaWmJtLQ0AMCYMWNgbW0NNzc3REVFcfVs374dAwYMgJ2dHebPn88JaD179gyjR4+GiYkJ\nLC0tuQjNtWvX4O7uDoFAACsrKxQXFzfwR15bXbp0wfLly2FqaoopU6ZwkywzMzNhZ2eHd955BytX\nrmzSvZN3HyIiIjBp0iR4eXnBzMwMGzdu5K6pLwYmIS0tDa6urrCwsMCBAwe48j179sDMzAwGBgaY\nNWsWV37gwAHo6+vj7bffxuLFi1/oZ32GDBmCy5cvo7q6GsXFxUhLS4OlpSUX8ktKSoK9vT2MjIzg\n4eGB7OxsAMCPP/4IOzs7WFtbY9GiRSgrK8PFixdx7NgxBAcHw8rKCunp6di3bx8cHBxgaWnZyPJd\nYkkFU1sVKWMwXhtabtFOyyAWi8nV1ZUGDhxIc+bMobt373LHcnNziYiourqafHx86NixY0RE5OLi\nQklJSURE9
OzZM+Lz+UREtHTpUtq2bRsR1S5LLSsrk6qnoKCABg4cSLm5uVRVVUV8Pp8yMjIoJyeH\nrKysuGWokydPpp9++omIiFJTU8nLy4uIiAICAujMmTNERFRSUkLV1dUN/JHVFhERj8fjbHv//fdp\n165dREQ0Z84c+uabb0gsFtMXX3whc7ktkexluPLuw86dO6lPnz706NEjKiwspP79+1NlZSU9ffqU\nevbsyS3FzcvLIyKiZcuWkbm5OeXl5dFff/1Fenp6RER08+ZNGjJkCJWXl1NNTQ1NnjyZLl26RDU1\nNaSnp0d3796l3NxcsrOz4+6XPD/rEhERQbNnz6YFCxbQ8ePHae/evRQaGkoikYgOHz5MlZWVZG5u\nTg8ePCAios2bN9OqVauIiCgnJ4er56OPPqJNmzYREXHXSjA0NKSSkhLud1EfgC3fVe7UmH9K9xhs\ngKovAVVl/1TZNyLF/r6UbmiGx+MhNjYWV65cQXR0NJycnLBz5054eXnh6tWr+P777/Hnn3+ioKAA\nN2/ebHSfFDs7OyxevBjZ2dkIDAxEnz59ANS+vUdHR+Pp06coLCzEnTt3UFVVBXNzc06gZdSoUcjJ\nyQEAnD59Grdu3UJISAiAWlXSsrIyODg4YPHixQgICMD06dO5KE5d6rZVUFCAu3fvws7ODu3bt8fU\nqVMBAG5ubjh37hymT5+OkydP4uLFi+DxeHjvvfewYcOGZrmvHh4eXGTJxMQEycnJePr0qUwxMB6P\nB19fX3Tr1g3dunWDmpoanjx5gpiYGKSlpcHe3h5AbfQqLi4OYrEYxsbG0NfXBwD4+fnh3Llz8PX1\nleunLCZOnIjw8HAUFhZi3bp1WLlyJYgIt2/fxv379+Hj4wOgdq8aye8pPT0dc+fORUpKCsrKyqQm\nTdX9bGNjg8mTJ8Pf3x9jxoyRc5dEAPjPP6uaoNm1NmZPc+cb968tCUK9TP7atWttyh7m3+ubfy0F\nzXbu3ElTpkwhotq32tTUVCIimj9/PoWGhhIR0fDhw+nChQtERHT79m0uEkBElJWVRWvWrCF9fX1K\nSUmhtLQ0Mjc3596kBQIBnT17lhISEmjUqFHcdcuWLeMiIj179qT79+/LtC8tLY1CQ0NJR0eHHj9+\n3OCYrLaIpCMaUVFRJBKJiIjonXfeoWfPnhERUUZGRpMiIvLuw86dO2n27Nnced7e3k0SAzMzM6PM\nzEzatGkTZ2ddLl68SN7e3lx+9erVtGDBgkb9rIskIkJEZGlpSY6OjkT0d1QjNTVV6ndaF2dnZzp1\n6hQREYWFhXH114+IEBFduHCBZs2aRXZ2dg3qgcpHRF7npPSPQQajzaLI35fSzRG5c+cO7t69C6B2\nx9dLly6hZ8+eqKioQFFREfh8Ph4+fIijR49y1zg4OODs2bMQi8VSG6Wlp6ejb9++WLhwIdzd3XHz\n5k1kZWWhd+/e6NGjBy5cuIDr168DqBXMSk1NRWZmJnJzc3H8+HGuHk9PT2zatImbNCvpvaelpeHt\nt9/G0qVLYWRkxM1BkfDo0SOZbTXG8OHDsWvXrga+KIK8+yCLxsTA5J3v7u6OU6dOcatRcnNz8ddf\nf8He3h5//vkn0tLSkJeXhyNHjjSysVxDqE7kYtWqVQ3mxhgaGgIADh8+DCJCVVUVJ6b26NEjGBgY\nIC8vD/v37+cmnGppaeHp06dc/ZmZmXB0dMT69euRlZX1Wk+AZjAYjFeJ0nVEiouLIRKJYGpqCgMD\nA2RnZyMkJASdOnXC4sWLYWdnh4kTJ8LLy4u7xt/fHxcuXIClpSW0tLS4f0aHDh2CmZkZbG1tUVpa\nigkTJsDJyQk6OjowNjZGWFgYhg0bBgBQU1PDkiVLMHjwYAwfPhzm5ubQ1tYGAGzcuBF//fUXtyvs\nDz/8AAAIDw+Hubk57OzsYGRkBEdHRylfBg8eLLMtADKFwgDgk08+waFDh2B
kZIROnTrJXclRWlqK\nAQMGcCksLEzufZAnMiZPDKy+fRKMjY2xYcMGjBkzBqampvDw8MDjx4/B4/GwYsUKeHp6wtraGi4u\nLtwwijw/61K3fPjw4Q06MR06dMBPP/2E9evXw9DQEEKhEImJiQBqJzB7e3vD09MTrq6u3DVTpkzB\nvn37YGVlhbt378Lf3x8WFhZwd3fnvk8NaX1BLZaaP6mCSJkkNK6qqLJ/quybojBBsyZQUlKCzp07\no6CgAN7e3li4cCF8fX1b2yyV49tvv0VUVBTy8vKgpqaGrVu3ws7Orlnq5vP5SE5ORo8ePZp0HRM0\nU26Yf8qNKvunyr4Bij07WUekCQQHB+PMmTN49uwZ/Pz8EBYW1tomqRyJiYl47733GujESCbS/lN0\ndXWRlJTEOiIMBoPxClBZZdXWYs2aNUhJScH//vc/1glpIeTpxPD5fKxatQoWFhbw9vbm9rC5fPky\nHB0dIRQKERAQwM3DqampwcKFC6GrqwtLS0ts3rxZqp2ysjKMGDEC27dvBwAEBgbCysoK5ubmOHTo\nkEzbJENELL180tZuWgeQwWCoPqwjwmhTuLi4QCwWQ0dHB3PnzsW9e/cA1HYCysrKcOPGDTg4OCAy\nMhJA7byU8+fPIyUlBSNHjsRnn30GAPjhhx9w48YNJCcn4/r165gyZQrXRlFREUaNGoWpU6dixowZ\niIuLQ3V1NZKTk5Gamorhw4fLsY5UOMW9knZaSzxM1cfhmX/Kiyr7piisI8JoU0h0YqKjo6GhoQEn\nJyf8+uuvAMDpi7i5uXGTUcvKyvDJJ5/A0tISK1as4FbLnDlzBu+99x66d6+diCj5SUTw9fXFe++9\nh2nTpgGo7cxcvnwZCxYsQGpqKjcJmcFgMBgtD+uIMNoktra2WL16NVavXo09e/YA+Lsz0aFDB5SX\nlwMAtmzZgp49e+Lq1avYvXs3ysrKAMgfl+TxeBg8eDB+++03rqxv3764fv06LC0tERQUhC1btsix\nSgQg5HkKw9+iWHj+WZnzeMHx5s3Hx8dLvQm2dF5S1lrtM/+Yf/LyEhGwtmLPP83Hx8dDJBJBJBJx\nIp8vgk1WZbQp7ty5Ax6PBwMDA1RXV2P27Nno0KEDjh8/zk0yvXr1KoKDgxEXF4egoCAMGTIEkydP\nxvz58/Hrr78iIyMDW7duxeHDh3HgwAH06NEDeXl56N69OzdZNTQ0FNXV1di8eTOysrLQvXt3qKur\nY//+/YiNjcWPP/4oZVft8mH2p/LPYZN+GYzXCTZZlaF0yNOJqUtdXZE5c+Zg69atsLGxwYABA7jy\n999/H+bm5hAKhRAIBNi/f79UHeHh4SgrK8Nnn32G1NRUDBo0CFZWVti7dy83z+T1Ir61DWhR6r9d\nqxrMP+VFlX1TFBYRYTAUQJ5wHKNpaGl1R2GhfIXeliI+XrW1Gph/yosq+wawiMgrR01NDUKhEDY2\nNrC3t8eOHTte+At49OgRxo8f/4osbEhmZibMzc1llmtoaEAoFHJJMlfjnyK5T9bW1ggODkZVVdVL\n1+Xi4oKkpKQmX1dQUIDvvvuuSdcQEUv/MLVGJwSASj/oAeafMqPKvimK0u2+25bR1NRESkoKAODZ\ns2eYMmUKCgsLMX/+fLnXvPnmm4iKimpQXl1djfbtW/fXo6+vz/nTnEjuU1VVFUaPHo2TJ09K7ZLc\nFN/lycK/iLy8PGzZsgWzZs1S+BoWFWk6rRUBYTAYygOLiLQQvXv3xg8//IBvvvkGQG2EYejQobCy\nsoKfnx+Sk5O5cklEIiIiAuPHj8ewYcPg4eGBgIAAqc37pk6dip9//lmqnZKSEgwbNgxWVlbw8vJC\nTEwMV6+JiQk+/vhjmJiYYObMmVzkITk5mdsXp6kb5wHA9u3bMWDAANjZ2WH+/PmYM2cOgNpNBG1t\nbWFoaIiVK1dCS0ur0Xo6dOgANzc3JCQ
k4OzZs3B1dcW4ceNgYWGByspKBAYGQldXFyYmJtw4anl5\nOSZNmgRdXV1MmjQJlZWVXH1dunThPkdHRyMwMBAA8OTJE4wZMwZ6enrcPjSLFy9GWloahEIhPvvs\nM5SWlmLMmDEQCoUwNzdHQkKCDItbW+tD+XREWks3pD6qPg7P/FNeVNk3hWlsa15G06i7pb2Ebt26\nUVFREZWWllJ5eTkREV26dImsra2JiCgjI4PMzMyIiGjnzp3UvXt3ysjIICKis2fP0ujRo4mIKD8/\nn3R1dammpkaq/urqaiosLCQiovv371P//v25enk8Hp05c4ZqamrI09OTzp49S0REPj4+dOjQIaqs\nrKT33nuPa78uGRkZpKGhQQKBgEsJCQlUVVVFfD6fMjIyKCcnh6ysrGjOnDlERPTRRx9RWFgYEREt\nW7aMtLS0Gr1PeXl55OLiQrGxsRQXF0ft2rWj5ORkIiI6evQojR07lsrKyighIYH69etH5eXldPjw\nYRo3bhyVl5fTmTNniMfjUVJSUoP7Hx0dTSKRiIiIJkyYQJ9//jlVVVWRWCymgoICyszMlPJ7x44d\n9O9//5uIiMRiMRUVFUnZDIDQatvUv4oU10L1to1HTFxcXGub0KIw/5QXVfaNSLFnAIuIvAIkIf2l\nS5fCxsYGM2fOxK1bt2Se6+bmBj6fDwAYOnQo7t69i+zsbOzfvx9+fn5o1076V6ampobw8HA4OjrC\nx8cHubm53Pb2b731Ftzd3dGuXTs4OzsjMTERlZWVSE5Ohp+fHzp06AB/f3+5duvp6SElJYVLTk5O\nuHTpEszNzcHn89GjRw+MGjWKO//06dNcfYGBgaj9DjakrKwMQqEQvr6+8PHx4XbTFQgEEAqFAIBf\nfvkFU6dOhbq6OpycnNC9e3fcuXMHJ06cwJQpU9CpUye4u7tj4MCBMtuo23ZsbCzmzJmD9u3bP5cZ\n125gm0AgQFRUFJYuXYrMzEyp6MrrgUtrG9CiqPo4PPNPeVFl3xSFzRFpQdLT06GpqYnOnTsjIiIC\n2dnZSEhIQElJCd544w2Z19Tf3G369OmIjIzEwYMHZQ6jxMfH4/z58zh58iQ6d+6MPn36cGJf3bp1\n487r2LEjSkpK/rFP9edkyOtsyCsHAA0NDZlzT958802F6qhbXtcWNTU17nNubq5CdkoQCoX473//\ni3379mHUqFH4+uuvpeat1CICwH/+uRsAAf7+Bx7//CfLS+ef5+qIN7E8y7O86ubj4+O5/1WSl+oX\n0oIRmdeOukMDT58+JQ8PD26oYsWKFRQaGkpisZi+/vpr4vF4RNRwaGb27NlSdT558oQGDhxI9vb2\nMtvcs2cPBQYGEhHR3r17icfj0f3796XqJSJau3YthYSEEBHRqFGjKCoqiiorK2nGjBlyh2ZklVdV\nVZGuri43NGNtbc0NzXz88ccUHh5ONTU19OWXX8ocqqp/nyTExcWRt7c3l//555/Jz8+PysrK6MKF\nC/Tmm29SZWUl/ec//yE/Pz8qLy+nmJgYqaEZV1dXunjxIhUXF5Obmxt3XyZNmkSLFy+myspKqq6u\npoKCAsrOzqY33niDa+/+/ftUXV1NRERff/01ffXVV1L2gQ3NsKGZNgzzT3lRZd+I2NDMK0cy5GBj\nYwMfHx9MnDgRc+fOBQAEBAQgISEB5ubmqKyslAr9S97cZa0A6dOnD0xMTLiJl/UZPXo08vPzYWxs\njISEBJiYmDSot34+JCQES5cuhUAgkBIBq49kMqckffvtt2jfvj2WLFmCwYMHY/jw4TA3N+f2Zvnk\nk08QGRkJY2NjFBcXo2vXrjLrldVefd89PT2hpaUFY2NjBAUFYd++fejQoQO8vLzQvn17GBsb44cf\nfoCjoyN3zZIlSzB37ly4ubnB2tqaKw8PD8fNmzdhaGgIGxsb3Lp1Cz179oSfnx+srKywaNEixMfH\nQyA
QwNraGleuXMHMmTNl2s5gMBiM5oUJmrVxioqKYGFhgRs3brxwFcqroqSkBJ07d0ZBQQG8vb2x\ncOFC+Pr6oqysDBoaGiAibNiwAefPn8eRI0da29xmgS3dfTnY8l0G4/WGCZopOWfOnIGtrS0WLlzY\nZjohQG1ERSgUwtTUFNbW1vD19QUAJCUlgcfjQV1dHUuXLsXt27fx119/NanusLAwbuO6+ri4uMDI\nyIiL0PznP//5x77w+Xzk5ir2j5Ko9QXBlC2xTgiDwXgRLCLCaFa0tLRQVFT00tfr6uri6tWr6Nmz\nZ4Njrq6uWLduHaysrP6JiQ3ak2ym1xgsIiIbZYl4xKu4jDbzT3lRZd8AFhFhtAHkCa4REQIDA2Fl\nZQVzc3McOnQImzZtwqNHj+Dq6gp3d3eZ9dX/Qufl5cHX1xd8Ph+2trZITU1ttDw3NxceHh7Q09PD\n3Llzufpk2SOjdRVOcS91XVsRLGMwGErMC6ezMhhNQE1NjRNAGzt2rFzBtdjYWJo2bRp3XUFBARER\n8TfzutwAACAASURBVPl8ysnJkVm3s7MzGRoakkAgIKFQSDk5ORQeHk5z5syhmpoa2rNnDwkEAiIi\nueXr1q3jyrdt20Y8Ho9ycnLk2iMBUPVVMy+b2COEwWDIR5FnBHuKMJoVWUtzly9fTg4ODmRhYUGa\nmpr05MkTysrKonfeeYc+/fRTunHjBnduYx0RFxcXbqmuBA8PD66sqqqKunbtSoWFhXLL3333Xa68\nrKyM1NXVKScnR649ElhHhHVEGAxG01HkGcGGZhgtSnwdwbWUlBRoaGigvLwcffv2xfXr12FpaYmg\noCBs2bJFofpqv9cvLmtquWL2iACEPE9hkBbtilfy/D/zJz4+nhM1aov5sLCwNmUP84/5J8lLPrcV\ne5rDH5FIBJFIhJCQEChEC3eGGK8Z9SMi8gTXHj16RGVlZUREtG/fPgoKCiIiInNzc7p165bMumVF\nRDZu3Ejz5s2j6upq2rdvH1lZWTVavn79epo3bx7V1NTQjh07uKGZ+va8//77Uu1A5SMicSodEVF1\n0Sjmn/Kiyr4RKfaMYBLvjGal/uqS0aNH4/DhwzA2NoarqysnuJaamorg4GCoqanhzTffRFhYGABg\n7ty5mD59OrS0tLiJrY3h7+8PkUgEPT099O7dG7t27Wq0PCAgAJMmTYKBgQG8vLygo6PTqD2vDy6t\nbUCLosqrEgDmnzKjyr4pClu+y2AoAFu+KxtlWb7LYDBah2ZZvpueno45c+bA0NAQhoaGmDt3LjIy\nMprNyKbw+u2IKo1IJMLhw4cVLm+LhISEoH///hAKhfDy8pIaZ2wq8fHx8PHxealrjx49KncHZHkQ\ntb5AWEuluLi4l7pOWToh/+R7pgww/5QXVfZNUV7YEZkwYQIGDhyIhIQEJCQkQEdHB+PHj38VtjXg\ndX8rlbUXTWPlbREej4dPP/0UKSkpWLJkCRYuXNjgnJqamha348iRI7h582aTrpHcZ1VMrq6ujR7X\n1m5c8I3BYDBelhd2RIqLi7Fw4UL07t0bvXv3xvz581FcXPwqbFOItLQ0uLu7w8TEBA4ODrh37x6A\n2kiOra0tDA0NsXLlSk4ivf5b9OzZs7n5A3w+H8uXL4epqSlcXFyQkZGB4cOHw8LCQkpOfM2aNTAx\nMYGBgQGWL18OoPZt+UWCWD/++CPs7OxgbW2NRYsWcVLmIpEIn332GRwdHWFjY4MzZ85I2cfn8+Hp\n6Yn8/Hy5Ia765TU1NQgODoaJiQn09fWxY8cO7thHH30EPp+P4cOHY8yYMVw0pa7c+dWrV+Hq6gqg\nNorx4YcfYujQodDT08OpU6fwxRdfwMzMDLNmzeLaTkpKgr29PYyMjODh4YHs7OxGbXVycsK9e/dQ\nWloKFxcX/Otf/4KNjQ02btyI2NhYGBsbQ1dXFzNmzEBlZSUA4OTJk
9DV1YVQKMTJkye5OkNCQrBu\n3Toub2ZmxsnL7969GwYGBrCwsMD06dORmJiIY8eOITg4GFZWVkhPT8e+ffvg4OAAS0tLTJ48Wabd\naHXRsdZLyi5cpurj8Mw/5UWVfVOUF3ZEJk+ejAULFiAlJQXJyclYtGgRJk+ejNzcXIX36GhJAgMD\n8c033+DmzZv44osvuH9G69atw7Rp03D79m1UVlbKjRjUjSZIfv7xxx94++234eHhgd27d+PYsWNY\ntmwZAODUqVO4cuUKUlNT8ccffyAmJgZZWVmIj49HdXU1kpOTkZqaiuHDhzdoa9y4cbh8+TKSkpJQ\nUlKC7du3c8euX7+O2NhYhIeHY+X/2Tv3uJiz/4+/RihWLtuu3a9ISVuNpmYqUS5dmY1yqxBico2v\nsGs3du2S6y6LshZrL6p1i/JzjYQKoRWF3JOKLUspul+mef/+aOfznamZmtCq8Xk+HvMw53zO55z3\n+6RP53PO+7zOmjUAgOTkZCQnJ+PGjRsICgpCdHS0yjMfv//+OyQSCW7duoXr16/jp59+QmVlJZKS\nknDz5k2kpqYiODhYrs766v7zzz8RFRWFHTt2wMPDA71790ZqairS0tKQnJyMqqoq+Pr6IjIyEnfv\n3sWoUaPk/FPE4cOHYWFhgfbt2wMAMjIycPHiRXz22WcICAhAaGgobt++jefPn2Pbtm0AgICAAOzb\ntw8JCQm4du2aUttlf5ZLlizB8ePHcePGDfz444+wtbXFiBEjsH79eiQnJ6NXr15YsWIFzpw5g+vX\nr2P79u0q9TELCwsLy+vT4K6ZkJAQcDicOqeohoaGgsPh4OHDh01mXEMUFxfj8uXLmD59ep1rp06d\nYmYrfH195d6W62PixIkAAFtbW4jFYnTt2hVAjWR4cXExMxCxtrYGUCNhHhsbC2dnZ1y+fBkLFy6E\nSCQCj8erU/fDhw8xb948pKSkoKysjJkZ4HA48PLygpaWFmxtbZGcnAwAOHHiBDw9PdGxY0dwuVz0\n69dP5b6JiYlBamoqYmNjAQCFhYW4dOkSEhISMHr0aGhra8PExAT9+/dvsC4Oh4MRI0ZAW1sbtra2\nqKiowPjx48HhcNCvXz9cunQJWlpayMrKYmabqquroa+vX6cuopqTeXft2gWBQIBNmzYxbUyYMAFt\n27ZFTk4OKisrGX8nTpyI8PBweHp6gohgY2MDABg3bly9sTFEhNjYWAwfPhxGRkYAgM6dO8tdl2Jt\nbQ1vb2/4+Phg9OjRSmoUAZD61BkAH//bbRL/z78tNR2M+v2pmU2Uvr1J17VbSjo4OBh8Pr/Z2MP6\nx/onTcvGiDQHe96EP6GhoQCg8G+AQhrc4NuMqK1R8fLlS2rfvj2Vl5fXKWtkZMQodGZkZDD3Xrp0\niYYMGcKU8/b2prCwMCKSV/UMDQ2luXPnMuX09fUpLy+PFi5cSIGBgQrtKysro7CwMOrXrx9t2bKl\nznV7e3uKiYkhIqLg4GASiURERCQSiSgyMrKOn6tXr6b169cz+Q4ODnTgwIE69YpEojr5Hh4ejF+y\nrFq1ijZu3KiwThMTE8rJySEiopMnT5KDgwMREQUGBsrZIftzkF5LTU0lfX19hf0iS2BgIG3YsKFO\nvoOMRshff/1FPB6PubZv3z4aM2ZMnfyQkBByc3MjIqLvv/+eVq9ezVzT1dWlzMxM2rx5M82cObNO\ne4r67MKFCzR79myysbGpUx5413VEWtSjog7qrtXA+tdyUWffiFR7djS4NBMWFoY//vijzqc50LFj\nR/D5fGzbtg1isRhEhBs3bgAAhg4dil27dkEikWDnzp3MPXw+H7dv30ZxcTGys7MRExOjsG5SEIvB\n4XAgFAoRERHBxB9kZ2cjNzcXT548AQBMnjwZ8+fPR0pKSp37c3JyYGRkhIKCAuzdu7fBZRZXV1cc\nPHgQhYWFuHPnDhITE5WWrW2vU
ChEWFgYcnNzAQD3799HaWkphEIhDh8+jKKiIty7dw9//vknc4+t\nrS3i4+NRVVUl12eK+qI2xsbGAIADBw6AiFBVVaU0GFRZfdJ8XV1daGpq4vLlyygrK8PevXvh4OAA\nXV1daGhoICkpCSUlJXJxOLa2tkhISAAR4cSJE8jJyQGHw4GTkxOioqJw//59ADUzW0DNKcHPnj1j\n2s3MzISdnR02btyIJ0+eoKKiokGf1QuHt21AkyJ9c1NXWP9aLursm6o0OBBJSkpiPtHR0fj8888R\nHR39b9hWh9LSUvTo0YP5BAcHY/fu3YiOjoaJiQnMzMxw5MgRAMBnn32GnTt3wtTUFMXFxejUqRMA\nQEtLCwEBAejfvz+mTp2KoUOHKmyr9k4U6fchQ4Zg5syZcHR0hJmZGcaOHYuioiKkpqaiX79+sLS0\nxO7du7Fo0aI6da5cuRJubm4QCoVMIGjt+mW/CwQCCAQCmJubY8GCBXB1dVXaN7NmzWL6ZcCAAZg+\nfTr69+8PGxsb8Hg8zJ49G9XV1bCysoKpqSl4PB7mz5+Pfv36MX3j7++Pn3/+GTY2NujVq5dc/IUi\n+2TTbdq0waFDh7Bx40YYGxtDIBDg0qVLSvu2ofx169ZBJBKBy+VCR0cHfn5+AIDvv/8eY8eOxcCB\nA2FhYcHcY2trix49eqBPnz44cuQII5zG5XKxZs0auLq6wtzcHAsXLgQATJgwAXv27IGlpSXS0tLg\n4+MDc3NzODs7IzAwEJqamkr7moWFhYXlzdFoQbPs7GxMnTpVbsdCc6SsrAzt2rVjYhLOnz9fJ87l\nXaWkpATvvfceMjMz0bdvX9y6dYuJhWFRTEvZHt1UtHThsniZ+BZ1hPWv5aLOvgFvSNCsNp06dUJ2\ndvYrG/VvcfXqVfD5fBgaGuLw4cMqB6u+CgUFBfD19QWfz0efPn0wfPhwpKWlNVl7gPxWW1nKy8vx\n5ZdfQiAQgMfjgc/n47fffpMr4+bmBh6PB0dHR2zYsEHhIGT48OEoLCxsMvtVIV6JYFl8fDw6deoE\ngUCAgQMH4tdff2103WfPnlU6Y6MM+pdFxv7NT0OCZi15EMLCwtK8aXDXjOwfgoqKCty+fRsBAQFN\natSbYODAgbh27VqTt0NEGDZsGHx9fRESEgIASExMZOJBVLkfaPwbt7LyIpEIvXv3ZmJU/v77b2br\nq5S4uDiIxWK0bq38xx8VFdUoe/5tBg8ejKNHjyI/Px8WFhbw9PREly5dVL4/Li6O2QWkKu/arEhL\nnwWRRZ3fOAHWv5aMOvumMg1Fs8bFxVFcXBzFx8dTYmIic0IpSw1nzpyhwYMHK72+bt06MjU1pd69\ne9OKFSuIqGYXj4mJCc2YMYPMzMwoKytLYTkiolGjRpGlpSU5OjrS/v37mXzZHT5SHjx4QL169VJq\nS1xcHDk4ONCYMWPI1NSUiIhGjhypsP6ePXvS8+fPKSMjg0xNTWnOnDlkampKs2bNosrKSiKqOU23\nf//+ZG5uTuPHj6/TXkZGBg0aNIgEAgF5eHgwu2Li4uLIycmJxo0bR6ampvT1118z90RHR5O+vj7x\n+XxavHgxsyumth+y+W5ubnTixAlasWIF9e3bl6ytrWnlypXMdVk7vb29KTMzkz7++GPS1dUlPp9P\n58+fp6NHj1K/fv2Iz+fT7Nmz6enTp3JtQu13zSj6NPh4YGFhYakXVZ4jKj1pnjx5QkeOHKGjR4/W\neUC/62zatIk+++wzhddOnjxJXl5eJBaLqaKiguzt7SknJ4cyMjKIw+HQoUOH6i1HRJSfn09ENVuV\n9fT0mLSigcjhw4dp9OjRSm2Ni4ujVq1aUXJyMpPXUP1SW0+fPk3V1dUkFArp7NmzRERkbGxMJSUl\nzP21KS0tZbZWJyYmkpWVFWNHmzZt6O7du1ReXk5mZmb0+PFjIiKysLCgP//8k4qLi+nTTz8ld3d
3\nhX5IByLp6emkq6tLJSUljO1isZjc3d3p6NGjSu2svY24oKCA+b527VpauHChXJvqPxCJU+uBiLpv\nkWT9a7mos29Eqj1HGowR2b9/P8zNzfHzzz9j27ZtMDMzQ0RERNNO07Qg6puulxU/69evH3JychiB\nMR0dHYwcObLBcuHh4XB2dsaAAQPw8uXLemNPatuyZs0aCAQC6OrqMnl8Ph8CgYBJq1K/rq4unJ2d\n0apVK9jb2zOxFVIRsMjISLz33nsK7Vm6dCmsra3h5+eHu3fvMtdsbGxgbGwMTU1N2NnZ4cKFC/jr\nr78gkUhgY2OD9957D+PGjWOWrmpz/vx5WFpaYv78+fjpp5/Qvn17XLlyBR4eHjA3N0dycjKzfViZ\nnbJ15+bmYsaMGeDxeNixYwdu3bqloFURgMB/PsH4n+gX/vnektPXFFyXScXHywkvtbT0tWvXmpU9\nrH+sf+qajo+Ph0gkgkgkQmBgIFSioZGKoaEh3b17l0nfu3ePDA0NX2eApFbUtzSjTPwsIyODzMzM\nGiyXnp5OPB6Pmfng8/nMbISiGZG0tDTq1asXSSQSuXypAFntJQ1V6q9t6/r16+VsrU8ELCQkhKZO\nnUplZWWUl5dHGhoaCu2YO3cuhYWF1StYJkvt+4mIJBIJGRsbU2pqKhERLViwoF47a4u0TZkyhX7/\n/Xeqrq6mgwcPMmJuUqD2MyLs0gwLC8ubR5XnSIMzIhKJBB9//DGT/uijjyCRSFQb5bwDODk5oaKi\nQm7nRlJSEs6dO6dU/Kw29Ymkffjhh3j//fdx4cIFXL9+vV5bevfuDWtra3zzzTfMz0hWSr42OTk5\njapfFqKGRcCys7PRs2dPaGpq4tdff23w/019gmUNUVFRgaKiIujr6yM7OxuHDx9mto3VtrO8vBza\n2tpyP4vs7Gz07t0b5eXlzCGILCwsLCxNT4MDEQ8PD7i6umLjxo3YsGEDhg8fDk9Pz3/DthbD8ePH\nceHCBVhYWMDMzAwrVqyArq6uQvEz6cnFsssotct5eXmhuLgYAwYMQM+ePWFqaorg4GC4uLg0aEto\naCjKy8shEAhgY2MDoVCIH374gWlTtt2BAweqVL8iAbPq6uoGRcCmTJmChIQE8Hg8VFZWokOHDkrr\nlKJMsKx2+7XztbS0sHjxYtjY2GDcuHEYNmwYACi0U0tLCyNGjMCVK1cgEAiQkJCAr7/+GgsWLMCg\nQYPA5/PfuR0yQPzbNqBJkZ1GVkdY/1ou6uybqtQraEZEePz4MV68eIFjx46Bw+EwGhQsLO8S797A\nRL2278bHq7doFOtfy0WdfQNUEzRrcCBibm6O1NTUN24cS8tAQ0MD5ubmTNrb2xsBAQEIDg7GrFmz\n0K5dOwBAhw4dmNkeVcjJycH8+fNVDnwWiUQ4d+4cI0c/bdo0zJ07txGe1MXBwQEbNmyAlZVVg2VV\n+WViYWFhYZFHlWdnvYJmHA4Htra2OHz4MLPDg+Xdon379goP8Nu0aRN8fHyYgUhjZwy6devWqN1X\nHA4H69evx5gxYxrVTkN1Nsbud2VWRJ1mQlhYWJo/DcaInD9/HqNHj8aHH34IHo8HHo8n94bM8u7x\n448/IicnB46OjnB2dmbyV65ciT59+mDChAmM/LxIJMKiRYtgZ2cHa2trnD59GgCQmZnJLPFVV1fj\niy++gIGBASwsLPDTTz8pbLf2qLqyshK+vr4wMDAAl8tl1lorKioU5peXl2P8+PEwMDDA+PHjUVlZ\nydS1ePFiWFlZwdzcHMHBwUo8JzX+xDHfi4oKlPjfclH3dXjWv5aLOvumKg1KvJ84ceLfsIOlmVJW\nVianO/L1119j3rx5CAoKQnx8PN5//30ANQfpdevWDbdu3cKMGTNw7NgxTJ48GQBw/fp1xMbG4urV\nq/j222/rBMX+8ssvuHHjBpKTk9GlSxcUFNT9Q0hE+PLLL7F
q1SoAwM6dO/Hw4UMUFhbizp07uHr1\nKry8vJCRkYGTJ08qzD9+/DjEYjHu3r2LhIQEDBkyBADw8OFDXLx4EVevXgUAvHz58s13JAsLCwuL\nQhociHTs2LFOnra2dpMYw9L8aNeuncKlmdq0bt0aEydOBFCzpfncuXOYPHkyOBwOvLy8oKWlBVtb\nWyQnJ9e59/Tp05g6dSpzVoyiM2MULc1s3rwZEydOhJaWFgYMGIAuXbrg/v37iIqKUpgfHR2NCRMm\nQFNTE87OztDT0wMAdO/eHfn5+fDz88PkyZNhZ2enxEsRAP1/vncGwAfg8E86/p9/W2pamleTlr6l\nSYPoWnpamtdc7GH9Y/2Tph0cHJqVPa+bjo+PR2hoKICaw1lVoiGhkZ49exKHwyEtLS3S0tIiDodD\nOjo6xOfz6cqVK68occLSUpCKodWmtqCabLmIiAgSiURERCQSiSgyMrJOOVmhNA8PD9qzZ0+9dtSu\nh4ho5syZcnlcLpdu3LihNH/GjBly+fr6+sz5N1VVVRQZGUlDhw6lgICAOu3jnRI0Y4XMWFhY3gyq\nPE8ajBEZMWIEfv/9dxQUFKCgoAAhISEYN24cvvnmG8yYMUO10Q6L2qGtrY1nz569kbqGDBmCkJAQ\nJq5E0dKMItzc3BAeHo7y8nJcvHgRL168gImJidJ8V1dXhIeHo6KiArGxscjKygIAPH/+HKWlpfDw\n8EBgYKDCWRv1J/5tG9CkSN/Y1BXWv5aLOvumKg0ORKKioiASiaClpQUtLS34+PggOjoaHh4eKCoq\n+jdsZHmLSGNEpJ+vv/4aADBv3jxMnjyZCVaV3VFSezdKQ9+nT58OHo8HgUAAPp+PvXv3KrSl9q4V\noVAIbW1tmJqaYsaMGdizZw/atGmjNH/YsGFo3bo1TE1N8csvvzBLMNnZ2XB0dIRAIMDSpUuxcuVK\nJb3BUeOPI/NdW7vu0hgLCwtLU1GvjggAuLi4gM/nM+v/e/bsQXJyMmJiYtC3b9939O2xeSHV+uBw\nOHBwcMD333+Ptm3bNqqO69evIycnB66urk1k5evZEB8fj5EjR6JXr14AgA8//BAxMTGv1V5oaCiu\nXr2KzZs3N1iW1RFhYWFhaTyqPDuVzoj4+PgAAD799FN89NFHCAgIQEBAAD766CPs27cP1dXVjToL\nhKXpkGp9XL58Genp6a/0BzolJQXHjx9/Yza9ynlEDdlgb2+PlJQUpKSkvPYgBHh3dEFYWFhYmjNK\nByL37t1DVlYW/vjjD0ybNg3h4eEIDw+Hr68vWrVqhbZt26J3797/pq0sDdC6dWvY29sjLi4OAJCe\nng5nZ2dwuVzY2triwYMHAIBTp05h8ODBsLCwgIODA6qqqrB06VLs27cPAoEA+/fvR1JSEuzs7CAQ\nCDBlyhSkp6cDqJlF8Pf3Z9p0c3PDuXPnANSoq3777bfg8/m4dOkSVq5cCRsbG/Tt25fZdgvURFYv\nX74c1tbWzOCisrJSzgZFYmeKRtXh4eHo3bs3evXqhcWLFzeYHxoaih49esDGxgbXrl1j8mX7xN7e\nXmH/SpecWsqnY8f3G/5P8w/qvk7N+teyUWf/1Nk3lVEWxbpz506ytLSktm3bkr6+vtzHwMDgTQXU\nsrwBpDtRXrx4QS4uLvTHH38QEdGgQYOYnU1RUVHk5+dHRET29vaUnp5OREQvX74kIqLQ0FDy9/dn\n6iwsLCSxWExERPv27SMPDw+m3Ny5c5lybm5udPbsWSIi4nA4tGnTJuZafn4+ERGJxWJyd3eno0eP\nEhGRg4MD+fr6klgspl27dpGvr69CG2SJi4ujTp06EZ/PJz6fT2vWrKHq6moyNDSktLQ0ys/PJxsb\nGzp06JDSfLFYTPr6+vTgwQPKy8sjPp/PtKeoT2RBi9w1o/rul7i4OJXLtkRY/1o26uyfOvtGpNpz\nSKmOyKRJkzBp0iT4+fn
h559//tcGRiyNRxpQ+uDBA9jZ2cHHxwfFxcW4fPkypk+fXqf8wIEDMW3a\nNEyZMgXe3t4AamYbSGbGoaysDEuWLMHZs2dBRBCLxUw5ZbRq1QoikYhJX7lyBT///DPu3r2Lly9f\n4vbt23BzcwMATJw4ERoaGnB0dGRmS2rbUJtBgwbh6NGjTPrixYswNTVlZuY8PT1x7tw5dO3aVWH+\nhx9+CDMzMxgaGgIARo4ciefPnyvtk3cJWb0GdYT1r2Wjzv6ps2+q0qCgGTsIaf5IRccKCwvh5OSE\nY8eOYfDgwdDQ0EBiYiI0NTXlyq9atQo3btzArl27YGZmhtu3b9epc+vWrdDR0cGVK1dw69YtjB49\nmmmroqKCKSfdciu9JhXAIyL4+/sjMjISZmZm+Oyzz1BeXs6UlYqWtW3bVi6/MdSO8VA2iKlvcCNF\nUZ+0adOmVikRWp6g2T+pZiR4xKbZNJtW33R8UwiasTR/ZMXEkpOTydTUlCQSCdnZ2VFQUBBVVVWR\nRCKh69evExHRgwcPiIhIIpFQnz596PHjx3TgwAEaN24cU8/06dMpLCyMKisrac6cOaSvr09ERJmZ\nmcTj8aiiooJSU1OpdevWzNKMrB1lZWXUrVs3Kioqor/++osMDAxo+fLlRFSzNCMVEsvNzWXqrm2D\nLHFxceTm5iaXV11dTb1796YHDx5Qfn4+9e/fn44cOUISiURhfnV1NRkYGFB6ejrl5eWRQCBglmYU\n9YksYJdmWjSsfy0bdfZPnX0jekOCZizNH9mZAYFAgN69e2P//v3YvXs3oqOjYWJiAjMzMxw5cgQA\nEBAQAHNzc9ja2mLSpEno3r07HB0dUVRUxASK+vv7Y/v27bC2tkaPHj2YNnr27Al3d3fw+XwsX76c\nGRHXtkNLSwuLFy+GjY0Nxo0bh2HDhjVof20bapepPQPSqlUrrFq1CkKhEFZWVnBwcIC7uzs4HI7C\n/FatWmHZsmWwt7eHq6urXFCqoj5hYWFhYWl6GtQRYWFhaZlbfbW1u6CwML/hgiwsLCxNxGvpiLRk\nNDQ0GCVQS0tLZGVlYcCAAQ3ed+3aNQwaNAiWlpbIycmBl5eX0rKyx9g3JYGBgdiwYUOd/Pv370Mk\nEoHP54PL5WLWrFkA6m6vVQUHBwdGmE5fX5+J+9i1axdsbGzg4+ODo0ePYu3atUrrULVdBwcHmJiY\nwMLCAlOmTGFk1l8FZX2jCsHBwSgrK2vUPfRPMG1L+bCDEBYWlpaAWg5EpAJfKSkpSE5ORs+ePXHh\nwoUG75NqpiQnJ6Nbt24KtSz+bZS9ifv7+2Pw4MG4du0abt++zQwCXuXNXZkEe1BQECIiIrBz5064\nu7tj0aJFjbZTUbk9e/bg+vXr+OSTTxSqmqoqhvY6sxSbNm1CaWlpo+5527ogr6sVUh/SYDN1hfWv\nZaPO/qmzb6qilgMRRXTo0AFAzQ/d2dkZ48ePB5fLxZIlSwAAv/32G8LCwrBixQr4+PggKysLZmZm\nAIDHjx/D1dUVfD4fFhYWjLiXRCLBf//7X3C5XPj5+aGqqqpOu0ePHkX//v0hEAgwZ84c5qC4wMBA\nzJ49G46OjjA3N0d4eDhzz5o1a6Cnp4eBAwfi0aNHCv0pLy+XE5ST2goAeXl5GDZsGMzMzPDjjz8y\n+TExMeDz+TA2NsbYsWOV7lYhIvj5+eHmzZtwd3dHcHCw3IxHbUG0htpVhlAoREJCAoC6YmhB8oE6\nEwAAIABJREFUQUEwNDREr169sGnTJqV9Ix2MODg44OrVq4wdBgYGAIDq6mp88cUXMDAwgIWFBX76\n6Sds3rwZOTk5cHR0ZM7KWbx4MaysrGBubo7g4GAlFlOz+xQVqXZAIAsLC0uz5RWCYJs9GhoajPDV\nmDFjiOh/Ozri4uKoTZs2dPfuXSovLyczMzNmh4RIJKIDBw4Qkfwx9UuXLqXffvuNiGqOi
y8rK6OM\njAzicDh0+vRpqq6uJqFQyOwekaWgoID5vnbtWlq4cCERES1btox4PB4VFBTQo0ePyNDQkIhqdpEY\nGRnRkydPKCsri3R1dWnDhg116j148CB98MEHNHToUAoNDaWqqioiIgoJCaGuXbtSTk4OFRYWUvfu\n3amyspJyc3PJ3NycEetatGgRhYeHE5H8LhZ9fX16/vx5ne+yYmOKxL+UtVsbBwcHunLlClVWVtIX\nX3xBS5cuJSJ5MbT8/HwyNjamnJwcZsdNSkpKvX2jbCfO1q1baciQIYy4mvRfWd/S09Np0KBBjI0v\nXryoYzea7a4ZtfwVZmFhURNUeUap5YyIVFcjJSUFBw4cqHPdxsYGxsbG0NTUhJ2dndyyDSkIqrGx\nsUFwcDDWrl2L/Px8aGlpAQB0dXXh7OyMVq1awd7eHpcuXapzb25uLmbMmAEej4cdO3Ywmh0cDgcj\nR45E586d0aNHD2hoaODp06c4efIkPv30U3z88cfQ09ODi4uLQptGjRqFjIwMeHt7448//sDAgQOZ\na0OHDsV//vMfaGtrg8vlIjk5GYmJicjJyYG9vT0EAgGOHj3KSLOritQOqfhXaGionEaJonYV1TFx\n4kTY2dlBQ0MD//3vfwHIi6GdOHGCqUtXVxeurq44f/48YmJiIBQK5fqmIU6fPo2pU6cyuiXSf2Xp\n3r078vPz4efnh4sXL6JTp06N6hcWFhYWllenQUEzdUT2j1Hbtm3lBLoUMXz4cFhZWWHXrl0YMGAA\nIiIi0LlzZ3Tu3FmunuLi4jr3rl69GoMHD8b27dtx5MgRuWWG2veXl5c36pTXDh06QCQSQSQSwcDA\nAGlpaeBwOArrBWqWb6Tn0LwOygTRarerqF+lMSKWlpZy+bJiaLX7QJX+0NLSYvyUFVlTpT/btm2L\n69ev49ChQ1i+fDn4fL6SwFwRmp+g2T+p1xQkCg4OBp/PbxaCSE2RZv1r2Wl19k82RqQ52PMm/GEF\nzUheWKt2Xm1hrLlz51JoaCgR1SzNREZGEpH80ox0GYKIaNasWbR7926560RE69evp8DAwDrturi4\n0NmzZ6mkpIRGjRpFDg4ORFSzNLN+/XqmnJmZGWVlZVFeXh598skn9Pfff9OjR4+oe/fuCpdmoqOj\nmeWY+/fvU4cOHejp06cUEhKi8CyY3Nxc+uCDD+jSpUtERFRcXEz3798nItWWZmTrrS3+9ejRI4Xt\nxsfH17Hb4Z+lmdrI/swKCgrIxMSEnjx5QtnZ2dSrVy+6du1avX2zfPly5vyZr776ilma+fnnn2nI\nkCGMH9KlGR6PR3fu3CEiory8PGaJ6eLFi+Ti4lLHPqj50oy6iyqx/rVs1Nk/dfaNSLVnlFrOiCja\nTaFsZ0h916Tf9+/fj127dqFdu3YwNTXF2LFj8ddff9Vbj5Svv/4aCxYsAIfDwYgRI3D27FmmrKLy\nOjo6EIlE6Nu3L/T09CAUChX6GBMTg/nz56NVq1aQSCTYvn07unbtqrTeDz74APv374efnx/Ky8uh\nqamJ1atXw8jISGH9ivpCmg4ICEBaWhrat2+PSZMmMYJnqvSHsnzZvM6dO2PWrFmws7MDACxYsAAW\nFhYAoLRvfHx84O/vDwsLC0yYMIGpb/r06bh//z4EAgG6dOmCmTNnYs6cOZg3bx4mT54MbW1tBAUF\nwdfXFxKJBB988AFWrlyptE/UFembjbrC+teyUWf/1Nk3VWEFzVhYVKC5CpqxomUsLCzNmXdW0IxF\nMVKhNwsLCwwfPhw3b96st/zrirZJt0wrs8PKygpffvmlwm3PquIgs223Mbx8+RLbtm1r1D3UDETK\nan/e1CBEdp1aHWH9a9mos3/q7JuqsAORdwip0Nv169chEomafAlC2SyC1I7ExETcvn0bJ0+elLsu\nFosb1carzFYUFBRg69atjbrnbYuXNZWYGQsLC8vbh
B2IvIMQEfLy8phtyMXFxXBxcYGlpSWGDRuG\nM2fO1Lnn4cOHsLS0xNWrV5Geng5nZ2dwuVzY2triwYMHAGpmUGxsbPDJJ59gzZo1DdrRpk0bODk5\nISEhAWfPnoWjoyM8PDxgbm6OyspK+Pr6wsDAAFwul3lrKC8vx/jx42FgYIDx48ejsrKSqU92BiYy\nMhK+vr4AgKdPn2L06NEwNDSEQCDApUuXsHjxYqSnp0MgEGDRokUoLS3F6NGjIRAIwOPxGKG1Wj3X\nrD5vUsxM3depWf9aNursnzr7pipqGazKopiysjIIBAIUFBSgrKyM0flo164dDh48CG1tbTx69AgD\nBgzA48ePmfvu3bsHb29vhIWFgcfjYfDgwQgKCoKVlRWOHz+ODRs2YNu2bdi4cSO8vLzwxRdfYNmy\nZQ3a8+LFCxw7dgxLly4FEeHcuXO4cuUKBAIBjhw5gsLCQty5cwdXr16Fl5cXMjIycPz4cYjFYty9\nexcJCQkYMmQIU5+yoON58+bB1NQUERER0NDQQFFREdauXYtbt24hJSUFABASEgIzMzMcPHgQRISS\nkpLX7m8WFhYWloZhZ0TeIaRCb5mZmdi6dSs8PT0B1MRsbNq0CXZ2dnB3d0d+fj4jRf/s2TOMGjUK\ne/bsAY/HQ3FxMS5fvozp06dDIBBgyZIlSExMBACcPHkSU6dOBYfDwdSpU5XaIR0QjRw5Eu7u7rC3\ntwcA8Pl8CAQCAEBUVBQmTpwILS0tDBgwAF26dMH9+/cRHR2NCRMmQFNTE87OztDT01PYhmxwVGxs\nLPz9/dG6det/ljQ61gme4vP5iIiIwNKlS5GZmak0vkVdUfd1ata/lo06+6fOvqkKOyPyjjJmzBhM\nmzYNpaWluHz5Ms6fP4+TJ0/ivffeQ9euXRlxsM6dO6Nnz544f/48TExMIJFIoKGhgcTERDlVVSmq\nbMKSDohq061bN5Xqks2XnfnQ0NBgvufn58tda8gugUCAP//8E3v27MGIESPw3Xffwc3NrVYpEdRV\n0OzatWuvdX9zT7P+tey0uvunTul4VtCMpT5kRcPOnz9P1tbWRES0a9cu8vX1JSKi3bt3E4fDoays\nLEa0raSkhAYOHEh79uwhIiI7OzsKCgqiqqoqkkgkdP36dSIimjdvHq1fv56qq6tp2bJlCoXlatsh\npbbQ3JEjR8jT05PKysrowoUL1K1bN6qsrKT/+7//I09PTyovL6czZ84Qh8NhxNgcHR3p4sWLVFxc\nTE5OToxP48ePp8WLF1NlZSWJxWJ6+fIl5eXl0UcffcS0l5WVRWKxmIiIvvvuO1q9erWcfWiWgmbs\nry8LC0vzRpXnFLs08w4hXRKxsLDAunXrsHHjRgA159a8ePECpqamSEhIAJfLZe7hcDho3749jh07\nhqCgIBw7dgy7d+9GdHQ0TExMYGZmhiNHjgAAPvvsM+zfvx8mJibQ1NRstKCZbL5QKIS2tjZMTU0x\nY8YM7NmzB23atMGwYcPQunVrmJqa4pdffmFEz4Aa8bh58+bByckJVlZWTP6mTZtw+/ZtGBsbw9ra\nGnfu3IGOjg48PT1haWmJgIAAxMfHg8/nw8rKCklJSfDz83u9zmZhYWFhUQlW0IyFRQWao6DZmxQz\ni4+PZ6ZZ1RHWv5aNOvunzr4Baixo1qpVK/j4+DBpsViMDz/8EO7u7q9U3+sIdzk4OMDExAQCgQAC\ngQBjx46tt3xWVhb27t37Sm01B0QikcITjZOSkjBu3Djw+XxwuVwsX74cAHD06FElB8gpFzxTRmBg\nIDZs2AAAWLZsmcJtxg3Z+TpQMxAwk/2wiqosLCzqQIsMVn3vvfdw69YtlJeXQ0tLC6dOnUL37t3f\nylurstNklZGRkYE9e/bA29u7zjWxWIzWrZv3j0SZgNikSZOwbds2ODk5QSKR4N69ewAAd3d3pQPE\nxv68ZMtLBzqql
H1TNJdZkaaQdVfnNzKA9a+lo87+qbNvqtIiZ0QAYNiwYYiKigIA7N27F97e3sz0\nT0lJCXx9fcHlcmFiYsKUy8zMxODBg2FpaQlPT09GR0OWx48fw9XVFXw+HxYWFoxYV30omnYSiURY\ntGgR7OzsYG1tjdOnTwMAFi9ejPPnz0MgECA4OBhhYWHw8vKCi4sLhEIhSktLFdpeXV2NL7/8Elwu\nF71798aOHTsYXxsW4vofv/76K2xsbGBlZYWAgACUlZXVay8AzJ07F/r6+hAKhXjx4oVCfysrK2Fo\naAigZsbK1NQUABAaGgp/f38A9Que/fDDD+ByuTAyMpJTfF2zZg309PQwcOBAPHr0SK5/pTMeQUFB\n6Nu3LywsLBAQEMCUSUpKUuiPoraICL6+vrC0tASPx8P+/fsV9N7bFzF700JmLCwsLG+dV42EfZt0\n6NCBbty4weye4PP5FB8fz+y6+Oqrr2jTpk1ERPT333+TjY0NERGVlpZSeXk5ERElJiaSlZUVERGz\nO4SIaOnSpfTbb78REVFVVRWVlZXVa4u9vT0ZGxsTn88nPp9PAQEBREQ0ZcoUEgqFVFZWRgkJCeTo\n6EhEJGcnEVFISAh16dKFMjIy6rV9+/bt9Pnnn5NEIqHi4mISCARUUVFBO3bsoG+++YaIiCQSCRUV\nFdVr7/Pnz5nvc+bMoc2bN9dr79WrV8nW1pZevnxJt27dIi0tLTpw4ECderds2UJdunShkSNH0sGD\nB5n80NBQmjt3LhER+fv707p160gikdC3337L7J45efIkeXl5kVgspoqKCrK3t6ecnBzKzc0lIyMj\nevLkCWVlZZGuri5t2LCBiIhEIhEdOHCASkpKyNjYmGnv5cuX9fqjrK3Y2FiaNGlSnXqkoFntmnnz\nv7bqfhQ561/LRp39U2ffiFR7XjXvdYB64PF4yMzMxN69ezF8+HC5azExMaioqEBISAiAmnNFMjIy\n8PHHH2Pp0qU4c+YMqqurcf/+/Tr12tjYYPHixcjLy4Ovry+6du1arx3KlmY4HA68vLygpaUFW1tb\nZvaFFMwmODk5MfutFdn+8OFDxMTEIDU1FbGxsQCAwsJCJCYmgs/nY+3ateBwOIwken08fPgQ8+bN\nQ0pKCsrKyhh7lNl74sQJeHp6omPHjuByuejXr5/CeufMmQMvLy9ERERg1apVCA8PR3h4uJy/J0+e\nxMWLFxnBs6CgIMbnpKQkWFtbA6iZ5Tlz5gw4HA6EQiE+/vhjAICLi0uddtu3b4+PPvoIPj4+mDhx\nIj799NN6/VHUVmxsLJydnXH58mUsXLgQIpFIScyQCM1FR4TVaWD9Y/1j080xHf+u6IhI36RXrFhB\nOjo6dPPmTTkdCisrKzp79myd+0JCQmjq1KlUVlZGeXl5pKGhQUTyMyJERE+ePKEffviBevfuTSkp\nKfXa4uDgwOhYyCISiSgyMrKOzbX1MmRnDOqz3cPDg8LCwhTa8OLFC9q6dSuZmZnR0aNH67XX3t6e\nYmJiiIgoODiYRCJRvfauXr2a1q9fL+evohkRWUpLS6lz585UXFxMISEhjH+ffPIJ5ebmElFNn0vb\nWLhwIQUGBtapZ/fu3XJ9M2XKFLkZEVl7o6Ojady4cTR27Nh6/VHWFhFRWVkZhYWFUb9+/WjLli1y\n16DmMyIsLCwsTYEqz6sWGyMCAFOnTkVgYCD69Okjly8UCrF9+3YUFRUBAKPimZ2djZ49e0JTUxO/\n/vorJBJJnTofPnyIjz/+GF988QWcnZ1x+/ZtAICzszOePHmi0A5qxA5obW1t5ObmKr1Xme1CoRBh\nYWHMvffv30dpaSkePXqEDh06YPbs2Zg4cSJu3LgBAJg8eTKSkpLqtJ+TkwMjIyMUFBRg7969DQZg\nurq64uDBg8y5L1I599pIY1kA4MqVK4z+iCyffvopwsLCIJFImBGz1LeIiAgmBiQ
7Oxu5ubkQCoWI\niYnB06dP8fjxY4W7ZEpKSvDs2TMIhUJs3LiReXNShrK2pD/byZMnY/78+QqVX1lYWFhY3jwtciAi\n/eOpq6uLuXPnMnnS/G+//Rba2towNzeHmZkZcwDblClTkJCQAB6Ph8rKSrnto9J79+/fDzMzM/Tt\n2xelpaUYO3YsJBIJ0tPT8f77io9dnzhxIrN9d+jQoXXqlP1uYWGBnj17MsGqtXehKLN9+vTp6N+/\nP2xsbMDj8TB79myIxWLEKxHiSk1Nha6ubh1bV65cCTc3NwiFQjg6OirsV9nvUr/Mzc2xYMECuLq6\nKuyDXbt2wdjYGCYmJpg3bx727dsnd2Q9oFzwbMiQIZg5cyYcHR1hZmaGsWPHori4GDo6OhCJROjb\nty+8vb0hFArr2FtUVAR3d3fw+XxMmDCB2d6rzB9FbRUVFSE1NRX9+vWDpaUldu/ejUWLFin0U12R\nTq2qK6x/LRt19k+dfVMVVtBMBW7duoWQkBCsX7/+bZuiEoWFhZgxYwb27dv3tk1RG5rL1l2gabbv\nxqu5qBLrX8tGnf1TZ98A1QTN2IEIyyujoaEBc3NzSCQSdO/eHWvXroWZmZnS8pmZmXB3d0dqauor\ntefg4IC///4b7dq1A1AzezRmzJhXqkuKvr4+kpOTlc52SVHll4mFhYWFRR5Vnp0tdtcMy9unffv2\nTCxFREQEVq5c2aSzMI0Vj1O1zqYo2xQ0xUwICwsLy9umRcaIsDQviAh5eXnQ0tICABQXF8PFxQWW\nlpYYNmyYXJCpRCLBtGnTYGBgAC8vL5SXlyM2NhajR49mypw6dUrpTEftkXVBQQFGjhwJfX199O3b\nl5ltUZafn5+PoUOHwtDQEPPmzWPqoxYgaNaUQmbqvk7N+teyUWf/1Nk3VWEHIiyvjPQ0XwMDAwQG\nBjJqqe3atcPBgweRnJyMn3/+GSKRiLnnzp07cHNzw927dyGRSBAVFQUnJyfcvXsXz58/BwCEhIRg\n2rRpddojIiYw2NLSEvn5+di5cyd69uyJhw8fYsGCBZg8eTIAKM0PDQ2FiYkJ0tLSYGFhweyeiY+P\nh1gsRnJyMlJTUxk9EhYWFhaWpoUdiLC8Mu3atUNKSgoyMzOxdetWeHp6AqiJHdm0aRPs7Ozg7u6O\n/Px8PHv2DADQqVMnjB49GpqamvD29kZ0dDQAwMfHBzt37sSLFy+QmJiocHeOdGkmJSWFieuIioqC\nSCRCq1atMG7cOGRkZKCoqEhpfnR0NJM/ceJEaGpqAgBMTU0ZQbPU1FR07NhRgcciAIH/fILxP5Ex\n/PO9qdMyqfh4uTep101L85qq/redZv1r2Wl19k8qAtZc7HnddHx8PEQiEUQiEQIDA6ESTaJgwvJO\nIBUJI6qRl+/UqROVlJRQXFwcDR06lAoLC6m6upp0dHQoKyuLMjIyqHPnzsw9ERERNH36dCIiysnJ\nISsrK9q2bRstWrRIYXsODg505coVubyhQ4cyeVVVVdSpUycqLCxUmj9kyBAmv6ysjLS0tBjZ++Yv\naMb+urKwsLQsVHlusTMiLG+ECxcuwMjICO3bt0d2djZ0dXWhra2N8PBw5Of/L8Dy5cuXOHToECoq\nKhAeHs7MfPznP/9Bt27dsGrVKvj6+iptp3bAqJubG3bu3Inq6mpERETA0NAQ2traSvNdXV2xc+dO\nSCQS7N27FxUVFQDwzgua1X77VDdY/1o26uyfOvumKuxAhOWVkcaIWFhYYN26ddi4cSMAYNSoUXjx\n4gVMTU2RkJAALpfL3GNiYoIjR47AxMQEHA5H7pygCRMmQE9PD8bGxirb4OPjg8zMTBgaGmLjxo0I\nCwurN3/KlCm4ffs2jIyMkJycjJ49ewLAOy9oxsLCwvK2YHVEWJoNM2fOhK2tbb0zIm+Lt711F2C3\n77KwsLQ8WEEzlhZD37590bVrVxw6dAjbt29
HREQECgoKoKGhge3bt8PGxuaNtnf27Fm0bdsWtra2\nKpVnBc1YWFhYGo8qz052aYalWZCUlISoqChcuXIFW7ZswYkTJ3Djxg2cOXMGPXr0eOPtxcXF4eLF\ni426R/bsnNf9dOxYv5Lrv426r1Oz/rVs1Nk/dfZNVdiBCEuzory8HLq6uszJve+//z6ys7Ph4eEB\nADh8+DDat28PsViM8vJyGBoaAgDS09Ph7OwMLpcLW1tbPHjwAACQm5uLUaNGgcvlwsLCApcvX0Zm\nZia2b9+OoKAgCAQCJCQk4NSpUxg8eDAsLCxgb2+vxLqWIU7GwsLC0qJoym07LCyNRSKRkKOjI+np\n6ZG/vz+lpaVRVVUV9erVi4iIFi5cSDY2NnThwgWKj4+nCRMmEBHRoEGDmG25UVFR5OfnR0RE3t7e\ndOjQISIiSk1NpWHDhhERUWBgIG3YsIFp197entLT04mI6OXLl3Xswhvfvsv+6rGwsKg/qjzr2LNm\nWJoVHA4HsbGxSEpKQmRkJAYMGICQkBAYGhri7t27SEpKwueff45z586huroagwYNQklJCS5fvozp\n06fXqe/UqVO4c+cOI6zz4sULlJWVAZCXix84cCCmTZuGKVOmwNvbW4l1IgD6/3zvDIAPwOGfdPw/\n/6qarpmSlZ66KStuxKbZNJtm0y01HR8fj9DQUAA1h4qqAhusytKsCQ0NxalTp2Bqaor27dvj+PHj\nCA8Px5QpUyCRSLB+/Xr06NED//nPf5Cfn88opUr54IMPkJycDD09Pbn85cuXo0OHDli4cCGTd+PG\nDezatQsHDx7E7du30aZNG+Zaza6ZN/mr0ryCX+NlBkXqCOtfy0ad/VNn3wA2WJWlBXL//n2kpaUB\nAMRiMRITE6Gjo4NBgwYhODgYdnZ2+OCDD/D8+XPcv38fffr0QceOHcHn87Ft2zaIxWIQEW7cuAEA\nEAqF2Lx5MyNcdu3aNQCAtrY2cnNzmXbT09Nhbm6OtWvXQlNTE0+fPv2XPWdhYWF5N2FnRFiaFcnJ\nyfD398eLFy9QWloKKysr/PLLL2jXrh26dOmCY8eOwcXFBbNmzcLTp09x6NAhAEBmZib8/Pzw4MED\n5hybb775Bs+fP8ecOXOQlJSEdu3awd7eHlu3bsWDBw/g5+eH58+fY/PmzQgKCkJaWhrat2+PUaNG\nYfHixXJ2vWkdEVYThIWF5V2A1RFhYXlDsDoiLCwsLI3nrS7NODk5ISYmRi4vODgYc+bMeWNtXL9+\nHSdOnGiwXGhoKPz9/RVe69Chwxuz51URiUQ4cOCAXF5YWBgmTJggl5eXl4euXbuiqqrqjbW9Zs0a\nlcop6ydFttdHYGAgunfvDoFAgGHDhjFBTq9CfHw83N3dX+new4cP486dO6/ctrrxOj+HlgDrX8tG\nnf1TZ99UpckGIt7e3ggPD5fL27dvX50/rq9DSkoKjh8/3mC5+qbVm4N0t1TkSpYxY8bg1KlTzA4P\nAIiMjMSIESPkgihfl++++05lG5XlN6YPORwOPv/8c6SkpODrr7/GF198UadMdXW1yvW9KtKA1Mag\nrmJmLCwsLG+TJhuIeHh4ICoqCmKxGEDNGn5OTg4GDhyImJgY8Pl8GBsbY+zYsSgvLwcAHD9+HPr6\n+hAIBFi8eDHztltSUgJfX19wuVyYmJggKioKVVVVWLp0Kfbt2weBQID9+/cjKSkJdnZ2EAgEmDJl\nCtLT0xl7nj17BicnJ+jp6WHlypUKbf7hhx/A5XJhZGTElCEi+Pr6wtLSEjweD/v3769z36+//gob\nGxtYWVkhICCAGTyIRCIsWrQIdnZ2sLa2xunTp5l75s6dC319fQiFQrx48aLO1JW2tjbs7e1x9OhR\nJi88PBze3t4KRbqAmhkTFxcXGBoaYt68edDX12dOvt21axfMzMxgZGSE2bNnAwAWL17MHFzn4+MD\noObAOis
rKzg5OSEiIkLOpiVLlqB3795wdnbG8+fPmXyp7VevXkX//v1hYmKCoUOHIi8vT2E/S8sP\nGDAADx48QGlpKRwcHLBkyRJYW1vjxx9/RGxsLExNTWFgYIBp06ahsrISAHDy5EkYGBhAIBDg5MmT\nTJ2BgYHYsGEDkzYzM8OjR48AAH/88QeMjIxgbm6OyZMn49KlSzh69Ci+/PJLWFpa4uHDh9izZw9s\nbW1hYWFRz/Zd9RUzU+eofYD1r6Wjzv6ps28q8+blS/6Hm5sbHT58mIiIvvvuO/ryyy8pNzeXzM3N\nGdGoRYsWUXh4OBER8Xg8Sk5OppKSEho2bBi5u7sTEdFXX31FmzZtIiKiv//+m2xsbIiIKDQ0lPz9\n/Zn2CgsLSSwWExHRvn37yMPDg4iIQkJCqEuXLvTw4UN68uQJ9e7dm/Ly8oiIqEOHDkREdPLkSfLy\n8iKxWEwVFRVkb29POTk5FBsbS5MmTWLaUCR29fz5c+b7nDlzaPPmzURENGXKFBIKhVRWVkYJCQnk\n6OhIRERXr14lW1tbevnyJd26dYu0tLTowIEDdeqNjIyk0aNHExFRdnY2devWjaqrq5WKdK1bt44W\nLlxIEomEduzYQRwOh54/f063b9+mQYMGUXl5OXN/YmKinP9S8vPzGT/19PSYNIfDoY0bN5JYLCZ/\nf39av349ERGJRCI6cOAAVVZWEo/Ho8ePHxMR0ZYtW+j777+v41NgYCBz78GDB2nw4MFEVCMo5u3t\nTRUVFUREZGVlRYmJiVRaWkojR46k4OBgIiIyNzenP//8k4qLi+nTTz9l/o/I1ktEZGZmRllZWXTz\n5k3q3r073b9/n4iICgoK5OyWYmxsTCUlJYzvtcEbFTRjxcxYWFjeDVR53jWpoJl0eWbEiBHYt28f\nduzYgcTEROTk5DAy2pWVlSgqKsLAgQPB4XAgEAgAAGPHjkVkZCQAICYmBhUVFQgJCQGC/xZkAAAg\nAElEQVQAFBQU4OHDhyAiuZmEsrIyLFmyBGfPngURMbMxADBo0CAYGBgAqNnSGR0djYkTJzLXY2Ji\nkJSUBGtrawA1szCxsbFwdnbG5cuXsXDhQohEIvB4vDp+Pnz4EPPmzUNKSgrKysoYmzgcDry8vKCl\npQVbW1skJycDAE6cOAFPT0907NgRXC4X/fr1U9h/w4YNw5w5c1BUVIT9+/fD09MTrVq1UirSFRMT\ng3Xr1oHD4cDb25uJxzlz5gzS09PRv39/ADUy6nFxcQrbDQ8PR2RkJJ49e4aXL18iLS0NNjY24HA4\nmDJlCjQ0NDB58mR89dVXjAYHEeHevXvIyspiZrGqq6sVitkQEYKCgrBr1y4IBAJs2rSJ6asJEyag\nbdu2yMnJQWVlJWPfxIkTER4eDk9PTxARcwDeuHHj6o1PISLExsZi+PDhMDIyAgB07txZ7roUa2tr\neHt7w8fHB6NHj1ZSowhvStCsOQkQATXxW3w+v9nYw/rH+veu+CcbI9Ic7HkT/jRW0KxJX82Kioqo\na9eulJycTJ988gkRER05coQcHBzqlH38+DGZm5sz6ZCQEHJzcyOimrfjs2fP1rknNDSU5s6dy6SX\nLVtGgYGBVFlZSSkpKaSvr8/UJX1zJiL673//S7t37yai/80ILFy4kAIDAxX6UVZWRmFhYdSvXz/a\nsmVLnev29vYUExNDRETBwcEkEomIqOatOzIykiknbWv16tVyb+8ODg4KZ0SIiCZPnkyhoaHUv39/\nunTpEhER6ejoUFZWVp2yLi4ulJyczNispaVFeXl5tHnzZsam2sjOiKSnpxOPx2NmePh8PtPvrVq1\nYvKTkpLIxcWF8fHAgQOUmprK9Hd91JZWl+2Dq1evEhHRX3/9RTwej7m2b98+GjNmTJ182f8j33//\nPa1evZq5pqurS5mZmbR582aaOXNmnfZqz4gQEV24cIFmz57NzLjJAjWfE
YmLi3vbJjQprH8tG3X2\nT519I1LtedekgmYdOnSAo6MjfH19mSDV/v374+bNm0hMTARQM/OQlpaG7t27g4hw7do1lJaWIjIy\nkgmCFAqF2L59O4qKigDUBKkCdUWpsrOzmVmPX3/9Vc6WhIQEZGZm4unTp4iJiYFQKJS7LhQKERER\nwcQVZGdnIzc3F0+ePAEATJ48GfPnz2faliUnJwdGRkYoKCjA3r17GwzedHV1xcGDB1FYWIg7d+4w\nfaEIb29vbNy4Ec+ePWNmNJSJdAmFQuzZswcSiQT79u1DRUUFOBwOnJ2dERMTw+wSyc/PZ/zU0NBA\naWkp48eHH36I999/HxcuXMD169cZO4gIO3fuRHV1NXbu3AlXV1c5O42NjQEABw4cABGhqqpKaTAo\nKdnKJc3X1dWFpqYmLl++jLKyMuzduxcODg7Q1dWFhoYGkpKSUFJSIhevY2tri4SEBBARTpw4gZyc\nHHA4HDg5OSEqKgr3798HUDObBtT833n27BnTbmZmJuzs7LBx40Y8efKE6dt3BembjbrC+teyUWf/\n1Nk3VWlyZVVvb2+kpqYyAYAffvgh9u/fDz8/P5iYmMDOzg737t0DULODY9SoURgwYAD09PTQsWNH\nAMC3334LbW1tmJubw8zMDMuWLQMAODo6oqioCAKBABEREfD398f27dthbW2NHj16MAMCDoeDIUOG\nYOrUqejbty98fHygo6PDXAOAIUOGYObMmXB0dISZmRnGjh2LoqIipKamol+/frC0tMTu3buxaNGi\nOj6uXLkSbm5uEAqFcHR0lLsmOyiRfhcIBBAIBDA3N8eCBQvq/FGXxcXFBU+ePMG4ceOYvB9//BGP\nHj2Cqakp+vTpg19++QVATXBscnIyjIyMcPnyZbz//vvQ1taGqakpgoKCMHr0aPTp0wdDhw7F33//\nDQD44osvMGjQIPj4+GDgwIHo2bMnTE1NERwcDBcXF6bN9957D8+ePYOxsTFu3rwJkUgkZ2ebNm1w\n6NAhbNy4EcbGxhAIBLh06ZJCn+rbgSNl3bp1EIlE4HK50NHRgZ+fHwDg+++/x9ixYzFw4EBYWFgw\n99ja2qJHjx7o06cPjhw5Ai6XCwDgcrlYs2YNXF1dYW5uziwnTZgwAXv27IGlpSXS0tLg4+MDc3Nz\nODs7IzAwsI5U/D8WvpGPtnYXhf6zsLCwvJM05ZRMYykuLiYiotLSUho7diwToPim2bx5Mw0ePJh4\nPB7x+Xz6888/m6QdVbl8+TKNHTuWLCwsyNTUVOkSUUNUVFQwwbqRkZHE5/PfpJmvjb29PXNCrixi\nsZi+++47srS0JB6PR1wuV26Z5XUZNmyYwgDUxtDMflXeOOo+Pcz617JRZ//U2TeiZhCs2lh+/fVX\nhIWFIS8vDw4ODpg1a9Ybb+PSpUvYsmULrl69ivbt2yM/P/+tT8NPmjQJ27Ztg5OTEyQSCTND1Fge\nPXrEzORoa2vjt99+e8OWvh7KNEeWLFmCZ8+e4dKlS2jbti0KCwuxatWqOuVIJgi4MURFRb2awbVQ\ntV1Wvp2FhYWlETT9eKh5ERsbS87OzgqvnTlzhkxMTEhfX5+mTp3KbCXt2bOnXKCmNNh22bJlNGPG\nDLK1taXu3bvTb7/9RkQ1MzujRo0iPp9PZmZmdP78+Xpt0tfXp8zMzDr5f/75J9na2hKfz6fJkyfT\ngwcPiKgmSFM2SHf48OEUHx9PREQnTpygPn36EJfLZfwsLi4mkUhEpqamZGxsTMeOHSMiokePHtGn\nn35KFhYWZG5uTmlpafXauWLFCurbty9ZW1vTypUrmXx7e3sKDAwkKysrGjx4sFzA7Lhx40hfX5/G\njRtHdnZ2dWZESkpKSEdHh5kNq01GRgaZmJjQjBkzmC25s2fPJmtra7K1taWtW7cyfnt5eTH3xcXF\nMYGssj+/nTt3Up8+fah3797k5+fHl
F+0aBEzIxMUFFTHDjQqWPWd+7ViYWFhUYgqz8N37okpkUjI\n0dGR9PT0yN/fX+6PrzLtCn19faUDESMjI3r69Ck9ePCA9PT0qLq6mnbs2EHffPMN015RUVG9Nm3Z\nsoW6dOlCI0eOpIMHDzL5ynRRau8WcnNzo7Nnz9KzZ89IR0eH+WMv1cxQpsOydOlSZvBUVVVFZWVl\n9dop1RQRi8Xk7u5OR48eJaKaHS++vr4kFotp165d5OvrS0REBw4cIA8PDyovL6fTp08Th8NhdsZI\nuX79OgkEAqVtZmRkEIfDYXRTZO2oqKgggUBAqampJBaLSU9Pj0pLS4mIyM/Pj9kZJf35KdNTSU9P\np0GDBjH1v3jxoo4d7ECEhYWFpfGo8jxs8mDV5gaHw0FsbCwiIyPRrl07DBgwAMePH5fTrmjXrh0m\nTpyIc+fONVifq6srunbtCkNDQ/B4PFy6dAl8Ph8RERFYunQpMjMzGzzPZs6cObh37x6GDh2KVatW\n4f/bu/+gqK4rDuDfFRJAIAYRAfnhYowpsMIuv2pxGkBSoigqARVERYyYxJFWM6khaa1gUx2CP5JY\nSWnSFJQYFehY00KNiYBEowIiZWRiKwIiioK/+A0unP5B94VlF1iTNSvP85lhxrd7971zFoG79917\nblRUFID+uigbNmyAp6cn3nnnHWEVCmlZdUJEOH36NLy9veHt7Q3gu5oZX3zxBT766CMoFArMnj0b\nN27cwOXLl+Hn54f33nsPKSkpuH37NkxNTYeNs7S0FBEREfDw8MC5c+fUVsXExMTAyMgIQUFBwiTV\n/Px8LF26FCYmJggODoazs/OI72dGRgYUCgWcnZ3R0NAAALC2tsaCBQuENseOHcPcuXOhUChw9epV\nVFVVwcjICLNnz8aRI0egVCqRl5en9hoiUqun4u3tjfLychQUFMDR0RG3b9/Gq6++ilOnTmHcuHEj\nxik2A2sZiBHnN7qJOT8x56arR2qOyI/J19cXvr6+cHV1xaeffgq5XK72PBEJcwJMTU2FeSSqkukD\n26mo5kAoFAqcOXMG+/fvx/z587Ft2zbMmzdv2HhsbGywdu1axMXFYdKkSWhvb0daWhqsra1RWlqK\nCxcuCIW2zMzM1Oa1qGIabg7Dnj178Pzzz6s9NmXKFHh7eyMrKwszZ85Edna2xvswMM+EhATk5ORA\nJpNhw4YNQml+ALCy6l8J8uSTTwqPD951UVt8U6dOxZUrV9DW1gYLCwusXLlSKByn2nPGzs5OaN/a\n2orExEQUFxfDwcEB4eHhwvWioqLwxz/+EePHj4ePjw/Mzc01rhcSEiIUxhuooqIChw8fRnJyMuRy\nOVJSUrS8Cyuha0GzR6nAkC7HqiXgj0o8nB/n9zjlJ6bjwketoNmj6OLFi0K57/v379Mrr7wilIn3\n8fGhM2fOUEdHBy1cuJA++OADIiKKi4uj/fv3U09PDy1btkzt1sy0adPo5s2bVF1dTZMnT6be3l6q\nq6sTbqls27ZNWAGyfPlyOnv2rEZMqjkbREQnTpwgKysr6u3tpdWrV1NmZib19PTQ2rVrhYJhtbW1\nNH36dOru7qbKykoyNjamoqIiampqImtrayopKSGi70rPv/3227R06VJqaWkhIhLmcFy+fFm47iuv\nvCLcypg1axZdu3ZNLcbOzk6aNGkStba20tWrV8nFxYWSk5OJSL0YWVNTkxDn3/72N4qMjKSuri76\n6quvtN6aISLauHEjxcbGUldXFxH13/qZNm0a1dXVUU1NDclkMqFtXV0dTZs2jZRKJV28eJEsLS0p\nMzNTeJ1UKqVFixZRdna28JqBt2YmTZpEVVVVwvtTV1dHzc3NwqqaU6dOCcXaBgLfmmGMsQemy+/D\nx25EpK2tDQkJCbh79y46Ojrg7e0t1OFQ1a7o7OxEcHCwULsiISEB69evx7vvvov58+cLtwwkEgkC\nA
wMxf/581NfXY8uWLRgzZgwKCwuRmpqKJ598ElKpVCiuVllZCQcHB42YsrKy8Prrr0MikcDMzAwH\nDx7EmDFjkJCQgNdeew07duxAdHS0MKIwefJkhIWFQS6Xw93dXeiVTpgwAVlZWVixYgUkEgkcHR1x\n9OhRbNq0CevXr4eHhwfMzc0xZcoUHDlyBIcOHcK+fftgZmYGV1dXLF68GH19faiursb48eo7xJqa\nmiIxMRF+fn4YP348QkNDh3yPVXGGhobi0KFDcHV1hZ+fH/z9/bW237p1K1JSUjBjxgwYGRnBxMQE\nK1euhL29PRoaGtRGUpydnREREQGZTAYnJyehpDzQX5xt3rx5yMzMxN69ezXiGVhPxcjICGZmZkhL\nS4OpqSni4uLQ19eHCRMmDLkpImOMMf2T/L/Hwr6H5ORkWFhYCEWyhtPS0oL4+HgcPHjwR4js+7tw\n4QL++te/Yvv27YYO5ZHyIEuGR+Py3cLCQqFDK0ac3+gm5vzEnBugeYtem8dusqq+6foH6qmnnhI6\nIUZGRkJ1VYVCIZRb10V6ejr27dsHoH9ip6oEvb64u7sP2QnJyMhAQkKC1sdtbGyEfAZXXR3J3Llz\n0dLSgnv37uHDDz8cst1Ik34BID4+Xihlr2/0/00WR/oabZ0QxhgzJB4RMQBLS0th35zBVN8OXTo4\nQUFB2L59u7BK5mHLzMxEaWkpdu/erfF4WVkZPvjgA62vUyqVMDYe+S5gbW0twsLCUFlZqfX54d63\nh22o78doHP1gjLEfC4+IjBK1tbVwdXXFmjVr4OHhgfr6erVP/zk5OYiLiwMAJCUlYceOHcjNzUVp\naSliYmLg5eWFrq4u7Nq1C76+vvD09MSvf/1rjeucPXsW/v7+UCgUiI2NRXV1NYD+EY2oqCiEhoZC\nJpOpdSgyMjLg5OQEPz8/Yea6NoP/oyUlJWHNmjWYOXMmYmNjkZmZqTaaMm/ePGF5tFQqxa1bt5CY\nmIjq6mooFAqte/qoFBYWqs0NWbduHTIzMwH0z9ouKysDAPzrX/+CTCaDu7u7sG9Oe3s74uLi4Obm\nhp/85CdC1dX6+nrMmTMHcrkcnp6euHTpkrYsNb5aW+8MGSdjjLGRcUfEADo7O4XbGBEREZBIJLh4\n8SLmzp2LyspKODs7a90sT/VviUSCiIgI+Pj4YP/+/Th37hz6+vqQnp6OkpISVFRUYNOmTRrXdXV1\nRXFxMcrLyzF37ly1P/YFBQX4y1/+gm+++Qapqam4f/8+ent7kZycjMLCQuTn56OwsFDryAAR4eDB\ng0JOGRkZkEgkOHHiBA4fPoxPP/1U4zXackpJScEzzzyD8vLyIZbPajewdLzq301NTVi2bBkyMzNx\n4cIF5OTkAAD+8Ic/QKFQoKqqCkVFRdiyZQsA4OOPP0ZkZCTOnz+PsrIyODo66nx9MVAtvxMrzm90\nE3N+Ys5NV4/dqplHgZmZGcrLy4Xj2tpajaJdulKNRIwdOxa2trZYvnw5YmJiMHv2bI22nZ2d+M1v\nfoOioiIQEZRKpfBcSEgI7O3tAfTvWHvu3DkolUrIZDI888wzAIAFCxbg1q1bGueVSCSIiopSG0lJ\nTk7GvHnzYGNj88C5/FA0QnG37u5uoZbInTt3hOJuiYmJaG5uRlxcHCZOnKiXWBhjjA2PR0QeEQOL\ndgH9E1pVtP3xVxk4slBUVIRly5YhIyMDS5Ys0Wg7sEDa3r170dnZKTyn+kMNfFeU7EFWimjrRKg6\nNoB6UThAszDcgxh8rqE6R0PZs2cPysvLUV5ejtraWkyZMgVz587FsWPHYGRkhJkzZw5xG2olgKT/\nf72H74qY9X+qGfjJZrQdqx57VOLh/Di/xyU/VRGwRyWeH3pcWFgoFKZMSkqCTvRYt4TpyMLCQu14\ncNEuIqKgoCA6deoUtbW10axZs4T9WzZv3kzbt28nIqKwsDDKz88
nov6N7W7cuEFERA0NDTRt2jSN\n6w5VIG3wJnqqvWt6e3vJxcWFqqurqbm5mRQKhVD8baDBryciSkpKEuIk6i9Epq0IG9F3Bceam5vJ\n1tZ2xPets7OTHBwchOJq1tbWQlEzVXE1fRV3U8GQBc34R4gxxoaiy+9IHhExAG2f1gc/9vbbb+OX\nv/wlZs2apbYqZuB8iNWrVyM5ORleXl5obW0VipwtXboUO3bs0LhGQkIC0tPT4ePjAycnJ415FYON\nGTMGmzdvRkBAAObMmYOAgIAh8xkpJ2dnZyG+5ORkrevmra2tERkZCS8vL43Jqp2dnTAxMQHQPyKy\nceNGzJgxA6tWrUJISIjGuQYWd3N3d0d0dDQAYNOmTbC0tISHhwdkMhk2b94MADh06BBkMhl8fX3R\n0dGBxYsXa81VrAZ/+hQbzm90E3N+Ys5NV7x8l40KBQUFeOutt3D69GmDXF/sy3cLC8VdVInzG93E\nnJ+YcwN0W77LHRGmN0ZGRvDw8EBfXx8cHR2RkpICmUz2g8/74YcfIjc3F8nJyZg5c6YeIn1wuvww\nMcYYU8cdEfajGlhwLDs7Gzk5OQ+tpD09QOE3fRh4HbGMgjDG2MPGBc2YQRARmpubYWpqKjyWmpoK\nNzc3PPvss8KmcomJiUhLSxPaqIq1DdVeW+G3tWvXwtfXF/7+/mrl4fPy8iCVSqFQKJCYmCgUQNNH\nQTMxFjET+31qzm90E3N+Ys5NZw9poix7DBkZGZFcLqfJkyfTxIkTqaGhgYiIjh49SosWLSKlUknd\n3d0UEBBA165do/LycgoICBBe7+bmRlevXh2yfU1NDUkkEjp8+LDwmtu3bxMRUXd3NykUCqqsrCQi\nounTp9O5c+eovb2dQkNDKSwsjIiI3nrrLXr//feJiKixsZH8/PyIiOh3v/sdffzxx0REdP/+fers\n7FTLDWqrZsT3Y1NQUGDoEB4qzm90E3N+Ys6NSLffl1zQjOnNwEJtubm5iIiIwDfffIMvvvgCJSUl\n8PHxAdA/KnH8+HHExMTg5s2buH79Om7evAkrKys4ODhg165dWtvPnDlTo/DbsWPHkJmZidraWjQ1\nNaGqqgpPP/00JBIJFAoFAGDx4sVCZdUfVtBsJQApAOC9996DXC4XJpkNrAkwGo9Vjz0q8XB+nN/j\nkl/g/+uIPCrx/NDjwsJCZGRkAOjfvkMXPEeE6c3AOSJEBCsrKzQ0NGDz5s2wtLQUlsoOtHnzZkyY\nMAGNjY2wt7fHunXr8MYbb2htP3hTvNbWVnh6eqK4uBgODg4IDw9HeHg4goODERoaioqKCgD9++Xk\n5ubi888/h4+PD3bu3Innn39eI5bGxkZkZWUhPT0d2dnZkMvlwnP9c0RUPyo8cZUxxnTBc0SYwZw8\neRLPPvsszM3N8eKLLyI7OxtXrlwBADQ0NKCpqQkAsGTJEnz22WfIycnBokWLAGDY9gPduXMHTzzx\nBOzs7PCf//wHX331FQDAwcEBRITz58+jo6MDOTk5wmTTF198Eenp6UKHSTWCU1NTAzs7O7zxxhsI\nDg5GVVXVQ3x3Hj2qTzRixfmNbmLOT8y56YpvzTC9UW3m19fXh8mTJ2Pnzp0AgF/84hdYs2YNgoKC\nYGZmhnHjxiErKws2NjZwc3NDW1sbHB0dYWtrO2z7wYXTnJ2dERERAZlMBicnJ7Udebdt24aFCxfC\nysoKP/vZz9DS0gKgv6DZ+vXr4eHhAXNzc0yZMgVHjhzBoUOHsG/fPpiZmcHV1fWxK2jGGGOGwrdm\nmCi1t7fD3NwcnZ2dWLlyJfz9/fGrX/3qe5+Pl+8yxtiD41sz7AczMjKCQqEQvlS3Swa7d++e2vLZ\nH0NgYCDKyso0HlcqlVi4cCHGjh2LcePG4cyZM4iMjBSeVxVFq62txfTp03W+HhGBiLgTwhhjesQd\nETassWPHCjvVlpeXw9nZWWu
7O3fuqNUE+TEMtcdNYmIinJyc0N7ejp6eHmzatAkvvfSS8PzJkye/\n1/Weemr89471USf2+9Sc3+gm5vzEnJuuuCPCHkh7ezteeOEFeHl5ITQ0VJggmpiYiOrqaigUCrz5\n5pvo6OhAeHg4FAoFpk+fjq+//lrjXL///e/h5+cHX19fvPPOO8LjgYGBSE5Oho+PDwICAoQJpV1d\nXYiKioKLiwuioqLQ09OjMeTX0dGBPXv24M033xQ6KS+//DKuXLmCgoICAICFhYVGLLoUNBNjITPG\nGDO4h1TDhImEqkiZXC6nl156iZRKJbW0tBARUV1dHTk6OhIRUW1tLclkMuF1n3zyCf32t78lIqK+\nvj5qbW3VOLeqGJlSqaSwsDD6/PPPiYgoMDCQ4uLiSKlUUlZWFsXFxRERUW5uLkVERFBXVxd9+eWX\nJJFIqKysTO2cFRUV5OzsrHGt5cuX0+7du4mIyMLCgoiIampqhJh1K2jGPy6MMfYgdPm9yatm2LAG\nFilTef/995GXl4f29nbcvn0bN2/e1BiZkMvlSElJgUQiQVxcHFxcXDTOXVpaij/96U/49ttvce/e\nPVRVVWHevHkAgJiYGBgZGSEoKEgYLcnPz8fSpUthYmKC4ODgIW8TaTM4vsF0K2jWX4YeAJ5++mlR\nFTTjYz7mYz7Wx3Hh9yhoxh/x2LBUowcqBQUFFBISQi0tLdTb20vW1tZUV1enNrqgcvfuXUpLSyOZ\nTCaMdqj09fXRc889J5RkX79+PSUnJxNR/4iIaqSjqamJpFIpERHFx8dTTk6OcA6pVKoxItLW1kYm\nJib07bffql3L3t6ejh8/rpbT4JivX79OqampNHXqVCovL1c7L0Q+IiL2MtOc3+gm5vzEnBuRbiMi\nPEeEPZCGhgY4ODjA0tISBw4cwO3b/StILC0t1YqOXblyBRYWFnjttdcQExODf//732rn6e7uRmtr\nK6RSKRoaGvD3v/99xGvPmTMHBw4cQHd3N44fP466ujqNNubm5li3bh3effdd9PX1AeivrOro6Iig\noKAhz3358uXHuqDZ+fPnDR3CQ8X5jW5izk/MuemKOyJsWINXpSxcuBB3796Fq6srvv76a7i5uQEA\nrK2tERkZCS8vL2zcuBGFhYWQy+Xw9vZGSUkJXn31VbXzmJqaIjExEX5+fliyZAlCQ0NHjCE0NBTG\nxsZwdXXFn//8Z/j7+2ttn5KSAltbW3h5ecHT0xNHjx5Ffn6+1pxU/z506BBkMhl8fX3R0dHx2BU0\nu3v3rqFDeKg4v9FNzPmJOTddcUEzxnQgkUhEXcgsKSlJmP8iRpzf6Cbm/MScG8AFzRjTK7F2QoD+\n4m5ixvmNbmLOT8y56YpHRBjTgVwuF3bzZYwxphtPT88R58FwR4QxxhhjBsO3ZhhjjDFmMNwRYYwx\nxpjBcEeEsRGcOHECXl5e8PDwwO7duw0djl6tWrUKtra2D7QL8WhSX1+PoKAguLu7IzAwUKj4KAZd\nXV346U9/CrlcjhkzZmDXrl2GDumh6O3thUKhQFhYmKFD0TupVAoPDw8oFAr4+fkZOhy9a29vR2xs\nLBQKBdzc3HD69Gmt7XiOCGPD6O3txXPPPYcvv/wSDg4O8PX1xWeffQZXV1dDh6YXxcXFsLCwwIoV\nK1BZWWnocPSusbERjY2NkMvlaG5uhkwmQ0FBgWi+fx0dHRg7diy6u7vh7e2Nw4cPY+rUqYYOS692\n7tyJsrIytLa24siRI4YOR69cXFxQVlaG8ePFubN3bGwsAgICsGrVKiiVSrS3t2PcuHEa7XhEhLFh\nnD17FlOnToVUKsUTTzyBqKgonarAjhY///nPYWVlZegwHho7OzvI5XIAwIQJE+Dr64tr164ZOCr9\nGTt2LACgra0NSqUSJiYmBo5Iv65evYq8vDysXr16xFoUo5VY87p37x6Ki4uxatUqAICxsbHWT
gjA\nHRHGhtXQ0AAnJyfh2NHREQ0NDQaMiH1fly5dwoULFzBjxgxDh6I3fX198PT0hK2tLdatW6f2f1UM\nNmzYgNTUVIwZI84/VRKJBLNmzYJCocBHH31k6HD0qqamBjY2Nli5ciVkMhni4+PR2dmpta04v7uM\n6cngEvdsdGpra0NUVBR27doFc3NzQ4ejN2PGjEFFRQUuXbqEtLQ0jZ2yR7N//N7G4AcAAAIaSURB\nVOMfmDhxIhQKhWhHDU6ePImKigrs378fW7duRXFxsaFD0hulUomSkhJERESgpKQE3d3dyM7O1tqW\nOyKMDcPBwQH19fXCcX19PRwdHQ0YEXtQ9+/fR0REBJYtW4YFCxYYOpyHQiqVIjQ0FEVFRYYORW9O\nnTqFI0eOwMXFBdHR0Th+/DhWrFhh6LD0yt7eHgDg6uqK8PBwnD171sAR6Y+joyOsra0RFhYGMzMz\nREdHq+35NRB3RBgbho+PD/773/+itrYWPT09OHjwIObPn2/osJiOiAgvv/wy3N3dsX79ekOHo1fN\nzc3Chmm3bt1Cfn6+qFY/bd26FfX19aipqcGBAwcwa9Ys7N2719Bh6U1HRwdaW1sBAE1NTcjLyxPV\n98/Ozg5Tp07FmTNn0NfXh3/+85944YUXtLY1/pFjY2xUMTY2xieffILw8HAolUrEx8eLZsUFAERH\nR6OoqAi3bt2Ck5MTtmzZgri4OEOHpTcnT55EVlaWsEQSALZt24bZs2cbOLIf7vr164iNjUVvby/s\n7Ozw+uuvIzg42NBhPTRiu01648YNhIeHA+jfvXzDhg0ICQkxcFT6lZmZiRUrVqC5uRnTp09HSkqK\n1na8fJcxxhhjBsO3ZhhjjDFmMNwRYYwxxpjBcEeEMcYYYwbDHRHGGGOMGQx3RBhjjDFmMNwRYYwx\nxpjBcEeEMcYYYwbDHRHGGGOMGcz/APghkDt6yxIgAAAAAElFTkSuQmCC\n", "text": [ "" ] } ], "prompt_number": 153 }, { "cell_type": "markdown", "metadata": {}, "source": [ "- \uc880 \ub354 \uc751\uc6a9\ud558\uba74 \uac01 \uc601\uc591\uc18c\uac00 \uc5b4\ub5a4 \uc74c\uc2dd\uc5d0 \uac00\uc7a5 \ub9ce\uc774 \ub4e4\uc5c8\ub294\uc9c0 \ucc3e\uc544\ubcfc \uc218\ub3c4 \uc788\ub2e4." ] }, { "cell_type": "code", "collapsed": false, "input": [ "by_nutrient = ndata.groupby(['nutgroup', 'nutrient'])\n", "\n", "get_maximum = lambda x: x.xs(x.value.idxmax())\n", "get_minimum = lambda x: x.xs(x.value.idxmin())\n", "\n", "max_foods = by_nutrient.apply(get_maximum)[['value', 'food']]\n", "\n", "# make the food a little smaller\n", "max_foods.food = max_foods.food.str[:50]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 156 }, { "cell_type": "code", "collapsed": false, "input": [ "max_foods" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n",
        "<class 'pandas.core.frame.DataFrame'>\n",
        "MultiIndex: 94 entries, (Amino Acids, Alanine) to (Vitamins, Vitamin K (phylloquinone))\n",
        "Data columns (total 2 columns):\n",
        "value    94  non-null values\n",
        "food     94  non-null values\n",
        "dtypes: float64(1), object(1)\n",
        "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 158, "text": [ "\n", "MultiIndex: 94 entries, (Amino Acids, Alanine) to (Vitamins, Vitamin K (phylloquinone))\n", "Data columns (total 2 columns):\n", "value 94 non-null values\n", "food 94 non-null values\n", "dtypes: float64(1), object(1)" ] } ], "prompt_number": 158 }, { "cell_type": "code", "collapsed": false, "input": [ "# label-based lookup of the 'Amino Acids' group on the outer index level; .loc replaces the removed .ix indexer\n", "max_foods.loc['Amino Acids']['food']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 157, "text": [ "nutrient\n", "Alanine Gelatins, dry powder, unsweetened\n", "Arginine Seeds, sesame flour, low-fat\n", "Aspartic acid Soy protein isolate\n", "Cystine Seeds, cottonseed flour, low fat (glandless)\n", "Glutamic acid Soy protein isolate\n", "Glycine Gelatins, dry powder, unsweetened\n", "Histidine Whale, beluga, meat, dried (Alaska Native)\n", "Hydroxyproline KENTUCKY FRIED CHICKEN, Fried Chicken, ORIGINA...\n", "Isoleucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Leucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Lysine Seal, bearded (Oogruk), meat, dried (Alaska Na...\n", "Methionine Fish, cod, Atlantic, dried and salted\n", "Phenylalanine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Proline Gelatins, dry powder, unsweetened\n", "Serine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Threonine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Tryptophan Sea lion, Steller, meat with fat (Alaska Native)\n", "Tyrosine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Valine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Name: food, dtype: object" ] } ], "prompt_number": 157 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## [About my IPython in github](https://github.com/re4lfl0w/ipython)" ] } ], "metadata": {} } ] }