{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 6. \ub370\uc774\ud130 \ub85c\ub529, \uc800\uc7a5, \ud30c\uc77c \ud615\uc2dd\n",
"\n",
"### \uc785\u2219\ucd9c\ub825 \ubc29\ubc95\n",
"\n",
"- \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95\n",
"- \ub370\uc774\ud130\ubca0\uc774\uc2a4 \uc774\uc6a9\ud558\ub294 \ubc29\ubc95\n",
"- \uc6f9 API \uc774\uc6a9\ud574\uc11c \ub124\ud2b8\uc6cc\ud06c\ub97c \ud1b5\ud574 \ubd88\ub7ec\uc624\ub294 \ubc29\ubc95"
]
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"6.1 \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \ud30c\uc774\uc36c \uc88b\uc740 \uc774\uc720\n",
"\n",
"- \ub2e8\uc21c\ud55c \ubb38\ubc95\n",
"- \uc9c1\uad00\uc801\uc778 \uc790\ub8cc \uad6c\uc870\n",
"- \ud29c\ud50c\uc5d0 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uace0 \uc77d\uc5b4\ub0b4\ub294 \ud3b8\ub9ac\ud55c \uae30\ub2a5\n",
"\n",
"#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218\n",
"\n",
"\ud568\uc218 | \uc124\uba85\n",
"--- | ---\n",
"read_csv | \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
"read_table | \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\uc744 \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
"read_fwf | \uace0\uc815\ud3ed \uce7c\ub7fc \ud615\uc2dd\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4(\uad6c\ubd84\uc790\uac00 \uc5c6\ub294 \ub370\uc774\ud130)\n",
"read_clipboard | \ud074\ub9bd\ubcf4\ub4dc\uc5d0 \uc788\ub294 \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc624\ub294 read_table \ud568\uc218. \uc6f9\ud398\uc774\uc9c0\uc5d0\uc11c \ud45c\ub97c \uae01\uc5b4\uc62c \ub54c \uc720\uc6a9\ud558\ub2e4.\n",
"\n",
"#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218 \uc635\uc158\n",
"\n",
"- **\uc0c9\uc778**: \ubc18\ud658\ud558\ub294 DataFrame\uc5d0\uc11c \ud558\ub098 \uc774\uc0c1\uc758 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4. \ud30c\uc77c\uc774\ub098 \uc0ac\uc6a9\uc790\ub85c\ubd80\ud130 \uce7c\ub7fc\uc758 \uc774\ub984\uc744 \ubc1b\uac70\ub098 \uc544\ubb34\uac83\ub3c4 \ubc1b\uc9c0 \uc54a\uc744 \uc218 \uc788\ub2e4.\n",
"- **\uc790\ub8cc\ud615 \ucd94\ub860\uacfc \ub370\uc774\ud130 \ubcc0\ud658**: \uc0ac\uc6a9\uc790 \uc815\uc758 \uac12 \ubcc0\ud658\uacfc \ube44\uc5b4\uc788\ub294 \uac12\uc744 \uc704\ud55c \uc0ac\uc6a9\uc790 \ub9ac\uc2a4\ud2b8\ub97c \ud3ec\ud568\ud55c\ub2e4.\n",
"- **\ub0a0\uc9dc \ubd84\uc11d**: \uc5ec\ub7ec \uce7c\ub7fc\uc5d0 \uac78\uccd0 \uc788\ub294 \ub0a0\uc9dc\uc640 \uc2dc\uac04 \uc815\ubcf4\ub97c \ud558\ub098\uc758 \uce7c\ub7fc\uc5d0 \uc870\ud569\ud574\uc11c \uacb0\uacfc\uc5d0 \ubc18\uc601\ud55c\ub2e4.\n",
"- **\ubc18\ubcf5**: \uc5ec\ub7ec \ud30c\uc77c\uc5d0 \uac78\uccd0 \uc788\ub294 \uc790\ub8cc\ub97c \ubc18\ubcf5\uc801\uc73c\ub85c \uc77d\uc5b4\uc62c \uc218 \uc788\ub2e4.\n",
"- **\uc815\uc81c\ub418\uc9c0 \uc54a\ub294 \ub370\uc774\ud130 \ucc98\ub9ac**: \ub85c\uc6b0\ub098 \uaf2c\ub9ac\ub9d0, \uc8fc\uc11d \uac74\ub108\ub6f0\uae30 \ub610\ub294 \ucc9c \ub2e8\uc704\ub9c8\ub2e4 \uc27c\ud45c\ub85c \uad6c\ubd84\ub41c \uc22b\uc790 \uac19\uc740 \uc0ac\uc18c\ud55c \uc77c\uc744 \ucc98\ub9ac\ud574\uc900\ub2e4.\n",
"\n",
"#### \uc790\ub8cc\ud615 \ucd94\ub860\uc740 \ub9e4\uc6b0 \uc911\uc694\n",
"\n",
"- \uc5b4\ub5a4 \uce7c\ub7fc\uc774 \uc22b\uc790\uc778\uc9c0 \ubd88\ub9ac\uc5b8\uc778\uc9c0 \uc9c0\uc815\ud574\uc904 \ud544\uc694\uac00 \uc5c6\ub2e4"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pandas import DataFrame, Series\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 179
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex1.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a,b,c,d,message\r\n",
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 180
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.read_csv('ch06/ex1.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 181
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex1.csv', header=None)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 2 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 3 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 182,
"text": [
" 0 1 2 3 4\n",
"0 a b c d message\n",
"1 1 2 3 4 hello\n",
"2 5 6 7 8 world\n",
"3 9 10 11 12 foo"
]
}
],
"prompt_number": 182
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \uc6d0\ub798 \uc788\ub358 Column\uba85 \ubb34\uc2dc\ud558\uace0 \ub0b4\uac00 \uc6d0\ud558\ub294 Column\uba85 \uc124\uc815\n",
"pd.read_csv('ch06/ex1.csv', names=[5,6,7,8,9])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 2 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 3 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 183,
"text": [
" 5 6 7 8 9\n",
"0 a b c d message\n",
"1 1 2 3 4 hello\n",
"2 5 6 7 8 world\n",
"3 9 10 11 12 foo"
]
}
],
"prompt_number": 183
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex1.csv', names=['a1', 'b1', 'c1', 'd1', 'message1'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
" c1 | \n",
" d1 | \n",
" message1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 2 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 3 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 184,
"text": [
" a1 b1 c1 d1 message1\n",
"0 a b c d message\n",
"1 1 2 3 4 hello\n",
"2 5 6 7 8 world\n",
"3 9 10 11 12 foo"
]
}
],
"prompt_number": 184
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 185,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 185
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# csv\ub294 DataFrame\uc73c\ub85c \uc77d\uc5b4\uc628\ub2e4.\n",
"type(df)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 186,
"text": [
"pandas.core.frame.DataFrame"
]
}
],
"prompt_number": 186
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_table('ch06/ex1.csv', sep=',')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 187,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 187
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_table('ch06/ex1.csv', sep=',', header=None)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 2 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 3 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 188,
"text": [
" 0 1 2 3 4\n",
"0 a b c d message\n",
"1 1 2 3 4 hello\n",
"2 5 6 7 8 world\n",
"3 9 10 11 12 foo"
]
}
],
"prompt_number": 188
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex2.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 189
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# header \uc790\ub3d9 \uc0dd\uc131\n",
"pd.read_csv('ch06/ex2.csv', header=None)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 190,
"text": [
" 0 1 2 3 4\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 190
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# header \uc635\uc158\uc774 \uc5c6\uc744\uc2dc header\ub97c \uccab\ubc88\uc9f8 \uc904\ub85c \uc774\uc6a9\n",
"pd.read_csv('ch06/ex2.csv')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 1 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 191,
"text": [
" 1 2 3 4 hello\n",
"0 5 6 7 8 world\n",
"1 9 10 11 12 foo"
]
}
],
"prompt_number": 191
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Column\uba85 \ucd94\uac00\n",
"pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'message'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 192,
"text": [
" a b c message\n",
"1 2 3 4 hello\n",
"5 6 7 8 world\n",
"9 10 11 12 foo"
]
}
],
"prompt_number": 192
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"names = ['a', 'b', 'c', 'd', 'message']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 193
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex2.csv', names=names)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 194,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 194
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# message -> index\n",
"pd.read_csv('ch06/ex2.csv', names=names, index_col='message')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
"
\n",
" \n",
" message | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" hello | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" world | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" foo | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 195,
"text": [
" a b c d\n",
"message \n",
"hello 1 2 3 4\n",
"world 5 6 7 8\n",
"foo 9 10 11 12"
]
}
],
"prompt_number": 195
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex2.csv', names=names, index_col='a')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" a | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 196,
"text": [
" b c d message\n",
"a \n",
"1 2 3 4 hello\n",
"5 6 7 8 world\n",
"9 10 11 12 foo"
]
}
],
"prompt_number": 196
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/csv_mindex.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"key1,key2,value1,value2\r\n",
"one,a,1,2\r\n",
"one,b,3,4\r\n",
"one,c,5,6\r\n",
"one,d,7,8\r\n",
"two,a,9,10\r\n",
"two,b,11,12\r\n",
"two,c,13,14\r\n",
"two,d,15,16\r\n"
]
}
],
"prompt_number": 197
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc9c0\uc815\ud558\uace0 \uc2f6\ub2e4\uba74 \uce7c\ub7fc \ubc88\ud638\ub098 \uc774\ub984\uc758 \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae34\ub2e4\n",
"\n",
"- 2\ubc88\uc9f8 \uacf5\ubd80\ud558\uba74\uc11c \uc815\ub9ac\ud558\ub2c8 \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc5b4\ub5bb\uac8c \uc0ac\uc6a9\ud558\ub294\uc9c0 \uc870\uae08\uc740 \uc774\ud574\uac00 \uac04\ub2e4."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 198
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parsed"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" value1 | \n",
" value2 | \n",
"
\n",
" \n",
" key1 | \n",
" key2 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" a | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" b | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" c | \n",
" 5 | \n",
" 6 | \n",
"
\n",
" \n",
" d | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" two | \n",
" a | \n",
" 9 | \n",
" 10 | \n",
"
\n",
" \n",
" b | \n",
" 11 | \n",
" 12 | \n",
"
\n",
" \n",
" c | \n",
" 13 | \n",
" 14 | \n",
"
\n",
" \n",
" d | \n",
" 15 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 199,
"text": [
" value1 value2\n",
"key1 key2 \n",
"one a 1 2\n",
" b 3 4\n",
" c 5 6\n",
" d 7 8\n",
"two a 9 10\n",
" b 11 12\n",
" c 13 14\n",
" d 15 16"
]
}
],
"prompt_number": 199
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uace0\uc815\ub41c \uad6c\ubd84\uc790\uac00 \uc5c6\ub2e4\uba74 read_table\uc758 \uad6c\ubd84\uc790\ub85c \uc815\uaddc\ud45c\ud604\uc2dd\uc744 \uc0ac\uc6a9\ud558\uba74 \ub41c\ub2e4.\n",
"\n",
"- [\ud30c\uc774\uc36c \u2013 \uc815\uaddc\uc2dd\ud45c\ud604\uc2dd(Regular Expression) \ubaa8\ub4c8](http://devanix.tistory.com/296)\n",
"- [\ubc88\uc5ed \ud30c\uc774\uc36c \uc815\uaddc\ud45c\ud604\uc2dd](http://codeflow.co.kr/question/1061/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%A0%95%EA%B7%9C-%ED%91%9C%ED%98%84%EC%8B%9D/)\n",
"- [tutorial point](http://www.tutorialspoint.com/python/python_reg_expressions.htm)\n",
"- [\ud30c\uc774\uc36c - \uc815\uaddc\ud45c\ud604\uc2dd \ubaa8\ub4c8](http://devanix.tistory.com/296)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"list(open('ch06/ex3.txt'))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 200,
"text": [
"[' A B C\\n',\n",
" 'aaa -0.264438 -1.026059 -0.619500\\n',\n",
" 'bbb 0.927272 0.302904 -0.032399\\n',\n",
" 'ccc -0.264273 -0.386314 -0.217601\\n',\n",
" 'ddd -0.871858 -0.348382 1.100491\\n']"
]
}
],
"prompt_number": 200
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uc9c1\uc811 \ud30c\uc77c\uc744 \uace0\uccd0\ub3c4 \ub418\uc9c0\ub9cc \uc774 \ud30c\uc77c\uc740 \uc5ec\ub7ec \uac1c\uc758 \uacf5\ubc31\ubb38\uc790\ub85c \ud544\ub4dc\uac00 \uad6c\ubd84\ub418\uc5b4 \uc788\uc73c\ubbc0\ub85c \uc774\ub97c \ud45c\ud604\ud560 \uc218 \uc788\ub294 \uc815\uaddc\ud45c\ud604\uc2dd \\s+\ub97c \uc0ac\uc6a9\ud574\uc11c \ucc98\ub9ac"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fields are separated by runs of whitespace; a raw string passes '\\s+' to the regex engine without relying on an invalid string escape.\n",
"result = pd.read_table('ch06/ex3.txt', sep=r'\\s+')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 201
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" aaa | \n",
" -0.264438 | \n",
" -1.026059 | \n",
" -0.619500 | \n",
"
\n",
" \n",
" bbb | \n",
" 0.927272 | \n",
" 0.302904 | \n",
" -0.032399 | \n",
"
\n",
" \n",
" ccc | \n",
" -0.264273 | \n",
" -0.386314 | \n",
" -0.217601 | \n",
"
\n",
" \n",
" ddd | \n",
" -0.871858 | \n",
" -0.348382 | \n",
" 1.100491 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 202,
"text": [
" A B C\n",
"aaa -0.264438 -1.026059 -0.619500\n",
"bbb 0.927272 0.302904 -0.032399\n",
"ccc -0.264273 -0.386314 -0.217601\n",
"ddd -0.871858 -0.348382 1.100491"
]
}
],
"prompt_number": 202
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uc774 \uacbd\uc6b0, \uccab\ubc88\uc9f8 \ub85c\uc6b0\ub294 \ub2e4\ub978 \ub85c\uc6b0\ubcf4\ub2e4 \uce7c\ub7fc\uc774 \ud558\ub098 \uc801\uae30 \ub54c\ubb38\uc5d0 read_table\uc740 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc774 DataFrame\uc758 \uc0c9\uc778\uc774 \ub418\uc5b4\uc57c \ud55c\ub2e4\uace0 \ucd94\ub860"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"-------\n",
"\n",
"### read_table\uacfc read_csv\uc758 \ucc28\uc774\uc810\uc740??\n",
"\n",
"- read_csv: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
"- read_table: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\uc744 \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n",
"\n",
"#### \uadf8\ub7ec\ub2c8 \ub458 \ub2e4 \uc0ac\uc6a9\uc744 \ud574\ub3c4 \ub418\uc9c0\ub9cc \uc660\ub9cc\ud558\uba74 read_csv \uac19\uc740 \uacbd\uc6b0\ub294 csv \ud30c\uc77c\ub9cc \uc0ac\uc6a9\uc744 \ud558\uace0 \ub098\uba38\uc9c0 \ud2b9\ubcc4\ud55c \uacbd\uc6b0\ub97c read_table\ub85c \ud65c\uc6a9\n",
"\n",
"--------"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# read_csv accepts the same regex separator; the raw string keeps '\\s+' from being parsed as a string escape.\n",
"pd.read_csv('ch06/ex3.txt', delimiter=r'\\s+')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" aaa | \n",
" -0.264438 | \n",
" -1.026059 | \n",
" -0.619500 | \n",
"
\n",
" \n",
" bbb | \n",
" 0.927272 | \n",
" 0.302904 | \n",
" -0.032399 | \n",
"
\n",
" \n",
" ccc | \n",
" -0.264273 | \n",
" -0.386314 | \n",
" -0.217601 | \n",
"
\n",
" \n",
" ddd | \n",
" -0.871858 | \n",
" -0.348382 | \n",
" 1.100491 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 203,
"text": [
" A B C\n",
"aaa -0.264438 -1.026059 -0.619500\n",
"bbb 0.927272 0.302904 -0.032399\n",
"ccc -0.264273 -0.386314 -0.217601\n",
"ddd -0.871858 -0.348382 1.100491"
]
}
],
"prompt_number": 203
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### [IO Tools(Text, CSV, HDF5, \u22ef) example](http://pandas.pydata.org/pandas-docs/stable/io.html)\n",
"\n",
"- \ud30c\uc11c \ud568\uc218\ub294 \ud30c\uc77c \ud615\uc2dd\uc5d0\uc11c \ubc1c\uc0dd\ud560 \uc218 \uc788\ub294 \ub9e4\uc6b0 \ub2e4\uc591\ud55c \uc608\uc678\ub97c \uc798 \ucc98\ub9ac\ud560 \uc218 \uc788\ub3c4\ub85d \ub9ce\uc740 \ucd94\uac00 \uc778\uc790\ub97c \uac00\uc9c0\uace0 \uc788\ub2e4.\n",
"- skiprows\ub97c \uc774\uc6a9\ud574\uc11c \uccab\ubc88\uc9f8, \uc138\ubc88\uc9f8, \ub124\ubc88\uc9f8 \ub85c\uc6b0\ub97c \uac74\ub108\ub6f8 \uc218 \uc788\uc74c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read CSV(comma-separated) file into DataFrame\n",
"pd.read_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 204
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex4.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"# hey!\r\n",
"a,b,c,d,message\r\n",
"# just wanted to make things more difficult for you\r\n",
"# who reads CSV files with computers, anyway?\r\n",
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 205
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex4.csv', skiprows=[0, 2, 3])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 206,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 206
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- **\ub204\ub77d\ub41c \uac12\uc744 \uc798 \ucc98\ub9ac\ud558\ub294 \uc77c**\uc740 \ud30c\uc77c\uc744 \uc77d\ub294 \uacfc\uc815\uc5d0\uc11c **\uc790\uc8fc \ubc1c\uc0dd**\ud558\ub294 \uc77c\uc774\uace0 **\uc911\uc694\ud55c \ubb38\uc81c**\n",
"- \ub204\ub77d\ub41c \uac12\uc740 \ud45c\uae30\ud558\uc9c0 \uc54a\uac70\ub098(\ube44\uc5b4\uc788\ub294 \ubb38\uc790\uc5f4) \uad6c\ubd84\ud558\uae30 \uc26c\uc6b4 \ud2b9\uc218\ud55c \ubb38\uc790\ub85c \ud45c\uae30\n",
"- NA, -1.#IND, NULL\ucc98\ub7fc \ube44\uc5b4\uc788\ub294 \uac12\uc73c\ub85c \uc778\uc2dd"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex5.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"something,a,b,c,d,message\r\n",
"one,1,2,3,4,NA\r\n",
"two,5,6,,8,world\r\n",
"three,9,10,11,12,foo"
]
}
],
"prompt_number": 207
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex5.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 208
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 209,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 209
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.isnull(result)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 210,
"text": [
" something a b c d message\n",
"0 False False False False False True\n",
"1 False False False True False False\n",
"2 False False False False False False"
]
}
],
"prompt_number": 210
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### na_values \uc635\uc158\uc740 \ub9ac\uc2a4\ud2b8\ub098 \ubb38\uc790\uc5f4 \uc9d1\ud569\uc744 \ubc1b\uc544\uc11c \ub204\ub77d\ub41c \uac12\uc744 \ucc98\ub9ac\n",
"\n",
"----------\n",
"\n",
"### \uc65c na_values\ub97c \uc0ac\uc6a9\ud560\uae4c?\n",
"\n",
"- \ud2b9\uc815\ud55c \uac12\ub4e4\uc744 \ub204\ub77d\ub41c \uac12(NaN)\uc73c\ub85c \ucc98\ub9ac\ud574\uc11c \uacc4\uc0b0\uc5d0\uc11c \uc81c\uc678\ud558\ub824\uace0 \uc0ac\uc6a9\ud55c\ub2e4.\n",
"\n",
"-----------"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex5.csv', na_values=['NULL'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 211
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 212,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 212
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# world\ub97c NA\uac12\uc73c\ub85c \ucc98\ub9ac\ud558\ub2c8 NaN\uc73c\ub85c \ub098\uc628\ub2e4.\n",
"# \ud2b9\uc815\ud55c \uac12\uc744 NA \ucc98\ub9ac\ud560 \uc218 \uc788\uc744\uac83 \uac19\ub2e4.\n",
"pd.read_csv('ch06/ex5.csv', na_values=['world'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 213,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 NaN\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 213
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uc5f4\ub9c8\ub2e4 \ub2e4\ub978 NA \ubb38\uc790\ub97c \uc0ac\uc804 \uac12\uc73c\ub85c \ub118\uaca8 \ucc98\ub9ac \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sentinels = {'message': ['foo', 'NA'], 'something': ['two']}"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 214
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex5.csv', na_values=sentinels)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" NaN | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 215,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 NaN 5 6 NaN 8 world\n",
"2 three 9 10 11 12 NaN"
]
}
],
"prompt_number": 215
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### read_csv / read_table \ud568\uc218 \uc778\uc790\n",
"\n",
"\uc778\uc790 | \uc124\uba85\n",
"--- | ---\n",
"path | \ud30c\uc77c \uc2dc\uc2a4\ud15c\uc5d0\uc11c\uc758 \uc704\uce58, URL, \ud30c\uc77c \uac1d\uccb4\ub97c \ub098\ud0c0\ub0b4\ub294 \ubb38\uc790\uc5f4\n",
"sep or delimiter | \ud544\ub4dc\ub97c \uad6c\ubd84\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud560 \uc5f0\uc18d\ub41c \ubb38\uc790\ub098 \uc815\uaddc\ud45c\ud604\uc2dd\n",
"header | \uce7c\ub7fc\uc758 \uc774\ub984\uc73c\ub85c \uc0ac\uc6a9\ud560 \ub85c\uc6b0\uc758 \ubc88\ud638, \uae30\ubcf8 \uac12\uc740 0(\uccab \ub85c\uc6b0)\uc774\uba70 \ud5e4\ub354\uac00 \uc5c6\uc73c\uba74 None\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4.\n",
"index_col | \uc0c9\uc778\uc73c\ub85c \uc0ac\uc6a9\ud560 \uce7c\ub7fc \ubc88\ud638\ub098 \uc774\ub984, \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc9c0\uc815\ud560 \uacbd\uc6b0 \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae38 \uc218 \uc788\ub2e4.\n",
"names | \uceec\ub7fc \uc774\ub984\uc73c\ub85c \uc0ac\uc6a9\ud560 \ub9ac\uc2a4\ud2b8. header = None\uacfc \ud568\uaed8 \uc0ac\uc6a9\ud55c\ub2e4.\n",
"skiprows | \ud30c\uc77c\uc758 \uc2dc\uc791\ubd80\ud130 \ubb34\uc2dc\ud560 \ub85c\uc6b0\uc758 \uac1c\uc218 \ub610\ub294 \ubb34\uc2dc\ud560 \ub85c\uc6b0 \ubc88\ud638\uac00 \ub2f4\uae34 \ub9ac\uc2a4\ud2b8\n",
"na_values | NA \uac12\uc73c\ub85c \ucc98\ub9ac\ud560 \uac12\ub4e4\uc758 \ub098\uc5f4\n",
"comment | \uc8fc\uc11d\uc73c\ub85c \ubd84\ub958\ub418\uc5b4 \ud30c\uc2f1\ud558\uc9c0 \uc54a\uc744 \ubb38\uc790 \ud639\uc740 \ubb38\uc790\uc5f4\n",
"parse_dates | \ub0a0\uc9dc\ub97c datetime\uc73c\ub85c \ubcc0\ud658\ud560\uc9c0\uc758 \uc5ec\ubd80. \uae30\ubcf8\uac12\uc740 False\uc774\uba70, True\uc77c \uacbd\uc6b0 \ubaa8\ub4e0 \uce7c\ub7fc\uc5d0 \ub2e4 \uc801\uc6a9\ub41c\ub2e4. \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae30\uba74 \ubcc0\ud658\ud560 \uce7c\ub7fc\uc744 \uc9c0\uc815\ud560 \uc218 \uc788\ub294\ub370, [1, 2, 3]\uc744 \ub118\uae30\uba74 \uac01\uac01\uc758 \uce7c\ub7fc\uc744 datetime\uc73c\ub85c \ubcc0\ud658\ud558\uace0, [[1, 3]]\uc744 \ub118\uae30\uba74 1, 3\ubc88 \uce7c\ub7fc\uc744 \uc870\ud569\ud574\uc11c \ud558\ub098\uc758 datetime\uc73c\ub85c \ubcc0\ud658\ud55c\ub2e4.\n",
"keep_date_col | \uc5ec\ub7ec \uce7c\ub7fc\uc744 datetime\uc73c\ub85c \ubcc0\ud658\ud588\uc744 \uacbd\uc6b0 \uc6d0\ub798 \uce7c\ub7fc\uc744 \ub0a8\uaca8\ub458\uc9c0\uc758 \uc5ec\ubd80. \uae30\ubcf8\uac12\uc740 False\n",
"converters | \ubcc0\ud658 \uc2dc \uce7c\ub7fc\uc5d0 \uc801\uc6a9\ud560 \ud568\uc218\ub97c \uc9c0\uc815\ud55c\ub2e4. \uc608\ub97c \ub4e4\uc5b4 {'foo': f}\ub294 'foo'\uce7c\ub7fc\uc5d0 f \ud568\uc218\ub97c \uc801\uc6a9\ud55c\ub2e4. \uc804\ub2ec\ud558\ub294 \uc0ac\uc804\uc758 \ud0a4 \uac12\uc740 \uce7c\ub7fc \uc774\ub984\uc774\ub098 \ubc88\ud638\uac00 \ub420 \uc218 \uc788\ub2e4.\n",
"dayfirst | \ubaa8\ud638\ud55c \ub0a0\uc9dc \ud615\uc2dd\uc77c \uacbd\uc6b0 \uad6d\uc81c \ud615\uc2dd\uc73c\ub85c \uac04\uc8fc\ud55c\ub2e4(7/6/2012\ub294 2012\ub144 6\uc6d4 7\uc77c\ub85c \uac04\uc8fc\ud55c\ub2e4). \uae30\ubcf8\uac12\uc740 False\n",
"date_parser | \ub0a0\uc9dc \ubcc0\ud658 \uc2dc \uc0ac\uc6a9\ud560 \ud568\uc218\n",
"nrows | \ud30c\uc77c\uc758 \uccab \uc77c\ubd80\ub9cc \uc77d\uc5b4\uc62c \ub54c \ucc98\uc74c \uba87 \uc904\uc744 \uc77d\uc744 \uac83\uc778\uc9c0 \uc9c0\uc815\ud55c\ub2e4.\n",
"iterator | \ud30c\uc77c\uc744 \uc870\uae08\uc529 \uc77d\uc744 \ub54c \uc0ac\uc6a9\ud558\ub3c4\ub85d TextParser \uac1d\uccb4\ub97c \ubc18\ud658\ud558\ub3c4\ub85d \ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
"chunksize | TextParser \uac1d\uccb4\uc5d0\uc11c \uc0ac\uc6a9\ud560, \ud55c \ubc88\uc5d0 \uc77d\uc744 \ud30c\uc77c\uc758 \ud06c\uae30\n",
"skip_footer | \ubb34\uc2dc\ud560 \ud30c\uc77c\uc758 \ub9c8\uc9c0\ub9c9 \uc904 \uc218\n",
"verbose | \ud30c\uc2f1 \uacb0\uacfc\uc5d0 \ub300\ud55c \uc815\ubcf4\ub97c \ucd9c\ub825\ud55c\ub2e4. \uc22b\uc790\uac00 \uc544\ub2cc \uac12\ub4e4\uc774 \ub4e4\uc5b4\uc788\ub294 \uce7c\ub7fc\uc774\uba74\uc11c \ub204\ub77d\ub41c \uac12\uc774 \uc788\ub2e4\uba74 \uc904 \ubc88\ud638\ub97c \ucd9c\ub825\ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
"encoding | \uc720\ub2c8\ucf54\ub4dc \uc778\ucf54\ub529 \uc885\ub958\ub97c \uc9c0\uc815\ud55c\ub2e4. UTF-8\ub85c \uc778\ucf54\ub529\ub41c \ud14d\uc2a4\ud2b8\uc77c \uacbd\uc6b0 'utf-8'\ub85c \uc9c0\uc815\ud55c\ub2e4.\n",
"squeeze | \uce7c\ub7fc\uc774 \ud558\ub098\ubfd0\uc774\ub77c\uba74 Series \uac1d\uccb4\ub97c \ubc18\ud658\ud55c\ub2e4. \uae30\ubcf8\uac12\uc740 False\n",
"thousands | \uc22b\uc790\ub97c \ucc9c \ub2e8\uc704\ub85c \ub04a\uc744 \ub54c \uc0ac\uc6a9\ud560 ','\ub098 '.' \uac19\uc740 \uad6c\ubd84\uc790"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \uc774 \uba85\ub839\uc5b4\ub85c \uc5b4\ub5a4 \ud568\uc218\uc778\uc9c0, \uc5b4\ub5a4 \ud30c\ub77c\ubbf8\ud130\ub97c \ub118\uaca8\uc57c \ud558\ub294\uc9c0 \uc815\ud655\ud788 \uc54c \uc218 \uc788\ub2e4.\n",
"# \uad73\uc774 \uba85\ub839\uc5b4\ub4e4\uc744 \ub530\ub77c\uce60 \ud544\uc694\ub294 \uc5c6\ub294\ub370 \uc5b4\ub5a4 \ud30c\ub77c\ubbf8\ud130\ub4e4\uc744 \ub118\uae30\ub294\uc9c0 \ud55c \ubc88 \uacf5\ubd80\ud558\ub294 \uacb8\uacb8\ud574\uc11c \uccd0\ubd24\ub2e4.\n",
"pd.read_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 216
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Type: function\n",
"String form: \n",
"File: /Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/parsers.py\n",
"Definition: pd.read_csv(filepath_or_buffer, sep=',', dialect=None, compression=None, doublequote=True, escapechar=None, quotechar='\"', quoting=0, skipinitialspace=False, lineterminator=None, header='infer', index_col=None, names=None, prefix=None, skiprows=None, skipfooter=None, skip_footer=0, na_values=None, na_fvalues=None, true_values=None, false_values=None, delimiter=None, converters=None, dtype=None, usecols=None, engine='c', delim_whitespace=False, as_recarray=False, na_filter=True, compact_ints=False, use_unsigned=False, low_memory=True, buffer_lines=None, warn_bad_lines=True, error_bad_lines=True, keep_default_na=True, thousands=None, comment=None, decimal='.', parse_dates=False, keep_date_col=False, dayfirst=False, date_parser=None, memory_map=False, nrows=None, iterator=False, chunksize=None, verbose=False, encoding=None, squeeze=False, mangle_dupe_cols=True, tupleize_cols=True)\n",
"Docstring:\n",
"Read CSV (comma-separated) file into DataFrame\n",
"\n",
"Also supports optionally iterating or breaking of the file\n",
"into chunks.\n",
"\n",
"Parameters\n",
"----------\n",
"filepath_or_buffer : string or file handle / StringIO. The string could be\n",
" a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host\n",
" is expected. For instance, a local file could be\n",
" file ://localhost/path/to/table.csv\n",
"sep : string, default ','\n",
" Delimiter to use. If sep is None, will try to automatically determine\n",
" this. Regular expressions are accepted.\n",
"\n",
"lineterminator : string (length 1), default None\n",
" Character to break file into lines. Only valid with C parser\n",
"quotechar : string\n",
" The character to used to denote the start and end of a quoted item. Quoted items can include the delimiter and it will be ignored.\n",
"quoting : int\n",
" Controls whether quotes should be recognized. Values are taken from\n",
" `csv.QUOTE_*` values. Acceptable values are 0, 1, 2, and 3 for\n",
" QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONE, and QUOTE_NONNUMERIC, respectively.\n",
"skipinitialspace : boolean, default False\n",
" Skip spaces after delimiter\n",
"escapechar : string\n",
"dtype : Type name or dict of column -> type\n",
" Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}\n",
"compression : {'gzip', 'bz2', None}, default None\n",
" For on-the-fly decompression of on-disk data\n",
"dialect : string or csv.Dialect instance, default None\n",
" If None defaults to Excel dialect. Ignored if sep longer than 1 char\n",
" See csv.Dialect documentation for more details\n",
"header : int, default 0 if names parameter not specified,\n",
" Row to use for the column labels of the parsed DataFrame. Specify None if\n",
" there is no header row. Can be a list of integers that specify row\n",
" locations for a multi-index on the columns E.g. [0,1,3]. Interveaning\n",
" rows that are not specified (E.g. 2 in this example are skipped)\n",
"skiprows : list-like or integer\n",
" Row numbers to skip (0-indexed) or number of rows to skip (int)\n",
" at the start of the file\n",
"index_col : int or sequence or False, default None\n",
" Column to use as the row labels of the DataFrame. If a sequence is given, a\n",
" MultiIndex is used. If you have a malformed file with delimiters at the end\n",
" of each line, you might consider index_col=False to force pandas to _not_\n",
" use the first column as the index (row names)\n",
"names : array-like\n",
" List of column names to use. If file contains no header row, then you\n",
" should explicitly pass header=None\n",
"prefix : string or None (default)\n",
" Prefix to add to column numbers when no header, e.g 'X' for X0, X1, ...\n",
"na_values : list-like or dict, default None\n",
" Additional strings to recognize as NA/NaN. If dict passed, specific\n",
" per-column NA values\n",
"true_values : list\n",
" Values to consider as True\n",
"false_values : list\n",
" Values to consider as False\n",
"keep_default_na : bool, default True\n",
" If na_values are specified and keep_default_na is False the default NaN\n",
" values are overridden, otherwise they're appended to\n",
"parse_dates : boolean, list of ints or names, list of lists, or dict\n",
" If True -> try parsing the index.\n",
" If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column.\n",
" If [[1, 3]] -> combine columns 1 and 3 and parse as a single date column.\n",
" {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo'\n",
"keep_date_col : boolean, default False\n",
" If True and parse_dates specifies combining multiple columns then\n",
" keep the original columns.\n",
"date_parser : function\n",
" Function to use for converting a sequence of string columns to an\n",
" array of datetime instances. The default uses dateutil.parser.parser\n",
" to do the conversion.\n",
"dayfirst : boolean, default False\n",
" DD/MM format dates, international and European format\n",
"thousands : str, default None\n",
" Thousands separator\n",
"comment : str, default None\n",
" Indicates remainder of line should not be parsed\n",
" Does not support line commenting (will return empty line)\n",
"decimal : str, default '.'\n",
" Character to recognize as decimal point. E.g. use ',' for European data\n",
"nrows : int, default None\n",
" Number of rows of file to read. Useful for reading pieces of large files\n",
"iterator : boolean, default False\n",
" Return TextFileReader object\n",
"chunksize : int, default None\n",
" Return TextFileReader object for iteration\n",
"skipfooter : int, default 0\n",
" Number of line at bottom of file to skip\n",
"converters : dict. optional\n",
" Dict of functions for converting values in certain columns. Keys can either\n",
" be integers or column labels\n",
"verbose : boolean, default False\n",
" Indicate number of NA values placed in non-numeric columns\n",
"delimiter : string, default None\n",
" Alternative argument name for sep. Regular expressions are accepted.\n",
"encoding : string, default None\n",
" Encoding to use for UTF when reading/writing (ex. 'utf-8')\n",
"squeeze : boolean, default False\n",
" If the parsed data only contains one column then return a Series\n",
"na_filter: boolean, default True\n",
" Detect missing value markers (empty strings and the value of na_values). In\n",
" data without any NAs, passing na_filter=False can improve the performance\n",
" of reading a large file\n",
"usecols : array-like\n",
" Return a subset of the columns.\n",
" Results in much faster parsing time and lower memory usage.\n",
"mangle_dupe_cols: boolean, default True\n",
" Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'\n",
"tupleize_cols: boolean, default False\n",
" Leave a list of tuples on columns as is (default is to convert to\n",
" a Multi Index on the columns)\n",
"\n",
"Returns\n",
"-------\n",
"result : DataFrame or TextParser"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.1.1 \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc870\uae08\uc529 \uc77d\uc5b4\uc624\uae30"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex6.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 217
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 10000 entries, 0 to 9999\n",
"Data columns (total 5 columns):\n",
"one 10000 non-null values\n",
"two 10000 non-null values\n",
"three 10000 non-null values\n",
"four 10000 non-null values\n",
"key 10000 non-null values\n",
"dtypes: float64(4), object(1)\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 218,
"text": [
"\n",
"Int64Index: 10000 entries, 0 to 9999\n",
"Data columns (total 5 columns):\n",
"one 10000 non-null values\n",
"two 10000 non-null values\n",
"three 10000 non-null values\n",
"four 10000 non-null values\n",
"key 10000 non-null values\n",
"dtypes: float64(4), object(1)"
]
}
],
"prompt_number": 218
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### nrows\ub85c \ucc98\uc74c \uba87 \uc904\ub9cc \uc77d\uc5b4\ubcfc \uc218 \uc788\ub2e4.\n",
"\n",
"- \ub9ac\ub205\uc2a4\uc758 head \uc758 \uae30\ub2a5\uacfc \uac19\ub2e4\uace0 \uc0dd\uac01\ud558\uba74 \ub41c\ub2e4."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex6.csv', nrows=5)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
" three | \n",
" four | \n",
" key | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.467976 | \n",
" -0.038649 | \n",
" -0.295344 | \n",
" -1.824726 | \n",
" L | \n",
"
\n",
" \n",
" 1 | \n",
" -0.358893 | \n",
" 1.404453 | \n",
" 0.704965 | \n",
" -0.200638 | \n",
" B | \n",
"
\n",
" \n",
" 2 | \n",
" -0.501840 | \n",
" 0.659254 | \n",
" -0.421691 | \n",
" -0.057688 | \n",
" G | \n",
"
\n",
" \n",
" 3 | \n",
" 0.204886 | \n",
" 1.074134 | \n",
" 1.388361 | \n",
" -0.982404 | \n",
" R | \n",
"
\n",
" \n",
" 4 | \n",
" 0.354628 | \n",
" -0.133116 | \n",
" 0.283763 | \n",
" -0.837063 | \n",
" Q | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 219,
"text": [
" one two three four key\n",
"0 0.467976 -0.038649 -0.295344 -1.824726 L\n",
"1 -0.358893 1.404453 0.704965 -0.200638 B\n",
"2 -0.501840 0.659254 -0.421691 -0.057688 G\n",
"3 0.204886 1.074134 1.388361 -0.982404 R\n",
"4 0.354628 -0.133116 0.283763 -0.837063 Q"
]
}
],
"prompt_number": 219
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### TextParser \uac1d\uccb4\ub97c \uc774\uc6a9\ud574\uc11c chunksize\uc5d0 \ub530\ub77c \ubd84\ub9ac\ub41c \ud30c\uc77c\uc744 \uc21c\ud68c \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 220
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chunker"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 221,
"text": [
""
]
}
],
"prompt_number": 221
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)\n",
"\n",
"tot = Series([])\n",
"for piece in chunker:\n",
" # piece['key']\uc5d0 \uc788\ub294 E, X, L \ub4f1\uc758 \uc22b\uc790\ub97c \uc13c\ub2e4. \uac12\uc774 \uc5c6\ub294 \uac83\ub4e4\uc740 0\uc73c\ub85c \ucc44\uc6b4\ub2e4.\n",
" tot = tot.add( piece['key'].value_counts(), fill_value=0)\n",
"\n",
"# Key\uac00 \uc544\ub2cc \uac12\uc744(order) \uae30\uc900\uc73c\ub85c \ub0b4\ub9bc\ucc28\uc21c \uc815\ub9ac\n",
"tot = tot.order(ascending=False)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 222
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tot[:10]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 223,
"text": [
"E 368\n",
"X 364\n",
"L 346\n",
"O 343\n",
"Q 340\n",
"M 338\n",
"J 337\n",
"F 335\n",
"K 334\n",
"H 330\n",
"dtype: float64"
]
}
],
"prompt_number": 223
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"6.1.2 \ub370\uc774\ud130\ub97c \ud14d\uc2a4\ud2b8 \ud615\uc2dd\uc73c\ub85c \uae30\ub85d\ud558\uae30"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = pd.read_csv('ch06/ex5.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 224
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 225,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 225
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv('ch06/out.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 226
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/out.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
",something,a,b,c,d,message\r\n",
"0,one,1,2,3.0,4,\r\n",
"1,two,5,6,,8,world\r\n",
"2,three,9,10,11.0,12,foo\r\n"
]
}
],
"prompt_number": 227
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# csv\ub85c \uc9c0\uc815\ud558\ub294\ub370 output\uc740 \ud45c\uc900\uc544\uc6c3\ud48b(\ubaa8\ub2c8\ud130), separator\ub294 '|'\n",
"data.to_csv(sys.stdout, sep='|')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"|something|a|b|c|d|message\n",
"0|one|1|2|3.0|4|\n",
"1|two|5|6||8|world\n",
"2|three|9|10|11.0|12|foo\n"
]
}
],
"prompt_number": 228
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Write DataFrame to a comma-separated value (csv) file\n",
"# na_rep -> Missing data representation. NA REPresentation\n",
"data.to_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 229
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### na_rep\ub85c \ub204\ub77d\ub41c\uac12\uc744 \uc6d0\ud558\ub294 \uac12\uc73c\ub85c \ubcc0\uacbd \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout, na_rep='NULL')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
",something,a,b,c,d,message\n",
"0,one,1,2,3.0,4,NULL\n",
"1,two,5,6,NULL,8,world\n",
"2,three,9,10,11.0,12,foo\n"
]
}
],
"prompt_number": 230
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout, na_rep='NaN')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
",something,a,b,c,d,message\n",
"0,one,1,2,3.0,4,NaN\n",
"1,two,5,6,NaN,8,world\n",
"2,three,9,10,11.0,12,foo\n"
]
}
],
"prompt_number": 231
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### row, column \uac12\uc744 \uc800\uc7a5\ud560 \uac83\uc778\uc9c0 \uc120\ud0dd \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout, index=False, header=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"one,1,2,3.0,4,\n",
"two,5,6,,8,world\n",
"three,9,10,11.0,12,foo\n"
]
}
],
"prompt_number": 232
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uceec\ub7fc\uc758 \uc77c\ubd80\ubd84\ub9cc \uae30\ub85d \uac00\ub2a5, \uc21c\uc11c\ub97c \uc9c1\uc811 \uc9c0\uc815 \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout, index=False, cols=['a', 'b', 'c'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a,b,c\n",
"1,2,3.0\n",
"5,6,\n",
"9,10,11.0\n"
]
}
],
"prompt_number": 233
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Series\uc5d0\ub3c4 to_csv method \uc874\uc7ac"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dates = pd.date_range('1/1/2000', periods=7)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 234
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ts = Series(np.arange(7), index=dates)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 235
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ts.to_csv('ch06/tseries.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 236
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/tseries.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"2000-01-01,0\r\n",
"2000-01-02,1\r\n",
"2000-01-03,2\r\n",
"2000-01-04,3\r\n",
"2000-01-05,4\r\n",
"2000-01-06,5\r\n",
"2000-01-07,6\r\n"
]
}
],
"prompt_number": 237
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uc57d\uac04 \ubcf5\uc7a1\ud558\uac8c \ud5e4\ub354\ub97c \uc5c6\uc560\uace0 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \ud558\uba74 read_csv \uba54\uc11c\ub4dc\ub85c Series \uac1d\uccb4\ub97c \uc5bb\uc744 \uc218 \uc788\uc9c0\ub9cc from_csv \uba54\uc11c\ub4dc\uac00 \uc880 \ub354 \ud3b8\ub9ac\ud558\uace0 \uac04\ub2e8\ud558\uac8c \ubb38\uc81c \ud574\uacb0"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.DataFrame.to_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 238
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Series.from_csv('ch06/tseries.csv', parse_dates=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 239,
"text": [
"2000-01-01 0\n",
"2000-01-02 1\n",
"2000-01-03 2\n",
"2000-01-04 3\n",
"2000-01-05 4\n",
"2000-01-06 5\n",
"2000-01-07 6\n",
"dtype: int64"
]
}
],
"prompt_number": 239
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type( Series.from_csv('ch06/tseries.csv', parse_dates=True) )"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 240,
"text": [
"pandas.core.series.Series"
]
}
],
"prompt_number": 240
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# parse dates: boolean, default True.\n",
"# Parse dates. Different default from read_table\n",
"Series.from_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 241
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### read_csv\ub97c Series\ub85c \uc77d\uc744 \uc218 \uc788\ub2e4\uace0 \uc2e4\ud5d8\ud558\ub294 \uc911\uc778\ub370 \uc798 \uc548\ub418\ub124.. \n",
"\n",
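"- DataFrame\uc73c\ub85c \uc77d\uc5b4\uc9d0\n",
"- header=None, index_col=0, squeeze=True\ub97c \ud568\uaed8 \ub118\uae30\uba74 Series\ub85c \uc77d\uc744 \uc218 \uc788\ub2e4."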
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/tseries.csv', header=None)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2000-01-01 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2000-01-02 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2000-01-03 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 2000-01-04 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 2000-01-05 | \n",
" 4 | \n",
"
\n",
" \n",
" 5 | \n",
" 2000-01-06 | \n",
" 5 | \n",
"
\n",
" \n",
" 6 | \n",
" 2000-01-07 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 242,
"text": [
" 0 1\n",
"0 2000-01-01 0\n",
"1 2000-01-02 1\n",
"2 2000-01-03 2\n",
"3 2000-01-04 3\n",
"4 2000-01-05 4\n",
"5 2000-01-06 5\n",
"6 2000-01-07 6"
]
}
],
"prompt_number": 242
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type(pd.read_csv('ch06/tseries.csv', header=None))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 243,
"text": [
"pandas.core.frame.DataFrame"
]
}
],
"prompt_number": 243
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 244
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.1.3 \uc218\ub3d9\uc73c\ub85c \uad6c\ubd84 \ud615\uc2dd \ucc98\ub9ac\ud558\uae30"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### read_table\uc5d0\uc11c \uc77d\uc744 \uc218 \uc5c6\ub294 \uc798\ubabb\ub41c \ud615\uc2dd\uc758 \uc904\uc774 \ud3ec\ud568\ub41c \ub370\uc774\ud130\uac00 \ub4dc\ubb3c\uac8c \ubc1c\uacac \ub428 -> \uc218\ub3d9 \ucc98\ub9ac"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex7.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\"a\",\"b\",\"c\"\r\n",
"\"1\",\"2\",\"3\"\r\n",
"\"1\",\"2\",\"3\",\"4\"\r\n"
]
}
],
"prompt_number": 245
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv\n",
"f = open('ch06/ex7.csv')\n",
"\n",
"reader = csv.reader(f)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 246
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for line in reader:\n",
" print line"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['a', 'b', 'c']\n",
"['1', '2', '3']\n",
"['1', '2', '3', '4']\n"
]
}
],
"prompt_number": 247
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read every row of ex7.csv into a list of lists of strings. The with-block closes the\n",
"# file as soon as the rows are read instead of leaving an anonymous handle open.\n",
"# A separate name is used so the handle `f` opened earlier is not rebound.\n",
"with open('ch06/ex7.csv') as ex7_file:\n",
"    lines = list(csv.reader(ex7_file))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 248
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"header, values = lines[0], lines[1:]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 249
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"header"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 250,
"text": [
"['a', 'b', 'c']"
]
}
],
"prompt_number": 250
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"values"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 251,
"text": [
"[['1', '2', '3'], ['1', '2', '3', '4']]"
]
}
],
"prompt_number": 251
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build a mapping of column name -> tuple of that column's values.\n",
"# zip(*values) transposes the rows into columns and stops at the shortest row, so the\n",
"# extra '4' in the last row is dropped; the outer zip then pairs each column tuple\n",
"# with its header name ('a', 'b', 'c').\n",
"data_dict = dict(zip(header, zip(*values)))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 252
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data_dict"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 253,
"text": [
"{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}"
]
}
],
"prompt_number": 253
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### CSV \ud30c\uc77c\uc740 \ub2e4\uc591\ud55c \ud30c\uc77c \uc874\uc7ac\ud558\uae30 \ub54c\ubb38\uc5d0 \ub2e4\uc591\ud55c \uc635\uc158\ub4e4\uc740 csv.Dialect \uc0c1\uc18d\ubc1b\uc544 \ud574\uacb0\n",
"\n",
"- \ub2e4\uc591\ud55c \uad6c\ubd84\uc790\n",
"- \ubb38\uc790\uc5f4\uc744 \ub458\ub7ec\uc2f8\ub294 \ubc29\ubc95\n",
"- \uac1c\ud589\ubb38\uc790"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class my_dialect(csv.Dialect):\n",
"    # Semicolon-separated dialect. A csv.Dialect subclass has to define every attribute\n",
"    # the csv module validates, including quoting: leaving it out raises\n",
"    # TypeError: \"quoting\" must be an integer as soon as the dialect is used.\n",
"    lineterminator = '\\n'\n",
"    delimiter = ';'\n",
"    quotechar = '\"'\n",
"    # QUOTE_MINIMAL is the csv module's own default; callers can still override it\n",
"    # with the quoting= keyword of csv.reader / csv.writer.\n",
"    quoting = csv.QUOTE_MINIMAL\n",
"\n",
"reader = csv.reader"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 254
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reader = csv.reader?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 255
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reader = csv.reader"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reader = csv.reader"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 256
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### TypeError: \"quoting\" must be an integer\n",
"\n",
"- [_csv.Error: field larger than field limit (131072) \ucc38\uace0](http://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\n",
"reader = csv.reader(f, dialect=my_dialect)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "\"quoting\" must be an integer",
"output_type": "pyerr",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmy_dialect\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: \"quoting\" must be an integer"
]
}
],
"prompt_number": 257
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reader = csv.reader(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 258
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"csv.QUOTE_NONE"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 259,
"text": [
"3"
]
}
],
"prompt_number": 259
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reader = csv.reader(f, delimiter='|')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 260
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### [13.1. csv \u2014 CSV File Reading and Writing](https://docs.python.org/3.1/library/csv.html#dialects-and-formatting-parameters)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \uc5b4\ub5a4 \uc635\uc158\ub4e4 \uc788\ub294\uc9c0 \ubcf4\ub824\uace0 \ud588\ub354\ub2c8 \uc548 \ubcf4\uc5ec\uc8fc\ub124...\n",
"csv.reader??"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 261
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### CSV Note\n",
"\n",
"- \uc880 \ub354 \ubcf5\uc7a1\ud558\uac70\ub098 \uad6c\ubd84\uc790\uac00 \ud55c \uae00\uc790\ub97c \ucd08\uacfc\ud558\ub294 \uace0\uc815 \uae38\uc774\ub97c \uac00\uc9c4\ub2e4\uba74 csv \ubaa8\ub4c8\uc744 \uc0ac\uc6a9\ud560 \uc218 \uc5c6\ub2e4.\n",
"- \uc774\ub7f0 \uacbd\uc6b0\uc5d0\ub294 \uc904\uc744 \ub098\ub204\uace0 \ubb38\uc790\uc5f4\uc758 split \uba54\uc11c\ub4dc\ub098 \uc815\uaddc\ud45c\ud604\uc2dd \uba54\uc11c\ub4dc\uc778 re.split \ub4f1\uc744 \uc774\uc6a9\ud574\uc11c \uac00\uacf5\ud558\ub294 \uc791\uc5c5\uc744 \ud574\uc57c \ud55c\ub2e4."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### CSV \ud30c\uc77c \uae30\ub85d"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Write a header row and three data rows to mydata.csv using my_dialect (';' delimiter);\n",
"# the quoting=csv.QUOTE_NONE keyword turns quoting off for this writer.\n",
"with open('ch06/mydata.csv', 'w') as f:\n",
"    writer = csv.writer(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)\n",
"    writer.writerows([\n",
"        ('one', 'two', 'three'),\n",
"        ('1', '2', '3'),\n",
"        ('4', '5', '6'),\n",
"        ('7', '8', '9'),\n",
"    ])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 262
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/mydata.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"one;two;three\r\n",
"1;2;3\r\n",
"4;5;6\r\n",
"7;8;9\r\n"
]
}
],
"prompt_number": 263
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### JSON \ub370\uc774\ud130\n",
"\n",
"- JSON(JavaScript Object Notation)\uc740 \uc6f9\ube0c\ub77c\uc6b0\uc800\uc640 \ub2e4\ub978 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc774 HTTP \uc694\uccad\uc73c\ub85c \ub370\uc774\ud130\ub97c \ubcf4\ub0bc \ub54c \ub110\ub9ac \uc0ac\uc6a9\ud558\ub294 \ud45c\uc900 \ud30c\uc77c \ud615\uc2dd \uc911 \ud558\ub098\ub2e4.\n",
"- JSON\uc740 CSV \uac19\uc740 \ud45c \ud615\uc2dd\uc758 \ud14d\uc2a4\ud2b8\ubcf4\ub2e4 \uc880 \ub354 \uc720\uc5f0\ud55c \ub370\uc774\ud130 \ud615\uc2dd\uc774\uba70, JSON \ub370\uc774\ud130\uc758 \uc608\ub294 \ub2e4\uc74c\uacfc \uac19\ub2e4."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# json\uc740 python\uc5d0\uc11c\ucc98\ub7fc '\uc73c\ub85c \ud558\uba74 \uc548\ub41c\ub2e4. \ud604\uc7ac \"\"\"\ub85c \uac10\uc2f8 \ubb38\uc790\uc5f4\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uae30 \ub54c\ubb38\uc5d0 \n",
"# javascript\uc5d0\uc11c\ub294 '\ub97c string \uac12\uc73c\ub85c \uc778\uc2dd\ud558\uc9c0 \uc54a\uc544\uc11c \uc5d0\ub7ec \ubc1c\uc0dd\n",
"obj = \"\"\"\n",
"{\n",
" 'name': 'Wes',\n",
" 'places_lived': ['United States', 'Spain', 'Germany'],\n",
" 'pet': null, 'siblings': [{'name': 'Scott', 'age':25, 'pet':'Zuko'},\n",
" {'name': 'Katie', 'age':33, 'pet': 'Cisco'}]\n",
"}\n",
"\"\"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 264
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 265
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# ValueError: Expecting property name: line 3 column 5 (char 7)\n",
"result = json.loads(obj)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Expecting property name: line 3 column 5 (char 7)",
"output_type": "pyerr",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# ValueError: Expecting property name: line 3 column 5 (char 7)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.pyc\u001b[0m in \u001b[0;36mloads\u001b[0;34m(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[0mparse_int\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mparse_float\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 337\u001b[0m parse_constant is None and object_pairs_hook is None and not kw):\n\u001b[0;32m--> 338\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_default_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0mcls\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJSONDecoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 364\u001b[0m \"\"\"\n\u001b[0;32m--> 365\u001b[0;31m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 366\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.pyc\u001b[0m in \u001b[0;36mraw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 379\u001b[0m \"\"\"\n\u001b[1;32m 380\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 381\u001b[0;31m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscan_once\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 382\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"No JSON object could be decoded\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Expecting property name: line 3 column 5 (char 7)"
]
}
],
"prompt_number": 266
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = \"\"\"\n",
"{\n",
" \"name\": \"Wes\",\n",
" \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
" \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\n",
" {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\n",
"}\n",
"\"\"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 267
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 268,
"text": [
"'\\n{\\n \"name\": \"Wes\",\\n \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\\n \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\\n {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\\n}\\n'"
]
}
],
"prompt_number": 268
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### JSON\uc740 \ub110 \uac12\uc778 null\uacfc \ub2e4\ub978 \uba87 \uac00\uc9c0 \uc0ac\uc18c\ud55c \uc8fc\uc758\uc0ac\ud56d(\ub9ac\uc2a4\ud2b8\uc758 \ub9c8\uc9c0\ub9c9\uc5d0 \uc27c\ud45c\uac00 \uc788\uc73c\uba74 \uc548 \ub41c\ub2e4\ub4e0\uac00 \ud558\ub294)\uc744 \uc81c\uc678\ud558\uba74 \ud30c\uc774\uc36c \ucf54\ub4dc\uc640 \uac70\uc758 \uc720\uc0ac\n",
"\n",
"- \uae30\ubcf8 \uc790\ub8cc\ud615\uc740 \uac1d\uccb4(\uc0ac\uc804), \ubc30\uc5f4(\ub9ac\uc2a4\ud2b8), \ubb38\uc790\uc5f4, \uc22b\uc790, \ubd88\ub9ac\uc5b8 \uadf8\ub9ac\uace0 \ub110\n",
"- \uac1d\uccb4\uc758 \ud0a4\ub294 \ubc18\ub4dc\uc2dc \ubb38\uc790\uc5f4\n",
"- JSON \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ub77c\uc774\ube0c\ub7ec\ub9ac\uac00 \uba87 \uac1c \uc788\uc9c0\ub9cc \ud45c\uc900 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc778 json \uc0ac\uc6a9"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# With double-quoted keys and strings the text is valid JSON, so json.loads no longer raises ValueError\n",
"result = json.loads(obj)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 269
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 270,
"text": [
"{u'name': u'Wes',\n",
" u'pet': None,\n",
" u'places_lived': [u'United States', u'Spain', u'Germany'],\n",
" u'siblings': [{u'age': 25, u'name': u'Scott', u'pet': u'Zuko'},\n",
" {u'age': 33, u'name': u'Katie', u'pet': u'Cisco'}]}"
]
}
],
"prompt_number": 270
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### json.dumps\ub294 \ud30c\uc774\uc36c \uac1d\uccb4\ub97c JSON \ud615\ud0dc\ub85c \ubcc0\ud658"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"asjson = json.dumps(result)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 271
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# '\uac00 \uc544\ub2c8\ub77c \"\uc778 \uac83\uc744 \ud655\uc778\ud558\uc790\n",
"asjson"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 272,
"text": [
"'{\"pet\": null, \"siblings\": [{\"pet\": \"Zuko\", \"age\": 25, \"name\": \"Scott\"}, {\"pet\": \"Cisco\", \"age\": 33, \"name\": \"Katie\"}], \"name\": \"Wes\", \"places_lived\": [\"United States\", \"Spain\", \"Germany\"]}'"
]
}
],
"prompt_number": 272
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### JSON \uac1d\uccb4\ub098 \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame\uc774\ub098 \ub2e4\ub978 \uc790\ub8cc \uad6c\uc870\ub85c \uc5b4\ub5bb\uac8c \ubcc0\ud658\ud574\uc11c \ubd84\uc11d\uc744 \ud560 \uac83\uc778\uc9c0\ub294 \ub3c5\uc790\uc758 \ubaab\n",
"\n",
"- JSON \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame \uc0dd\uc131\uc790\ub85c \ub118\uae30\uace0 \ub370\uc774\ud130 \ud544\ub4dc \uc120\ud0dd \uac00\ub2a5"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"siblings = DataFrame(result['siblings'], columns=['name', 'age'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 273
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"siblings"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Scott | \n",
" 25 | \n",
"
\n",
" \n",
" 1 | \n",
" Katie | \n",
" 33 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 274,
"text": [
" name age\n",
"0 Scott 25\n",
"1 Katie 33"
]
}
],
"prompt_number": 274
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \ucc45\uc5d0 \ub098\uc640\uc788\uc9c0 \uc54a\uc740 \ub0b4\uc6a9\uc744 \ud55c \ubc88 \ub354 \ud574\ubd10\uc57c \uc27d\uac8c \uc774\ud574\uac00 \ub418\ub294\ub4ef\n",
"siblings2 = DataFrame(result['siblings'], columns=['name', 'age', 'pet'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 275
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"siblings2"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" pet | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Scott | \n",
" 25 | \n",
" Zuko | \n",
"
\n",
" \n",
" 1 | \n",
" Katie | \n",
" 33 | \n",
" Cisco | \n",
"
\n",
" \n",
"
\n",
"
"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 276,
"text": [
" name age pet\n",
"0 Scott 25 Zuko\n",
"1 Katie 33 Cisco"
]
}
],
"prompt_number": 276
},
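{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### pandas\uc5d0\uc11c JSON\uc744 \ube60\ub974\uac8c \uc77d\uace0(from_json) \uc4f0\ub294(to_json) \ub124\uc774\ud2f0\ube0c \uad6c\ud604 \uc911\n",
"\n",
"- \ucc38\uace0: \ud604\uc7ac pandas\ub294 `pd.read_json`\uacfc `DataFrame.to_json`\uc744 \uc81c\uacf5\ud55c\ub2e4."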
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"6.1.5 XML\uacfc HTML: \uc6f9 \ub0b4\uc6a9 \uae01\uc5b4\uc624\uae30"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### [lxml](http://lxml.de)\n",
"\n",
"- \uc544\uc8fc \ud070 \ud30c\uc77c\uc744 \ube60\ub974\uac8c \ucc98\ub9ac \uac00\ub2a5\n",
"- \uc5ec\ub7ec \uc885\ub958\uc758 \uc778\ud130\ud398\uc774\uc2a4 \uc81c\uacf5\n",
"- lxml.html: HTML \ucc98\ub9ac\n",
"- lxml.objectify: XML \ucc98\ub9ac\n",
"\n",
"#### \ub300\ubd80\ubd84\uc758 \uc6f9\uc0ac\uc774\ud2b8\ub294 \ub531 \ud544\uc694\ud55c \ub0b4\uc6a9\ub9cc \ub4e4\uc5b4\uc788\ub294 JSON\uc774\ub098 XML\uc744 \ub9ce\uc774 \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uace0 HTML\uc744 \uc0ac\uc6a9"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from lxml.html import parse\n",
"from urllib2 import urlopen\n",
"\n",
"# \ub370\uc774\ud130\ub97c \uac00\uc838 \uc62c url\uc744 \ub118\uae34 \ud6c4\n",
"# \ub370\uc774\ud130\ub97c \ubc1b\uc544 \uc628 \ud6c4 parse\n",
"parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))\n",
"\n",
"doc = parsed.getroot()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 277
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### doc \uac1d\uccb4\uc5d0\ub294 \ubaa8\ub4e0 HTML \ud0dc\uadf8 \ucd94\ucd9c\n",
"\n",
"- \uc6b0\ub9ac\uac00 \uad00\uc2ec \uac00\uc838\uc57c \ud560 table \ud0dc\uadf8\ub3c4 \ud3ec\ud568\n",
"- \uc5b4\ub5bb\uac8c \ub3d9\uc791\ud558\ub294\uc9c0 \ud655\uc778\ud558\uae30 \uc704\ud574 \uae01\uc5b4\uc628 HTML \ubb38\uc11c\uc5d0\uc11c \uc678\ubd80 \uc5f0\uacb0 URL\uc744 \ubaa8\ub450 \ucc3e\uc544\ubcf4\uc790.\n",
"- \uc678\ubd80 \uc5f0\uacb0\uc740 a \ud0dc\uadf8\ub85c \uc9c0\uc815\n",
"- findall \uba54\uc11c\ub4dc\uc5d0 XPath(\ubb38\uc11c \uc9c8\uc758 \uc5b8\uc5b4)\ub97c \uc0ac\uc6a9\ud574\uc11c \ud574\ub2f9 \uc5d8\ub9ac\uba3c\ud2b8\ub97c \uac00\uc838\uc62c \uc218 \uc788\ub2e4.\n",
"\n",
"#### XPath tutorial site\n",
"\n",
"- [W3schools](http://www.w3schools.com/XPath/)\n",
"- [XPath and XSLT with lxml](http://lxml.de/xpathxslt.html)\n",
"- [Using Chrome Developer Tools](http://stackoverflow.com/questions/3030487/is-there-a-way-to-get-the-xpath-in-google-chrome)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"links = doc.findall('.//a')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 278
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \uc774 \uac1d\uccb4\ub294 HTML \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\uc77c \ubfd0\n",
"# URL\uacfc \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc624\ub824\uba74 \uac01 \uc5d8\ub9ac\uba3c\ud2b8\uc5d0 \ub300\ud574 get \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud558\uc5ec URL\uc744 \uc5bb\uace0\n",
"# text_content \uba54\uc11c\ub4dc\ub97c \uc0ac\uc6a9\ud574\uc11c \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc640\uc57c \ud55c\ub2e4.\n",
"links[15:20]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 279,
"text": [
"[,\n",
" ,\n",
" ,\n",
" ,\n",
" ]"
]
}
],
"prompt_number": 279
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \uc774 \uac1d\uccb4\ub294 HTML \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\uc77c \ubfd0\n",
"\n",
"- \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\ub77c\uace0 \uc0dd\uac01\ud558\uc790. \uc548 \uadf8\ub7ec\uba74 \uc0bd\uc9c8\ud558\uac8c \ub41c\ub2e4!\n",
"- URL\uacfc \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc624\ub824\uba74 \uac01 \uc5d8\ub9ac\uba3c\ud2b8\uc5d0 \ub300\ud574 get \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud558\uc5ec URL\uc744 \uc5bb\uace0, text_content \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc640\uc57c \ud55c\ub2e4."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk = links[28]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 280
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 281,
"text": [
""
]
}
],
"prompt_number": 281
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk.get('href')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 282,
"text": [
"'https://edit.yahoo.com/mc2.0/eval_profile?.intl=us&.lang=en-US&.done=http://finance.yahoo.com/q/op%3fs=AAPL%2bOptions&.src=quote&.intl=us&.lang=en-US'"
]
}
],
"prompt_number": 282
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk.text_content()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 283,
"text": [
"'Account Info'"
]
}
],
"prompt_number": 283
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### [list comprehensions in Python](http://www.pythonforbeginners.com/lists/list-comprehensions-in-python/)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"urls = [lnk.get('href') for lnk in doc.findall('.//a')]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 284
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(urls)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 285,
"text": [
"1239"
]
}
],
"prompt_number": 285
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"urls[-3:-1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 286,
"text": [
"['http://www.capitaliq.com', 'http://www.csidata.com']"
]
}
],
"prompt_number": 286
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"urls[-10:]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 287,
"text": [
"['/q?s=AAPL140517P00780000',\n",
" '/q/op?s=AAPL&k=800.000000',\n",
" '/q?s=AAPL140517P00800000',\n",
" '/q/op?s=AAPL&k=805.000000',\n",
" '/q?s=AAPL140517P00805000',\n",
" '/q/os?s=AAPL&m=2014-05-30',\n",
" 'http://help.yahoo.com/l/us/yahoo/finance/quotes/fitadelay.html',\n",
" 'http://www.capitaliq.com',\n",
" 'http://www.csidata.com',\n",
" 'http://www.morningstar.com/']"
]
}
],
"prompt_number": 287
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### \ucc3e\uace0\uc790 \ud558\ub294 table \uc77c\uc77c\uc774 \ud655\uc778\n",
"\n",
"- \uba87\uba87 \uc6f9\uc0ac\uc774\ud2b8\ub294 table\ub9c8\ub2e4 id \uc18d\uc131\uc744 \uc918\uc11c \uc27d\uac8c \ud560 \uc218 \uc788\uc9c0\ub9cc \uc5b4\ub514 \uc138\uc0c1 \uc77c\uc774 \uc27d\uac8c \ub418\ub294\uac8c \uc788\ub098? \ub178\uac00\ub2e4 \ud574\uc57c\uc9c0.."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tables = doc.findall('.//table')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 288
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tables"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 289,
"text": [
"[,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ]"
]
}
],
"prompt_number": 289
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"calls = tables[9]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 290
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"calls"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 291,
"text": [
""
]
}
],
"prompt_number": 291
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"puts = tables[13]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 292
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rows = calls.findall('.//tr')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 293
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rows"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 294,
"text": [
"[,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
"