{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"toc": true
},
"source": [
"
Table of Contents
\n",
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:12.942927Z",
"start_time": "2019-12-24T17:44:11.943916Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reading and Writing Data in Text Format"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:12.966381Z",
"start_time": "2019-12-24T17:44:12.945392Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a,b,c,d,message\n",
"1,2,3,4,hello\n",
"5,6,7,8,world\n",
"9,10,11,12,foo\n"
]
}
],
"source": [
"!type examples\\ex1.csv"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.113898Z",
"start_time": "2019-12-24T17:44:12.967654Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" | 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(r'examples\\ex1.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.284776Z",
"start_time": "2019-12-24T17:44:13.116362Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" | 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_table('examples/ex1.csv', sep=',')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A file will not always have a header row. Consider this file"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.454943Z",
"start_time": "2019-12-24T17:44:13.290845Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1,2,3,4,hello\n",
"5,6,7,8,world\n",
"9,10,11,12,foo\n"
]
}
],
"source": [
"!type examples\\ex2.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.597498Z",
"start_time": "2019-12-24T17:44:13.458877Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" | 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(r'examples/ex2.csv', header=None)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.738188Z",
"start_time": "2019-12-24T17:44:13.609101Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" | 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(r'examples/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:13.885401Z",
"start_time": "2019-12-24T17:44:13.741950Z"
}
},
"outputs": [],
"source": [
"names=['a', 'b', 'c', 'd', 'message']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.053188Z",
"start_time": "2019-12-24T17:44:13.885401Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
"
\n",
" \n",
" | message | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | hello | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" | world | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" | foo | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d\n",
"message \n",
"hello 1 2 3 4\n",
"world 5 6 7 8\n",
"foo 9 10 11 12"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# make message column to be the index of the returned DataFrame\n",
"\n",
"pd.read_csv(r'examples/ex2.csv', names=names, index_col='message')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the event that you want to form a hierarchical index from multiple columns, pass a list of column numbers or names"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.240514Z",
"start_time": "2019-12-24T17:44:14.054189Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"key1,key2,value1,value2\n",
"one,a,1,2\n",
"one,b,3,4\n",
"one,c,5,6\n",
"one,d,7,8\n",
"two,a,9,10\n",
"two,b,11,12\n",
"two,c,13,14\n",
"two,d,15,16\n"
]
}
],
"source": [
"!type examples\\csv_mindex.csv"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.396063Z",
"start_time": "2019-12-24T17:44:14.242881Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" value1 | \n",
" value2 | \n",
"
\n",
" \n",
" | key1 | \n",
" key2 | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | one | \n",
" a | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" | b | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" | c | \n",
" 5 | \n",
" 6 | \n",
"
\n",
" \n",
" | d | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" | two | \n",
" a | \n",
" 9 | \n",
" 10 | \n",
"
\n",
" \n",
" | b | \n",
" 11 | \n",
" 12 | \n",
"
\n",
" \n",
" | c | \n",
" 13 | \n",
" 14 | \n",
"
\n",
" \n",
" | d | \n",
" 15 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" value1 value2\n",
"key1 key2 \n",
"one a 1 2\n",
" b 3 4\n",
" c 5 6\n",
" d 7 8\n",
"two a 9 10\n",
" b 11 12\n",
" c 13 14\n",
" d 15 16"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parsed = pd.read_csv(r'examples/csv_mindex.csv', index_col=['key1', 'key2'])\n",
"parsed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In some cases, a table might not have a fixed delimiter, using whitespace or some other pattern to separate fields."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.540609Z",
"start_time": "2019-12-24T17:44:14.397062Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[' A B C\\n',\n",
" 'aaa -0.264438 -1.026059 -0.619500\\n',\n",
" 'bbb 0.927272 0.302904 -0.032399\\n',\n",
" 'ccc -0.264273 -0.386314 -0.217601\\n',\n",
" 'ddd -0.871858 -0.348382 1.100491\\n']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(open(r'examples/ex3.txt'))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.697406Z",
"start_time": "2019-12-24T17:44:14.546683Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
" C | \n",
"
\n",
" \n",
" \n",
" \n",
" | aaa | \n",
" -0.264438 | \n",
" -1.026059 | \n",
" -0.619500 | \n",
"
\n",
" \n",
" | bbb | \n",
" 0.927272 | \n",
" 0.302904 | \n",
" -0.032399 | \n",
"
\n",
" \n",
" | ccc | \n",
" -0.264273 | \n",
" -0.386314 | \n",
" -0.217601 | \n",
"
\n",
" \n",
" | ddd | \n",
" -0.871858 | \n",
" -0.348382 | \n",
" 1.100491 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B C\n",
"aaa -0.264438 -1.026059 -0.619500\n",
"bbb 0.927272 0.302904 -0.032399\n",
"ccc -0.264273 -0.386314 -0.217601\n",
"ddd -0.871858 -0.348382 1.100491"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = pd.read_table(r'examples/ex3.txt', sep='\\s+')\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.864981Z",
"start_time": "2019-12-24T17:44:14.700337Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# hey!\n",
"a,b,c,d,message\n",
"# just wanted to make things more difficult for you\n",
"# who reads CSV files with computers, anyway?\n",
"1,2,3,4,hello\n",
"5,6,7,8,world\n",
"9,10,11,12,foo\n"
]
}
],
"source": [
"!type examples\\ex4.csv"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:14.990299Z",
"start_time": "2019-12-24T17:44:14.867993Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" hello | \n",
"
\n",
" \n",
" | 1 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(r'examples/ex4.csv', skiprows=[0, 2,3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Handling missing values is an important and frequently nuanced part of the file parsing process. Missing data is usually either not present (empty string) or marked by some sentinel value. By default, pandas uses a set of commonly occurring sentinels, such as **NA** and **NUL**"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:15.168025Z",
"start_time": "2019-12-24T17:44:14.994393Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"something,a,b,c,d,message\n",
"one,1,2,3,4,NA\n",
"two,5,6,,8,world\n",
"three,9,10,11,12,foo\n"
]
}
],
"source": [
"!type examples\\ex5.csv"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:15.304991Z",
"start_time": "2019-12-24T17:44:15.169022Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3.0 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11.0 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" something a b c d message\n",
"0 one 1 2 3.0 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11.0 12 foo"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = pd.read_csv(r'examples/ex5.csv')\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:15.550424Z",
"start_time": "2019-12-24T17:44:15.307795Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
" | 1 | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" | 2 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" something a b c d message\n",
"0 False False False False False True\n",
"1 False False False True False False\n",
"2 False False False False False False"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.isnull(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The na_values option can take either a list or set of strings to consider missing values"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:15.711268Z",
"start_time": "2019-12-24T17:44:15.554376Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3.0 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11.0 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" something a b c d message\n",
"0 one 1 2 3.0 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11.0 12 foo"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = pd.read_csv(r'examples/ex5.csv', na_values=['NULL'])\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:15.847179Z",
"start_time": "2019-12-24T17:44:15.711268Z"
}
},
"outputs": [],
"source": [
"# Different NA sentinels can be specified for each column in a dict\n",
"\n",
"sentinels = {'message': ['foo', 'NA'], 'something':['two']}"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.005092Z",
"start_time": "2019-12-24T17:44:15.851139Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3.0 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" NaN | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11.0 | \n",
" 12 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" something a b c d message\n",
"0 one 1 2 3.0 4 NaN\n",
"1 NaN 5 6 NaN 8 world\n",
"2 three 9 10 11.0 12 NaN"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(r'examples/ex5.csv', na_values=sentinels)"
]
},
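{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a side note, **read_csv** also has a **keep_default_na** option; passing False disables the built-in NA sentinels, so only values given via na_values are treated as missing. A minimal sketch using the same ex5.csv:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# with keep_default_na=False, the empty field and the literal 'NA' in ex5.csv\n",
"# stay as plain strings instead of becoming NaN\n",
"pd.read_csv(r'examples/ex5.csv', keep_default_na=False)"
]
},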
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-23T12:59:01.714350Z",
"start_time": "2019-12-23T12:59:01.697710Z"
}
},
"source": [
"## Reading Text Files in Pieces"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When processing very large files or figuring out the right set of arguments to correctly process a large file, you may only want to read in a small piece of a file or iterate\n",
"through smaller chunks of the file.Before we look at a large file, we make the pandas display settings more compact"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.156659Z",
"start_time": "2019-12-24T17:44:16.007763Z"
}
},
"outputs": [],
"source": [
"pd.options.display.max_rows = 10"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.353691Z",
"start_time": "2019-12-24T17:44:16.161827Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
" three | \n",
" four | \n",
" key | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.467976 | \n",
" -0.038649 | \n",
" -0.295344 | \n",
" -1.824726 | \n",
" L | \n",
"
\n",
" \n",
" | 1 | \n",
" -0.358893 | \n",
" 1.404453 | \n",
" 0.704965 | \n",
" -0.200638 | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" -0.501840 | \n",
" 0.659254 | \n",
" -0.421691 | \n",
" -0.057688 | \n",
" G | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.204886 | \n",
" 1.074134 | \n",
" 1.388361 | \n",
" -0.982404 | \n",
" R | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.354628 | \n",
" -0.133116 | \n",
" 0.283763 | \n",
" -0.837063 | \n",
" Q | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 9995 | \n",
" 2.311896 | \n",
" -0.417070 | \n",
" -1.409599 | \n",
" -0.515821 | \n",
" L | \n",
"
\n",
" \n",
" | 9996 | \n",
" -0.479893 | \n",
" -0.650419 | \n",
" 0.745152 | \n",
" -0.646038 | \n",
" E | \n",
"
\n",
" \n",
" | 9997 | \n",
" 0.523331 | \n",
" 0.787112 | \n",
" 0.486066 | \n",
" 1.093156 | \n",
" K | \n",
"
\n",
" \n",
" | 9998 | \n",
" -0.362559 | \n",
" 0.598894 | \n",
" -1.843201 | \n",
" 0.887292 | \n",
" G | \n",
"
\n",
" \n",
" | 9999 | \n",
" -0.096376 | \n",
" -1.012999 | \n",
" -0.657431 | \n",
" -0.573315 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
10000 rows × 5 columns
\n",
"
"
],
"text/plain": [
" one two three four key\n",
"0 0.467976 -0.038649 -0.295344 -1.824726 L\n",
"1 -0.358893 1.404453 0.704965 -0.200638 B\n",
"2 -0.501840 0.659254 -0.421691 -0.057688 G\n",
"3 0.204886 1.074134 1.388361 -0.982404 R\n",
"4 0.354628 -0.133116 0.283763 -0.837063 Q\n",
"... ... ... ... ... ..\n",
"9995 2.311896 -0.417070 -1.409599 -0.515821 L\n",
"9996 -0.479893 -0.650419 0.745152 -0.646038 E\n",
"9997 0.523331 0.787112 0.486066 1.093156 K\n",
"9998 -0.362559 0.598894 -1.843201 0.887292 G\n",
"9999 -0.096376 -1.012999 -0.657431 -0.573315 0\n",
"\n",
"[10000 rows x 5 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = pd.read_csv(r'examples/ex6.csv')\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.491655Z",
"start_time": "2019-12-24T17:44:16.356466Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
" three | \n",
" four | \n",
" key | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.467976 | \n",
" -0.038649 | \n",
" -0.295344 | \n",
" -1.824726 | \n",
" L | \n",
"
\n",
" \n",
" | 1 | \n",
" -0.358893 | \n",
" 1.404453 | \n",
" 0.704965 | \n",
" -0.200638 | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" -0.501840 | \n",
" 0.659254 | \n",
" -0.421691 | \n",
" -0.057688 | \n",
" G | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.204886 | \n",
" 1.074134 | \n",
" 1.388361 | \n",
" -0.982404 | \n",
" R | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.354628 | \n",
" -0.133116 | \n",
" 0.283763 | \n",
" -0.837063 | \n",
" Q | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two three four key\n",
"0 0.467976 -0.038649 -0.295344 -1.824726 L\n",
"1 -0.358893 1.404453 0.704965 -0.200638 B\n",
"2 -0.501840 0.659254 -0.421691 -0.057688 G\n",
"3 0.204886 1.074134 1.388361 -0.982404 R\n",
"4 0.354628 -0.133116 0.283763 -0.837063 Q"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nrows - read a small number of rows \n",
"\n",
"pd.read_csv(r'examples/ex6.csv', nrows=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To read a file in pieces, specify a **chunksize** as a number of rows\n",
"\n",
"The TextParser object returned by read_csv allows you to iterate over the parts of the file according to the chunksize."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.639722Z",
"start_time": "2019-12-24T17:44:16.497681Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chunker = pd.read_csv(r'examples/ex6.csv', chunksize=1000)\n",
"chunker"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.931145Z",
"start_time": "2019-12-24T17:44:16.643350Z"
}
},
"outputs": [],
"source": [
"chunker = pd.read_csv(r'examples/ex6.csv', chunksize=1000)\n",
"tot = pd.Series([])\n",
"for piece in chunker:\n",
" tot = tot.add(piece['key'].value_counts(), fill_value=0)\n",
"tot = tot.sort_values(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:16.948713Z",
"start_time": "2019-12-24T17:44:16.933227Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"E 368.0\n",
"X 364.0\n",
"L 346.0\n",
"O 343.0\n",
"Q 340.0\n",
"M 338.0\n",
"J 337.0\n",
"F 335.0\n",
"K 334.0\n",
"H 330.0\n",
"dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tot[:10]"
]
},
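{
"cell_type": "markdown",
"metadata": {},
"source": [
"The reader returned when a **chunksize** is given also has a **get_chunk** method, which reads an arbitrary number of rows on demand (a brief sketch; the size of 5 here is arbitrary):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"chunker = pd.read_csv(r'examples/ex6.csv', chunksize=1000)\n",
"\n",
"# get_chunk pulls the next batch of rows from the file, here just 5\n",
"chunker.get_chunk(5)"
]
},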
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Writing Data to Text Format"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.102861Z",
"start_time": "2019-12-24T17:44:16.948713Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" something | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" message | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" one | \n",
" 1 | \n",
" 2 | \n",
" 3.0 | \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" two | \n",
" 5 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
" world | \n",
"
\n",
" \n",
" | 2 | \n",
" three | \n",
" 9 | \n",
" 10 | \n",
" 11.0 | \n",
" 12 | \n",
" foo | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" something a b c d message\n",
"0 one 1 2 3.0 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11.0 12 foo"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv(r'examples/ex5.csv')\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using DataFrame’s **to_csv** method, we can write the data out to a comma separated file"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.249514Z",
"start_time": "2019-12-24T17:44:17.105944Z"
}
},
"outputs": [],
"source": [
"data.to_csv(r'examples/out.csv')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.428423Z",
"start_time": "2019-12-24T17:44:17.257017Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
",something,a,b,c,d,message\n",
"0,one,1,2,3.0,4,\n",
"1,two,5,6,,8,world\n",
"2,three,9,10,11.0,12,foo\n"
]
}
],
"source": [
"!type examples\\out.csv"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.544879Z",
"start_time": "2019-12-24T17:44:17.431586Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|something|a|b|c|d|message\r\n",
"0|one|1|2|3.0|4|\r\n",
"1|two|5|6||8|world\r\n",
"2|three|9|10|11.0|12|foo\r\n"
]
}
],
"source": [
"import sys\n",
"\n",
"data.to_csv(sys.stdout, sep='|')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Missing values appear as empty strings in the output. You might want to denote them by some other sentinel value"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.693974Z",
"start_time": "2019-12-24T17:44:17.552304Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
",something,a,b,c,d,message\r\n",
"0,one,1,2,3.0,4,NULL\r\n",
"1,two,5,6,NULL,8,world\r\n",
"2,three,9,10,11.0,12,foo\r\n"
]
}
],
"source": [
"data.to_csv(sys.stdout, na_rep='NULL')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.831547Z",
"start_time": "2019-12-24T17:44:17.699052Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"one,1,2,3.0,4,\r\n",
"two,5,6,,8,world\r\n",
"three,9,10,11.0,12,foo\r\n"
]
}
],
"source": [
"# With no other options specified, both the row and column labels \n",
"# are written. Both of these can be disabled\n",
"\n",
"data.to_csv(sys.stdout, index=False, header=False)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:17.980296Z",
"start_time": "2019-12-24T17:44:17.836819Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a,b,c\r\n",
"1,2,3.0\r\n",
"5,6,\r\n",
"9,10,11.0\r\n"
]
}
],
"source": [
"# write only a subset of the columns, and in an order of your choosing\n",
"\n",
"data.to_csv(sys.stdout ,index=False, columns=['a', 'b', 'c'])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:18.437609Z",
"start_time": "2019-12-24T17:44:17.984237Z"
}
},
"outputs": [],
"source": [
"#Series\n",
"\n",
"dates = pd.date_range('1/1/2020', periods=10)\n",
"ts = pd.Series(np.arange(10), index=dates)\n",
"ts.to_csv(r'examples/tseries.csv', header=False)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:18.595013Z",
"start_time": "2019-12-24T17:44:18.443009Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-01-01,0\n",
"2020-01-02,1\n",
"2020-01-03,2\n",
"2020-01-04,3\n",
"2020-01-05,4\n",
"2020-01-06,5\n",
"2020-01-07,6\n",
"2020-01-08,7\n",
"2020-01-09,8\n",
"2020-01-10,9\n"
]
}
],
"source": [
"!type examples\\tseries.csv"
]
},
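{
"cell_type": "markdown",
"metadata": {},
"source": [
"To load the series back in, read_csv can parse the first column as dates and use it as the index (a minimal sketch; note it returns a one-column DataFrame rather than a Series):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# header=None because the file was written without a header row above\n",
"pd.read_csv(r'examples/tseries.csv', header=None, index_col=0, parse_dates=True)"
]
},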
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Working with Delimited Formats\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It’s possible to load most forms of tabular data from disk using functions like pandas.read_table. In some cases, however, some manual processing may be necessary.\n",
"It’s not uncommon to receive a file with one or more malformed lines that trip up read_table"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:18.746774Z",
"start_time": "2019-12-24T17:44:18.596249Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"\"1\",\"2\",\"3\"\n",
"\"1\",\"2\",\"3\"\n"
]
}
],
"source": [
"!type examples\\ex7.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For any file with a single-character delimiter, you can use Python’s built-in csv module. To use it, pass any open file or file-like object to csv.reader"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:18.872970Z",
"start_time": "2019-12-24T17:44:18.749959Z"
}
},
"outputs": [],
"source": [
"import csv\n",
"\n",
"f = open(r'examples/ex7.csv')\n",
"reader = csv.reader(f)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.022732Z",
"start_time": "2019-12-24T17:44:18.878009Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a', 'b', 'c']\n",
"['1', '2', '3']\n",
"['1', '2', '3']\n"
]
}
],
"source": [
"for line in reader:\n",
" print(line)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.162044Z",
"start_time": "2019-12-24T17:44:19.026915Z"
}
},
"outputs": [],
"source": [
"with open(r'examples/ex7.csv') as f:\n",
" lines = list(csv.reader(f))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.313160Z",
"start_time": "2019-12-24T17:44:19.163107Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a', 'b', 'c']\n",
"[['1', '2', '3'], ['1', '2', '3']]\n"
]
}
],
"source": [
"header, values = lines[0], lines[1:]\n",
"print(header)\n",
"print(values)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.469890Z",
"start_time": "2019-12-24T17:44:19.318193Z"
},
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 1\n",
"2 2\n",
"3 3\n"
]
}
],
"source": [
"for i, j in zip(*values):\n",
" print(i, j)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.610273Z",
"start_time": "2019-12-24T17:44:19.473506Z"
}
},
"outputs": [],
"source": [
"data_dict = {h: v for h, v in zip(header, zip(*values))}"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.754072Z",
"start_time": "2019-12-24T17:44:19.618224Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_dict"
]
},
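{
"cell_type": "markdown",
"metadata": {},
"source": [
"The csv module can also write delimited files manually via **csv.writer**, which takes an open, writable file object plus the same delimiter and quoting options as csv.reader (a short sketch; the output name mydata.csv is just an example):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# write a few rows using ';' as the field delimiter\n",
"with open(r'examples/mydata.csv', 'w', newline='') as f:\n",
"    writer = csv.writer(f, delimiter=';')\n",
"    writer.writerow(('a', 'b', 'c'))\n",
"    writer.writerow(('1', '2', '3'))\n",
"    writer.writerow(('4', '5', '6'))"
]
},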
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-23T16:17:13.529427Z",
"start_time": "2019-12-23T16:17:13.524002Z"
}
},
"source": [
"## JSON Data"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:19.901997Z",
"start_time": "2019-12-24T17:44:19.761474Z"
}
},
"outputs": [],
"source": [
"obj = '''\n",
"{\"name\": \"Wes\",\n",
" \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
" \"pet\": null,\n",
" \"siblings\": [{\"name\": \"Scott\", \"age\": 30, \"pets\": [\"Zeus\", \"Zuko\"]},\n",
" {\"name\": \"Katie\", \"age\": 38,\n",
" \"pets\": [\"Sixes\", \"Stache\", \"Cisco\"]}]\n",
"}\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.068177Z",
"start_time": "2019-12-24T17:44:19.907459Z"
}
},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.222470Z",
"start_time": "2019-12-24T17:44:20.072556Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'name': 'Wes',\n",
" 'places_lived': ['United States', 'Spain', 'Germany'],\n",
" 'pet': None,\n",
" 'siblings': [{'name': 'Scott', 'age': 30, 'pets': ['Zeus', 'Zuko']},\n",
" {'name': 'Katie', 'age': 38, 'pets': ['Sixes', 'Stache', 'Cisco']}]}"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = json.loads(obj)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.371155Z",
"start_time": "2019-12-24T17:44:20.228491Z"
}
},
"outputs": [],
"source": [
"# json.dumps, on the other hand, converts a Python object back to JSON:\n",
"\n",
"asjson = json.dumps(result)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.540849Z",
"start_time": "2019-12-24T17:44:20.377740Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
" pets | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Scott | \n",
" 30 | \n",
" [Zeus, Zuko] | \n",
"
\n",
" \n",
" | 1 | \n",
" Katie | \n",
" 38 | \n",
" [Sixes, Stache, Cisco] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age pets\n",
"0 Scott 30 [Zeus, Zuko]\n",
"1 Katie 38 [Sixes, Stache, Cisco]"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"siblings = pd.DataFrame(result['siblings'], columns=['name', 'age', 'pets'])\n",
"siblings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The **pandas.read_json** can automatically convert JSON datasets in specific arrangements into a Series or DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.729151Z",
"start_time": "2019-12-24T17:44:20.544383Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{\"a\": 1, \"b\": 2, \"c\": 3},\n",
" {\"a\": 4, \"b\": 5, \"c\": 6},\n",
" {\"a\": 7, \"b\": 8, \"c\": 9}]\n"
]
}
],
"source": [
"!type examples\\example.json"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:20.868493Z",
"start_time": "2019-12-24T17:44:20.731146Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" | 1 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
"
\n",
" \n",
" | 2 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 1 2 3\n",
"1 4 5 6\n",
"2 7 8 9"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_json(r'examples/example.json')\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:21.008429Z",
"start_time": "2019-12-24T17:44:20.873164Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"a\":{\"0\":1,\"1\":4,\"2\":7},\"b\":{\"0\":2,\"1\":5,\"2\":8},\"c\":{\"0\":3,\"1\":6,\"2\":9}}\n",
"[{\"a\":1,\"b\":2,\"c\":3},{\"a\":4,\"b\":5,\"c\":6},{\"a\":7,\"b\":8,\"c\":9}]\n"
]
}
],
"source": [
"# pandas -> json\n",
"\n",
"print(data.to_json())\n",
"print(data.to_json(orient='records'))"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"ExecuteTime": {
"end_time": "2019-12-24T17:44:21.143500Z",
"start_time": "2019-12-24T17:44:21.013899Z"
}
},
"outputs": [],
"source": [
"# **ToDo:**\n",
"\n",
"# 1.5 - XML and HTML: Web Scraping\n",
"# 2 - Binary Data Formats\n",
"# 2.1 - Using HDF5 Format\n",
"# 2.2 - Reading Microsoft Excel Files\n",
"# 3 - Interacting with Web APIs\n",
"# 4 - Interacting with Databases"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}