{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Chapter 18: Code listing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Robert Johansson\n",
"\n",
"Source code listings for [Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib](https://www.apress.com/us/book/9781484242452) (ISBN 978-1-484242-45-2)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from __future__ import print_function"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"np.random.seed(0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import h5py"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tables"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pickle\n",
"# import cPickle"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import msgpack"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CSV"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting playerstats-2013-2014.csv\n"
]
}
],
"source": [
"%%writefile playerstats-2013-2014.csv\n",
"# 2013-2014 / Regular Season / All Skaters / Summary / Points\n",
"Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n",
"1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n",
"2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n",
"3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n",
"4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5\n",
"5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting playerstats-2013-2014-top30.csv\n"
]
}
],
"source": [
"%%writefile playerstats-2013-2014-top30.csv\n",
"# 2013-2014 / Regular Season / All Skaters / Summary / Points\n",
"Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n",
"1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n",
"2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n",
"3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n",
"4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5\n",
"5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0\n",
"6,Phil Kessel,TOR,R,82,37,43,80,-5,27,8,20,0,0,6,0,305,12.1,20:39,24.5,14.3\n",
"7,Taylor Hall,EDM,L,75,27,53,80,-15,44,7,17,0,1,1,1,250,10.8,20:00,25.4,45.7\n",
"8,Alex Ovechkin,WSH,L,78,51,28,79,-35,48,24,39,0,1,10,3,386,13.2,20:32,21.8,66.7\n",
"9,Joe Pavelski,SJS,C,82,41,38,79,+23,32,16,31,1,2,3,0,225,18.2,19:51,27.1,56.0\n",
"10,Jamie Benn,DAL,L,81,34,45,79,+21,64,5,19,1,3,3,1,279,12.2,19:09,25.0,52.8\n",
"11,Nicklas Backstrom,WSH,C,82,18,61,79,-20,54,6,44,1,1,1,0,196,9.2,19:48,23.3,50.4\n",
"12,Patrick Sharp,CHI,L,82,34,44,78,+13,40,10,25,0,0,3,1,313,10.9,18:53,22.7,54.6\n",
"13,Joe Thornton,SJS,C,82,11,65,76,+20,32,2,19,0,1,3,1,122,9.0,18:55,26.3,56.1\n",
"14,Erik Karlsson,OTT,D,82,20,54,74,-15,36,5,31,0,0,1,0,257,7.8,27:04,28.6,0.0\n",
"15,Evgeni Malkin,PIT,C,60,23,49,72,+10,62,7,30,0,0,3,0,191,12.0,20:03,21.4,48.8\n",
"16,Patrick Marleau,SJS,L,82,33,37,70,+0,18,11,23,2,2,4,0,285,11.6,20:31,27.3,52.9\n",
"17,Anze Kopitar,LAK,C,82,29,41,70,+34,24,10,23,0,0,9,2,200,14.5,20:53,25.4,53.3\n",
"18,Matt Duchene,COL,C,71,23,47,70,+8,19,5,17,0,0,6,1,217,10.6,18:29,22.0,50.3\n",
"19,Martin St. Louis,\"TBL, NYR\",R,81,30,39,69,+13,10,9,21,1,2,5,1,204,14.7,20:56,25.7,40.7\n",
"20,Patrick Kane,CHI,R,69,29,40,69,+7,22,10,25,0,0,6,0,227,12.8,19:36,22.9,50.0\n",
"21,Blake Wheeler,WPG,R,82,28,41,69,+4,63,8,19,0,0,4,2,225,12.4,18:41,24.0,37.5\n",
"22,Kyle Okposo,NYI,R,71,27,42,69,-9,51,5,15,0,0,4,1,195,13.8,20:26,22.2,47.5\n",
"23,David Krejci,BOS,C,80,19,50,69,+39,28,3,19,0,0,6,1,169,11.2,19:07,21.3,51.2\n",
"24,Chris Kunitz,PIT,L,78,35,33,68,+25,66,13,22,0,0,8,0,218,16.1,19:09,22.2,75.0\n",
"25,Jonathan Toews,CHI,C,76,28,40,68,+26,34,5,15,3,5,5,0,193,14.5,20:28,25.9,57.2\n",
"26,Thomas Vanek,\"BUF, NYI, MTL\",L,78,27,41,68,+7,46,8,18,0,0,4,0,248,10.9,19:21,21.6,43.5\n",
"27,Jaromir Jagr,NJD,R,82,24,43,67,+16,46,5,17,0,0,6,1,231,10.4,19:09,22.8,0.0\n",
"28,John Tavares,NYI,C,59,24,42,66,-6,40,8,25,0,0,4,0,188,12.8,21:14,22.3,49.1\n",
"29,Jason Spezza,OTT,C,75,23,43,66,-26,46,9,22,0,0,5,0,223,10.3,18:12,23.8,54.0\n",
"30,Jordan Eberle,EDM,R,80,28,37,65,-11,18,7,20,1,1,4,1,200,14.0,19:32,25.4,38.1"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# 2013-2014 / Regular Season / All Skaters / Summary / Points\n",
"Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n",
"1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n",
"2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n",
"3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n"
]
}
],
"source": [
"!head -n 5 playerstats-2013-2014-top30.csv"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rows = []"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"playerstats-2013-2014.csv\") as f:\n",
"    csvreader = csv.reader(f)\n",
"    rows = list(csvreader)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['Player', 'Team', 'Pos', 'GP', 'G']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows[1][1:6]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['Sidney Crosby', 'PIT', 'C', '80', '36']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows[2][1:6]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = np.random.randn(100, 3)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"np.savetxt(\"data.csv\", data, delimiter=\",\", header=\"x, y, z\", comments=\"# Random x, y, z coordinates\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# Random x, y, z coordinates\n",
"x, y, z\n",
"1.764052345967664026e+00,4.001572083672232938e-01,9.787379841057392005e-01\n",
"2.240893199201457797e+00,1.867557990149967484e+00,-9.772778798764110153e-01\n",
"9.500884175255893682e-01,-1.513572082976978872e-01,-1.032188517935578448e-01\n"
]
}
],
"source": [
"!head -n 5 data.csv"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data_load = np.loadtxt(\"data.csv\", skiprows=2, delimiter=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 2.2408932 , 1.86755799, -0.97727788])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_load[1,:]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_load.dtype"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(data == data_load).all()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = np.loadtxt(\"playerstats-2013-2014.csv\", skiprows=2, delimiter=\",\", dtype=bytes)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([b'Sidney Crosby', b'PIT', b'C', b'80', b'36'], dtype='|S13')"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[0][1:6]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 68., 104., 18.],\n",
" [ 56., 87., 28.],\n",
" [ 58., 86., 7.],\n",
" [ 47., 84., 16.],\n",
" [ 39., 82., 32.]])"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.loadtxt(\"playerstats-2013-2014.csv\", skiprows=2, delimiter=\",\", usecols=[6,7,8])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.read_csv(\"playerstats-2013-2014.csv\", skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = df.set_index(\"Rank\")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Player | \n",
" GP | \n",
" G | \n",
" A | \n",
" P | \n",
"
\n",
" \n",
" Rank | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Sidney Crosby | \n",
" 80 | \n",
" 36 | \n",
" 68 | \n",
" 104 | \n",
"
\n",
" \n",
" 2 | \n",
" Ryan Getzlaf | \n",
" 77 | \n",
" 31 | \n",
" 56 | \n",
" 87 | \n",
"
\n",
" \n",
" 3 | \n",
" Claude Giroux | \n",
" 82 | \n",
" 28 | \n",
" 58 | \n",
" 86 | \n",
"
\n",
" \n",
" 4 | \n",
" Tyler Seguin | \n",
" 80 | \n",
" 37 | \n",
" 47 | \n",
" 84 | \n",
"
\n",
" \n",
" 5 | \n",
" Corey Perry | \n",
" 81 | \n",
" 43 | \n",
" 39 | \n",
" 82 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Player GP G A P\n",
"Rank \n",
"1 Sidney Crosby 80 36 68 104\n",
"2 Ryan Getzlaf 77 31 56 87\n",
"3 Claude Giroux 82 28 58 86\n",
"4 Tyler Seguin 80 37 47 84\n",
"5 Corey Perry 81 43 39 82"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"Player\", \"GP\", \"G\", \"A\", \"P\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Int64Index: 5 entries, 1 to 5\n",
"Data columns (total 20 columns):\n",
"Player 5 non-null object\n",
"Team 5 non-null object\n",
"Pos 5 non-null object\n",
"GP 5 non-null int64\n",
"G 5 non-null int64\n",
"A 5 non-null int64\n",
"P 5 non-null int64\n",
"+/- 5 non-null int64\n",
"PIM 5 non-null int64\n",
"PPG 5 non-null int64\n",
"PPP 5 non-null int64\n",
"SHG 5 non-null int64\n",
"SHP 5 non-null int64\n",
"GW 5 non-null int64\n",
"OT 5 non-null int64\n",
"S 5 non-null int64\n",
"S% 5 non-null float64\n",
"TOI/GP 5 non-null object\n",
"Shift/GP 5 non-null float64\n",
"FO% 5 non-null float64\n",
"dtypes: float64(3), int64(13), object(4)\n",
"memory usage: 840.0+ bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df[[\"Player\", \"GP\", \"G\", \"A\", \"P\"]].to_csv(\"playerstats-2013-2014-subset.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rank,Player,GP,G,A,P\n",
"1,Sidney Crosby,80,36,68,104\n",
"2,Ryan Getzlaf,77,31,56,87\n",
"3,Claude Giroux,82,28,58,86\n",
"4,Tyler Seguin,80,37,47,84\n"
]
}
],
"source": [
"!head -n 5 playerstats-2013-2014-subset.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# HDF5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## h5py"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import h5py"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# mode = \"w\", \"r\", \"w-\", \"r+\", \"a\""
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f = h5py.File(\"data.h5\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'r+'"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.mode"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f.flush()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f = h5py.File(\"data.h5\", \"w\")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/'"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.name"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grp1 = f.create_group(\"experiment1\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/experiment1'"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grp1.name"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grp2_meas = f.create_group(\"experiment2/measurement\")"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/experiment2/measurement'"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grp2_meas.name"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grp2_sim = f.create_group(\"experiment2/simulation\")"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/experiment2/simulation'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grp2_sim.name"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f[\"/experiment1\"]"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f[\"/experiment2/simulation\"]"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grp_expr2 = f[\"/experiment2\"]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grp_expr2['simulation']"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['experiment1', 'experiment2']"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(f.keys())"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('experiment1', ),\n",
" ('experiment2', )]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(f.items())"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"experiment1\n",
"experiment2\n",
"experiment2/measurement\n",
"experiment2/simulation\n"
]
}
],
"source": [
"f.visit(print)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"experiment1 \n",
"experiment2 \n",
"experiment2/measurement \n",
"experiment2/simulation \n"
]
}
],
"source": [
"f.visititems(print)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"experiment1\" in f"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"simulation\" in f[\"experiment2\"]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"experiment3\" in f"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.flush()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/ Group\n",
"/experiment1 Group\n",
"/experiment2 Group\n",
"/experiment2/measurement Group\n",
"/experiment2/simulation Group\n"
]
}
],
"source": [
"!h5ls -r data.h5"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data1 = np.arange(10)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data2 = np.random.randn(100, 100)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f[\"array1\"] = data1"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f[\"/experiment2/measurement/meas1\"] = data2"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array1 \n",
"experiment1 \n",
"experiment2 \n",
"experiment2/measurement \n",
"experiment2/measurement/meas1 \n",
"experiment2/simulation \n"
]
}
],
"source": [
"f.visititems(print)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds = f[\"array1\"]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/array1'"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.name"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.dtype"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(10,)"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.shape"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.len()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/rob/miniconda3/envs/py3.6/lib/python3.6/site-packages/h5py/_hl/dataset.py:313: H5pyDeprecationWarning: dataset.value has been deprecated. Use dataset[()] instead.\n",
" \"Use dataset[()] instead.\", H5pyDeprecationWarning)\n"
]
},
{
"data": {
"text/plain": [
"array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[()]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds = f[\"/experiment2/measurement/meas1\"]"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dtype(' 2.0"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"((100,), dtype('bool'))"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mask.shape, mask.dtype"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([2.04253623, 2.1041854 , 2.05689385])"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[mask, 0]"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 2.04253623, -0.91946118, 0.11467003, -0.1374237 , 1.36552692],\n",
" [ 2.1041854 , 0.22725706, -1.1291663 , -0.28133197, -0.7394167 ],\n",
" [ 2.05689385, 0.18041971, -0.06670925, -0.02835398, 0.48480475]])"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[mask, :5]"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# create empty data sets, assign and update datasets"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds = f.create_dataset(\"array2\", data=np.random.randint(10, size=10))"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 2, 2, 4, 7, 3, 7, 2, 4, 1])"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[()]"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds = f.create_dataset(\"/experiment2/simulation/data1\", shape=(5, 5), fillvalue=-1)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[-1., -1., -1., -1., -1.],\n",
" [-1., -1., -1., -1., -1.],\n",
" [-1., -1., -1., -1., -1.],\n",
" [-1., -1., -1., -1., -1.],\n",
" [-1., -1., -1., -1., -1.]], dtype=float32)"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[()]"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds = f.create_dataset(\"/experiment1/simulation/data1\", shape=(5000, 5000, 5000),\n",
" fillvalue=0, compression='gzip')"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds[:, 0, 0] = np.random.rand(5000)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ds[1, :, 0] += np.random.rand(5000)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.6939344 , 0. , 0. , 0. , 0. ],\n",
" [1.4819994 , 0.01639538, 0.54387355, 0.11130908, 0.9928771 ]],\n",
" dtype=float32)"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds[:2, :5, 0]"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.0"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.fillvalue"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"simulation \n",
"simulation/data1 \n"
]
}
],
"source": [
"f[\"experiment1\"].visititems(print)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"465.66128730773926"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"float(np.prod(ds.shape, dtype=np.int64) * ds.dtype.itemsize) / (1024**3)  # uncompressed size in GiB"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.flush()"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'data.h5'"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.filename"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 rob staff 357K May 6 16:11 data.h5\n"
]
}
],
"source": [
"!ls -lh data.h5"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del f[\"/experiment1/simulation/data1\"]"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"simulation \n"
]
}
],
"source": [
"f[\"experiment1\"].visititems(print)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# attributes"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f = h5py.File(\"data.h5\", \"a\")"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.attrs"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.attrs[\"desc\"] = \"Result sets from experiments and simulations\""
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f[\"experiment1\"].attrs[\"date\"] = \"2015-1-1\""
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f[\"experiment2\"].attrs[\"date\"] = \"2015-1-2\""
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f[\"experiment2/simulation/data1\"].attrs[\"k\"] = 1.5"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f[\"experiment2/simulation/data1\"].attrs[\"T\"] = 1000"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['date']"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(f[\"experiment1\"].attrs.keys())"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('T', 1000), ('k', 1.5)]"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(f[\"experiment2/simulation/data1\"].attrs.items())"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"T\" in f[\"experiment2/simulation/data1\"].attrs"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del f[\"experiment2/simulation/data1\"].attrs[\"T\"]"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"T\" in f[\"experiment2/simulation/data1\"].attrs"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f[\"experiment2/simulation/data1\"].attrs[\"t\"] = np.array([1, 2, 3])"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 2, 3])"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f[\"experiment2/simulation/data1\"].attrs[\"t\"]"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PyTables"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.read_csv(\"playerstats-2013-2014-top30.csv\", skiprows=1).set_index(\"Rank\")"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Player | \n",
" Pos | \n",
" GP | \n",
" P | \n",
" G | \n",
" A | \n",
" S% | \n",
" Shift/GP | \n",
"
\n",
" \n",
" Rank | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" Sidney Crosby | \n",
" C | \n",
" 80 | \n",
" 104 | \n",
" 36 | \n",
" 68 | \n",
" 13.9 | \n",
" 24.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Ryan Getzlaf | \n",
" C | \n",
" 77 | \n",
" 87 | \n",
" 31 | \n",
" 56 | \n",
" 15.2 | \n",
" 25.2 | \n",
"
\n",
" \n",
" 3 | \n",
" Claude Giroux | \n",
" C | \n",
" 82 | \n",
" 86 | \n",
" 28 | \n",
" 58 | \n",
" 12.6 | \n",
" 25.1 | \n",
"
\n",
" \n",
" 4 | \n",
" Tyler Seguin | \n",
" C | \n",
" 80 | \n",
" 84 | \n",
" 37 | \n",
" 47 | \n",
" 12.6 | \n",
" 23.4 | \n",
"
\n",
" \n",
" 5 | \n",
" Corey Perry | \n",
" R | \n",
" 81 | \n",
" 82 | \n",
" 43 | \n",
" 39 | \n",
" 15.4 | \n",
" 23.2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Player Pos GP P G A S% Shift/GP\n",
"Rank \n",
"1 Sidney Crosby C 80 104 36 68 13.9 24.0\n",
"2 Ryan Getzlaf C 77 87 31 56 15.2 25.2\n",
"3 Claude Giroux C 82 86 28 58 12.6 25.1\n",
"4 Tyler Seguin C 80 84 37 47 12.6 23.4\n",
"5 Corey Perry R 81 82 43 39 15.4 23.2"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"Player\", \"Pos\", \"GP\", \"P\", \"G\", \"A\", \"S%\", \"Shift/GP\"]].head(5)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f = tables.open_file(\"playerstats-2013-2014.h5\", mode=\"w\")"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grp = f.create_group(\"/\", \"season_2013_2014\", title=\"NHL player statistics for the 2013/2014 season\")"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'\n",
" children := []"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grp"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"/ (RootGroup) ''\n",
" children := ['season_2013_2014' (Group)]"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.root"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class PlayerStat(tables.IsDescription):\n",
"    # Record layout for one row of the player-statistics table.\n",
"    # Text columns are fixed-width strings (20 bytes for the name, 1 byte\n",
"    # for the position code); counters are unsigned ints, rates are doubles.\n",
"    player = tables.StringCol(itemsize=20, dflt=\"\")\n",
"    position = tables.StringCol(itemsize=1, dflt=\"C\")\n",
"    games_played = tables.UInt8Col(dflt=0)\n",
"    points = tables.UInt16Col(dflt=0)\n",
"    goals = tables.UInt16Col(dflt=0)\n",
"    assists = tables.UInt16Col(dflt=0)\n",
"    shooting_percentage = tables.Float64Col(dflt=0.0)\n",
"    shifts_per_game_played = tables.Float64Col(dflt=0.0)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"top30_table = f.create_table(grp, 'top30', PlayerStat, \"Top 30 point leaders\")"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"playerstat = top30_table.row"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"tables.tableextension.Row"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(playerstat)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Map each table field to the CSV column that supplies its value.\n",
"FIELD_TO_CSV_COLUMN = {\n",
"    \"player\": \"Player\",\n",
"    \"position\": \"Pos\",\n",
"    \"games_played\": \"GP\",\n",
"    \"points\": \"P\",\n",
"    \"goals\": \"G\",\n",
"    \"assists\": \"A\",\n",
"    \"shooting_percentage\": \"S%\",\n",
"    \"shifts_per_game_played\": \"Shift/GP\",\n",
"}\n",
"\n",
"for index, row_series in df.iterrows():\n",
"    for table_field, csv_column in FIELD_TO_CSV_COLUMN.items():\n",
"        playerstat[table_field] = row_series[csv_column]\n",
"    # Add the populated row to the table (the table is flushed in a later cell).\n",
"    playerstat.append()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"top30_table.flush()"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([b'Sidney Crosby', b'Ryan Getzlaf', b'Claude Giroux',\n",
" b'Tyler Seguin', b'Corey Perry'], dtype='|S20')"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top30_table.cols.player[:5]"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([104, 87, 86, 84, 82], dtype=uint16)"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top30_table.cols.points[:5]"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def print_playerstat(row):\n",
" print(\"%20s\\t%s\\t%s\\t%s\" %\n",
" (row[\"player\"].decode('UTF-8'), row[\"points\"], row[\"goals\"], row[\"assists\"]))"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sidney Crosby\t104\t36\t68\n",
" Ryan Getzlaf\t87\t31\t56\n",
" Claude Giroux\t86\t28\t58\n",
" Tyler Seguin\t84\t37\t47\n",
" Corey Perry\t82\t43\t39\n",
" Phil Kessel\t80\t37\t43\n",
" Taylor Hall\t80\t27\t53\n",
" Alex Ovechkin\t79\t51\t28\n",
" Joe Pavelski\t79\t41\t38\n",
" Jamie Benn\t79\t34\t45\n",
" Nicklas Backstrom\t79\t18\t61\n",
" Patrick Sharp\t78\t34\t44\n",
" Joe Thornton\t76\t11\t65\n",
" Erik Karlsson\t74\t20\t54\n",
" Evgeni Malkin\t72\t23\t49\n",
" Patrick Marleau\t70\t33\t37\n",
" Anze Kopitar\t70\t29\t41\n",
" Matt Duchene\t70\t23\t47\n",
" Martin St. Louis\t69\t30\t39\n",
" Patrick Kane\t69\t29\t40\n",
" Blake Wheeler\t69\t28\t41\n",
" Kyle Okposo\t69\t27\t42\n",
" David Krejci\t69\t19\t50\n",
" Chris Kunitz\t68\t35\t33\n",
" Jonathan Toews\t68\t28\t40\n",
" Thomas Vanek\t68\t27\t41\n",
" Jaromir Jagr\t67\t24\t43\n",
" John Tavares\t66\t24\t42\n",
" Jason Spezza\t66\t23\t43\n",
" Jordan Eberle\t65\t28\t37\n"
]
}
],
"source": [
"for row in top30_table.iterrows():\n",
" print_playerstat(row)"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Phil Kessel\t80\t37\t43\n",
" Taylor Hall\t80\t27\t53\n",
" Alex Ovechkin\t79\t51\t28\n",
" Joe Pavelski\t79\t41\t38\n",
" Jamie Benn\t79\t34\t45\n",
" Nicklas Backstrom\t79\t18\t61\n",
" Patrick Sharp\t78\t34\t44\n",
" Joe Thornton\t76\t11\t65\n"
]
}
],
"source": [
"for row in top30_table.where(\"(points > 75) & (points <= 80)\"):\n",
" print_playerstat(row)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Alex Ovechkin\t79\t51\t28\n",
" Joe Pavelski\t79\t41\t38\n"
]
}
],
"source": [
"for row in top30_table.where(\"(goals > 40) & (points < 80)\"):\n",
" print_playerstat(row)"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"File(filename=playerstats-2013-2014.h5, title='', mode='w', root_uep='/', filters=Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None))\n",
"/ (RootGroup) ''\n",
"/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'\n",
"/season_2013_2014/top30 (Table(30,)) 'Top 30 point leaders'\n",
" description := {\n",
" \"assists\": UInt16Col(shape=(), dflt=0, pos=0),\n",
" \"games_played\": UInt8Col(shape=(), dflt=0, pos=1),\n",
" \"goals\": UInt16Col(shape=(), dflt=0, pos=2),\n",
" \"player\": StringCol(itemsize=20, shape=(), dflt=b'', pos=3),\n",
" \"points\": UInt16Col(shape=(), dflt=0, pos=4),\n",
" \"position\": StringCol(itemsize=1, shape=(), dflt=b'C', pos=5),\n",
" \"shifts_per_game_played\": Float64Col(shape=(), dflt=0.0, pos=6),\n",
" \"shooting_percentage\": Float64Col(shape=(), dflt=0.0, pos=7)}\n",
" byteorder := 'little'\n",
" chunkshape := (1489,)"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.flush()"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opened \"playerstats-2013-2014.h5\" with sec2 driver.\n",
"/ Group\n",
" Attribute: CLASS scalar\n",
" Type: 5-byte null-terminated UTF-8 string\n",
" Data: \"GROUP\"\n",
" Attribute: PYTABLES_FORMAT_VERSION scalar\n",
" Type: 3-byte null-terminated UTF-8 string\n",
" Data: \"2.1\"\n",
" Attribute: TITLE null\n",
" Type: 1-byte null-terminated UTF-8 string\n",
"\n",
" Attribute: VERSION scalar\n",
" Type: 3-byte null-terminated UTF-8 string\n",
" Data: \"1.0\"\n",
" Location: 1:96\n",
" Links: 1\n",
"/season_2013_2014 Group\n",
" Attribute: CLASS scalar\n",
" Type: 5-byte null-terminated UTF-8 string\n",
" Data: \"GROUP\"\n",
" Attribute: TITLE scalar\n",
" Type: 46-byte null-terminated UTF-8 string\n",
" Data: \"NHL player statistics for the 2013/2014 season\"\n",
" Attribute: VERSION scalar\n",
" Type: 3-byte null-terminated UTF-8 string\n",
" Data: \"1.0\"\n",
" Location: 1:1024\n",
" Links: 1\n",
"/season_2013_2014/top30 Dataset {30/Inf}\n",
" Attribute: CLASS scalar\n",
" Type: 5-byte null-terminated UTF-8 string\n",
" Data: \"TABLE\"\n",
" Attribute: FIELD_0_FILL scalar\n",
" Type: native unsigned short\n",
" Data: 0\n",
" Attribute: FIELD_0_NAME scalar\n",
" Type: 7-byte null-terminated UTF-8 string\n",
" Data: \"assists\"\n",
" Attribute: FIELD_1_FILL scalar\n",
" Type: native unsigned char\n",
" Data: 0\n",
" Attribute: FIELD_1_NAME scalar\n",
" Type: 12-byte null-terminated UTF-8 string\n",
" Data: \"games_played\"\n",
" Attribute: FIELD_2_FILL scalar\n",
" Type: native unsigned short\n",
" Data: 0\n",
" Attribute: FIELD_2_NAME scalar\n",
" Type: 5-byte null-terminated UTF-8 string\n",
" Data: \"goals\"\n",
" Attribute: FIELD_3_FILL scalar\n",
" Type: 1-byte null-terminated ASCII string\n",
" Data: \"\"\n",
" Attribute: FIELD_3_NAME scalar\n",
" Type: 6-byte null-terminated UTF-8 string\n",
" Data: \"player\"\n",
" Attribute: FIELD_4_FILL scalar\n",
" Type: native unsigned short\n",
" Data: 0\n",
" Attribute: FIELD_4_NAME scalar\n",
" Type: 6-byte null-terminated UTF-8 string\n",
" Data: \"points\"\n",
" Attribute: FIELD_5_FILL scalar\n",
" Type: 1-byte null-terminated ASCII string\n",
" Data: \"C\"\n",
" Attribute: FIELD_5_NAME scalar\n",
" Type: 8-byte null-terminated UTF-8 string\n",
" Data: \"position\"\n",
" Attribute: FIELD_6_FILL scalar\n",
" Type: native double\n",
" Data: 0\n",
" Attribute: FIELD_6_NAME scalar\n",
" Type: 22-byte null-terminated UTF-8 string\n",
" Data: \"shifts_per_game_played\"\n",
" Attribute: FIELD_7_FILL scalar\n",
" Type: native double\n",
" Data: 0\n",
" Attribute: FIELD_7_NAME scalar\n",
" Type: 19-byte null-terminated UTF-8 string\n",
" Data: \"shooting_percentage\"\n",
" Attribute: NROWS scalar\n",
" Type: native long\n",
" Data: 30\n",
" Attribute: TITLE scalar\n",
" Type: 20-byte null-terminated UTF-8 string\n",
" Data: \"Top 30 point leaders\"\n",
" Attribute: VERSION scalar\n",
" Type: 3-byte null-terminated UTF-8 string\n",
" Data: \"2.7\"\n",
" Location: 1:2264\n",
" Links: 1\n",
" Chunks: {1489} 65516 bytes\n",
" Storage: 1320 logical bytes, 65516 allocated bytes, 2.01% utilization\n",
" Type: struct {\n",
" \"assists\" +0 native unsigned short\n",
" \"games_played\" +2 native unsigned char\n",
" \"goals\" +3 native unsigned short\n",
" \"player\" +5 20-byte null-terminated ASCII string\n",
" \"points\" +25 native unsigned short\n",
" \"position\" +27 1-byte null-terminated ASCII string\n",
" \"shifts_per_game_played\" +28 native double\n",
" \"shooting_percentage\" +36 native double\n",
" } 44 bytes\n",
"H5tools-DIAG: Error detected in HDF5:tools (1.8.14) thread 0:\n",
" #000: h5tools_dump.c line 1843 in h5tools_dump_mem(): H5Sis_simple failed\n",
" major: Failure in tools library\n",
" minor: error in function\n"
]
}
],
"source": [
"!h5ls -rv playerstats-2013-2014.h5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pandas HDFStore"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"store = pd.HDFStore('store.h5')"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame(np.random.rand(5,5))"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"store[\"df1\"] = df"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.read_csv(\"playerstats-2013-2014-top30.csv\", skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"store[\"df2\"] = df"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['/df1', '/df2']"
]
},
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store.keys()"
]
},
{
"cell_type": "code",
"execution_count": 159,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 159,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'df2' in store"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = store[\"df1\"]"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"/ (RootGroup) ''\n",
" children := ['df1' (Group), 'df2' (Group)]"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store.root"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"store.close()"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Open read-only: the following cells only read from the file, and the\n",
"# default mode of h5py.File differs between h5py releases.\n",
"f = h5py.File(\"store.h5\", \"r\")"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df1 \t\t\t \n",
"df1/axis0 \t\t \n",
"df1/axis1 \t\t \n",
"df1/block0_items \t \n",
"df1/block0_values \t \n",
"df2 \t\t\t \n",
"df2/axis0 \t\t \n",
"df2/axis1 \t\t \n",
"df2/block0_items \t \n",
"df2/block0_values \t \n",
"df2/block1_items \t \n",
"df2/block1_values \t \n",
"df2/block2_items \t \n",
"df2/block2_values \t \n"
]
}
],
"source": [
"f.visititems(lambda x, y: print(x, \"\\t\" * int(3 - len(str(x))//8), y))"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([b'S%', b'Shift/GP', b'FO%'], dtype='|S8')"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dataset.value was removed in h5py 3.0; indexing with [()] reads the whole dataset.\n",
"f[\"/df2/block0_items\"][()]"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[13.9, 24. , 52.5],\n",
" [15.2, 25.2, 49. ],\n",
" [12.6, 25.1, 52.9]])"
]
},
"execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f[\"/df2/block0_values\"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([b'Rank', b'GP', b'G', b'A', b'P', b'+/-', b'PIM', b'PPG', b'PPP',\n",
" b'SHG', b'SHP', b'GW', b'OT', b'S'], dtype='|S4')"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dataset.value was removed in h5py 3.0; indexing with [()] reads the whole dataset.\n",
"f[\"/df2/block1_items\"][()]"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1, 80, 36, 68, 104],\n",
" [ 2, 77, 31, 56, 87],\n",
" [ 3, 82, 28, 58, 86]])"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f[\"/df2/block1_values\"][:3, :5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# JSON"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = [\"string\", 1.0, 2, None]"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data_json = json.dumps(data)"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'[\"string\", 1.0, 2, null]'"
]
},
"execution_count": 171,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_json"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data2 = json.loads(data_json)"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['string', 1.0, 2, None]"
]
},
"execution_count": 173,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the list decoded from JSON (data2), not the original input (data).\n",
"data2"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'string'"
]
},
"execution_count": 174,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Index into the decoded list to confirm the first element survived the round trip.\n",
"data2[0]"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = {\"one\": 1, \"two\": 2.0, \"three\": \"three\"}"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data_json = json.dumps(data)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"one\": 1, \"two\": 2.0, \"three\": \"three\"}\n"
]
}
],
"source": [
"print(data_json)"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = json.loads(data_json)"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2.0"
]
},
"execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"two\"]"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'three'"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"three\"]"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = {\"one\": [1], \n",
" \"two\": [1, 2], \n",
" \"three\": [1, 2, 3]}"
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Pretty-print with one space of indentation per nesting level.\n",
"data_json = json.dumps(data, indent=1)"
]
},
{
"cell_type": "code",
"execution_count": 183,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"one\": [\n",
" 1\n",
" ],\n",
" \"two\": [\n",
" 1,\n",
" 2\n",
" ],\n",
" \"three\": [\n",
" 1,\n",
" 2,\n",
" 3\n",
" ]\n",
"}\n"
]
}
],
"source": [
"print(data_json)"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = {\"one\": [1], \n",
" \"two\": {\"one\": 1, \"two\": 2}, \n",
" \"three\": [(1,), (1, 2), (1, 2, 3)],\n",
" \"four\": \"a text string\"}"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open(\"data.json\", \"w\") as f:\n",
" json.dump(data, f)"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"one\": [1], \"two\": {\"one\": 1, \"two\": 2}, \"three\": [[1], [1, 2], [1, 2, 3]], \"four\": \"a text string\"}"
]
}
],
"source": [
"!cat data.json"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"data.json\", \"r\") as f:\n",
" data_from_file = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'one': 1, 'two': 2}"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_from_file[\"two\"]"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[[1], [1, 2], [1, 2, 3]]"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_from_file[\"three\"]"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"C\": {\n",
" \"color\": \"#149848\", \n",
" \"transfers\": [\n",
" [\n",
" \"C3\", \n",
" \"F15\"\n",
" ], \n",
" [\n",
" \"C4\", \n",
" \"Z2\"\n",
" ], \n",
" [\n",
" \"C4\", \n",
" \"G2\"\n",
" ], \n",
" [\n",
" \"C7\", \n",
" \"M14\"\n",
" ], \n"
]
}
],
"source": [
"!head -n 20 tokyo-metro.json"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 1471 1508 27638 tokyo-metro.json\n"
]
}
],
"source": [
"!wc tokyo-metro.json"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open(\"tokyo-metro.json\", \"r\") as f:\n",
" data = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['C', 'G', 'F', 'H', 'M', 'N', 'T', 'Y', 'Z'])"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.keys()"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['color', 'transfers', 'travel_times'])"
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"C\"].keys()"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'#149848'"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"C\"][\"color\"]"
]
},
{
"cell_type": "code",
"execution_count": 215,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[['C3', 'F15'],\n",
" ['C4', 'Z2'],\n",
" ['C4', 'G2'],\n",
" ['C7', 'M14'],\n",
" ['C7', 'N6'],\n",
" ['C7', 'G6'],\n",
" ['C8', 'M15'],\n",
" ['C8', 'H6'],\n",
" ['C9', 'H7'],\n",
" ['C9', 'Y18'],\n",
" ['C11', 'T9'],\n",
" ['C11', 'M18'],\n",
" ['C11', 'Z8'],\n",
" ['C12', 'M19'],\n",
" ['C18', 'H21']]"
]
},
"execution_count": 215,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"C\"][\"transfers\"]"
]
},
{
"cell_type": "code",
"execution_count": 216,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('C3', 'C4', 1), ('C7', 'C8', 1), ('C9', 'C10', 1)]"
]
},
"execution_count": 216,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[(s, e, tt) for s, e, tt in data[\"C\"][\"travel_times\"] if tt == 1]"
]
},
{
"cell_type": "code",
"execution_count": 217,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"{'C': {'color': '#149848',\n",
" 'transfers': [['C3', 'F15'],\n",
" ['C4', 'Z2'],\n",
" ['C4', 'G2'],\n",
" ['C7', 'M14'],\n",
" ['C7', 'N6'],\n",
" ['C7', 'G6'],\n",
" ['C8', 'M15'],\n",
" ['C8', 'H6'],\n",
" ['C9', 'H7'],\n",
" ['C9', 'Y18'],\n",
" ['C11', 'T9'],\n",
" ['C11', 'M18'],\n",
" ['C11', 'Z8'],\n",
" ['C12', 'M19'],\n",
" ['C18', 'H21']],\n",
" 'travel_times': [['C1', 'C2', 2],\n",
" ['C2', 'C3', 2],\n",
" ['C3', 'C4', 1],\n",
" ['C4', 'C5', 2],\n",
" ['C5', 'C6', 2],\n",
" ['C6', 'C7', 2],\n",
" ['C7', 'C8', 1],\n",
" ['C8', 'C9', 3],\n",
" ['C9', 'C10', 1],\n",
" ['C10', 'C11', 2],\n",
" ['C11', 'C12', 2],\n",
" ['C12', 'C13', 2],\n",
" ['C13', 'C14', 2],\n",
" ['C14', 'C15', 2],\n",
" ['C15', 'C16', 2],\n",
" ['C16', 'C17', 3],\n",
" ['C17', 'C18', 3],\n",
" ['C18', 'C19', 3]]},\n",
" 'G': {'color': '#f59230',\n",
" 'transfers': [['G1', 'Z1'],\n",
" ['G1', 'F16'],\n",
" ['G2', 'Z2'],\n",
" ['G2', 'C4'],\n",
" ['G4', 'Z3'],\n",
" ['G5', 'M13'],\n",
" ['G5', 'Y16'],\n",
" ['G5', 'Z4'],\n",
" ['G5', 'N7'],\n",
" ['G6', 'N6'],\n",
" ['G6', 'M14'],\n",
" ['G6', 'C7'],\n",
" ['G9', 'M16'],\n",
" ['G9', 'H8'],\n",
" ['G11', 'T10'],\n",
" ['G12', 'Z9'],\n",
" ['G15', 'H16'],\n",
" ['G16', 'H17']],\n",
" 'travel_times': [['G1', 'G2', 2],\n",
" ['G2', 'G3', 1],\n",
" ['G3', 'G4', 2],\n",
" ['G4', 'G5', 2],\n",
" ['G5', 'G6', 2],\n",
" ['G6', 'G7', 2],\n",
" ['G7', 'G8', 2],\n",
" ['G8', 'G9', 2],\n",
" ['G9', 'G10', 1],\n",
" ['G10', 'G11', 2],\n",
" ['G11', 'G12', 2],\n",
" ['G12', 'G13', 1],\n",
" ['G13', 'G14', 2],\n",
" ['G14', 'G15', 2],\n",
" ['G15', 'G16', 1],\n",
" ['G16', 'G17', 2],\n",
" ['G17', 'G18', 1],\n",
" ['G18', 'G19', 2]]},\n",
" 'F': {'color': '#b96528',\n",
" 'transfers': [['F1', 'Y1'],\n",
" ['F2', 'Y2'],\n",
" ['F3', 'Y3'],\n",
" ['F4', 'Y4'],\n",
" ['F5', 'Y5'],\n",
" ['F6', 'Y6'],\n",
" ['F7', 'Y7'],\n",
" ['F8', 'Y8'],\n",
" ['F9', 'Y9'],\n",
" ['F9', 'M25'],\n",
" ['F13', 'M9'],\n",
" ['F15', 'C3'],\n",
" ['F16', 'Z1'],\n",
" ['F16', 'G1']],\n",
" 'travel_times': [['F1', 'F2', 3],\n",
" ['F2', 'F3', 2],\n",
" ['F3', 'F4', 3],\n",
" ['F4', 'F5', 2],\n",
" ['F5', 'F6', 2],\n",
" ['F6', 'F7', 2],\n",
" ['F7', 'F8', 2],\n",
" ['F8', 'F9', 2],\n",
" ['F9', 'F10', 3],\n",
" ['F10', 'F11', 2],\n",
" ['F11', 'F12', 2],\n",
" ['F12', 'F13', 2],\n",
" ['F13', 'F14', 3],\n",
" ['F14', 'F15', 2],\n",
" ['F15', 'F16', 2]]},\n",
" 'H': {'color': '#9cacb5',\n",
" 'transfers': [['H6', 'M15'],\n",
" ['H6', 'C8'],\n",
" ['H7', 'Y18'],\n",
" ['H7', 'C9'],\n",
" ['H8', 'M16'],\n",
" ['H8', 'G9'],\n",
" ['H12', 'T11'],\n",
" ['H16', 'G15'],\n",
" ['H17', 'G16'],\n",
" ['H21', 'C18']],\n",
" 'travel_times': [['H1', 'H2', 3],\n",
" ['H2', 'H3', 3],\n",
" ['H3', 'H4', 3],\n",
" ['H4', 'H5', 3],\n",
" ['H5', 'H6', 2],\n",
" ['H6', 'H7', 3],\n",
" ['H7', 'H8', 1],\n",
" ['H8', 'H9', 2],\n",
" ['H9', 'H10', 2],\n",
" ['H10', 'H11', 2],\n",
" ['H11', 'H12', 1],\n",
" ['H12', 'H13', 3],\n",
" ['H13', 'H14', 1],\n",
" ['H14', 'H15', 2],\n",
" ['H15', 'H16', 2],\n",
" ['H16', 'H17', 1],\n",
" ['H17', 'H18', 2],\n",
" ['H18', 'H19', 2],\n",
" ['H19', 'H20', 2],\n",
" ['H20', 'H21', 3]]},\n",
" 'M': {'color': '#ff0000',\n",
" 'transfers': [['M9', 'F13'],\n",
" ['M12', 'N8'],\n",
" ['M13', 'G5'],\n",
" ['M13', 'Y16'],\n",
" ['M13', 'Z4'],\n",
" ['M13', 'N7'],\n",
" ['M14', 'C7'],\n",
" ['M14', 'G6'],\n",
" ['M14', 'N6'],\n",
" ['M15', 'H6'],\n",
" ['M15', 'C8'],\n",
" ['M16', 'G9'],\n",
" ['M16', 'H8'],\n",
" ['M18', 'T9'],\n",
" ['M18', 'C11'],\n",
" ['M18', 'Z8'],\n",
" ['M19', 'C12'],\n",
" ['M22', 'N11'],\n",
" ['M25', 'Y9'],\n",
" ['M25', 'F9']],\n",
" 'travel_times': [['M1', 'M2', 2],\n",
" ['M2', 'M3', 2],\n",
" ['M3', 'M4', 2],\n",
" ['M4', 'M5', 2],\n",
" ['M5', 'M6', 2],\n",
" ['M6', 'M7', 2],\n",
" ['M7', 'M8', 2],\n",
" ['M8', 'M9', 2],\n",
" ['M9', 'M10', 1],\n",
" ['M10', 'M11', 2],\n",
" ['M11', 'M12', 2],\n",
" ['M12', 'M13', 3],\n",
" ['M13', 'M14', 2],\n",
" ['M14', 'M15', 1],\n",
" ['M15', 'M16', 3],\n",
" ['M16', 'M17', 2],\n",
" ['M17', 'M18', 2],\n",
" ['M18', 'M19', 2],\n",
" ['M19', 'M20', 1],\n",
" ['M20', 'M21', 2],\n",
" ['M21', 'M22', 2],\n",
" ['M22', 'M23', 3],\n",
" ['M23', 'M24', 2],\n",
" ['M24', 'M25', 3],\n",
" ['m3', 'm4', 2],\n",
" ['m4', 'm5', 2],\n",
" ['m5', 'M6', 2]]},\n",
" 'N': {'color': '#1aaca9',\n",
" 'transfers': [['N1', 'T1'],\n",
" ['N2', 'T2'],\n",
" ['N3', 'T3'],\n",
" ['N6', 'G6'],\n",
" ['N6', 'M14'],\n",
" ['N6', 'C7'],\n",
" ['N7', 'Y16'],\n",
" ['N7', 'Z4'],\n",
" ['N7', 'G5'],\n",
" ['N7', 'M13'],\n",
" ['N8', 'M12'],\n",
" ['N9', 'Y14'],\n",
" ['N10', 'Y13'],\n",
" ['N10', 'T6'],\n",
" ['N11', 'M22']],\n",
" 'travel_times': [['N1', 'N2', 2],\n",
" ['N2', 'N3', 2],\n",
" ['N3', 'N4', 2],\n",
" ['N4', 'N5', 2],\n",
" ['N5', 'N6', 2],\n",
" ['N6', 'N7', 2],\n",
" ['N7', 'N8', 2],\n",
" ['N8', 'N9', 2],\n",
" ['N9', 'N10', 2],\n",
" ['N10', 'N11', 2],\n",
" ['N11', 'N12', 3],\n",
" ['N12', 'N13', 2],\n",
" ['N13', 'N14', 2],\n",
" ['N14', 'N15', 3],\n",
" ['N15', 'N16', 1],\n",
" ['N16', 'N17', 3],\n",
" ['N17', 'N18', 2],\n",
" ['N18', 'N19', 2]]},\n",
" 'T': {'color': '#1aa7d8',\n",
" 'transfers': [['T6', 'N10'],\n",
" ['T6', 'Y13'],\n",
" ['T7', 'Z6'],\n",
" ['T9', 'M18'],\n",
" ['T9', 'C11'],\n",
" ['T9', 'Z8'],\n",
" ['T10', 'G11'],\n",
" ['T11', 'H12']],\n",
" 'travel_times': [['T1', 'T2', 0],\n",
" ['T2', 'T3', 3],\n",
" ['T3', 'T4', 6],\n",
" ['T4', 'T5', 9],\n",
" ['T5', 'T6', 11],\n",
" ['T6', 'T7', 13],\n",
" ['T7', 'T8', 14],\n",
" ['T8', 'T9', 16],\n",
" ['T9', 'T10', 18],\n",
" ['T10', 'T11', 20],\n",
" ['T11', 'T12', 21],\n",
" ['T12', 'T13', 24],\n",
" ['T13', 'T14', 26],\n",
" ['T14', 'T15', 27],\n",
" ['T15', 'T16', 30],\n",
" ['T16', 'T17', 33],\n",
" ['T17', 'T18', 35],\n",
" ['T18', 'T19', 37],\n",
" ['T19', 'T20', 39],\n",
" ['T20', 'T21', 41],\n",
" ['T21', 'T22', 43],\n",
" ['T22', 'T23', 46],\n",
" ['T23', 'T24', 49]]},\n",
" 'Y': {'color': '#ede7c3',\n",
" 'transfers': [['Y1', 'F1'],\n",
" ['Y2', 'F2'],\n",
" ['Y3', 'F3'],\n",
" ['Y4', 'F4'],\n",
" ['Y5', 'F5'],\n",
" ['Y6', 'F6'],\n",
" ['Y7', 'F7'],\n",
" ['Y8', 'F8'],\n",
" ['Y9', 'F9'],\n",
" ['Y9', 'M25'],\n",
" ['Y13', 'T6'],\n",
" ['Y13', 'N10'],\n",
" ['Y14', 'N9'],\n",
" ['Y16', 'Z4'],\n",
" ['Y16', 'N7'],\n",
" ['Y16', 'G5'],\n",
" ['Y16', 'M13'],\n",
" ['Y18', 'H7'],\n",
" ['Y18', 'C9']],\n",
" 'travel_times': [['Y1', 'Y2', 4],\n",
" ['Y2', 'Y3', 2],\n",
" ['Y3', 'Y4', 3],\n",
" ['Y4', 'Y5', 2],\n",
" ['Y5', 'Y6', 2],\n",
" ['Y6', 'Y7', 2],\n",
" ['Y7', 'Y8', 2],\n",
" ['Y8', 'Y9', 3],\n",
" ['Y9', 'Y10', 2],\n",
" ['Y10', 'Y11', 2],\n",
" ['Y11', 'Y12', 2],\n",
" ['Y12', 'Y13', 3],\n",
" ['Y13', 'Y14', 2],\n",
" ['Y14', 'Y15', 2],\n",
" ['Y15', 'Y16', 1],\n",
" ['Y16', 'Y17', 2],\n",
" ['Y17', 'Y18', 2],\n",
" ['Y18', 'Y19', 2],\n",
" ['Y19', 'Y20', 2],\n",
" ['Y20', 'Y21', 2],\n",
" ['Y21', 'Y22', 2],\n",
" ['Y22', 'Y23', 3],\n",
" ['Y23', 'Y24', 2]]},\n",
" 'Z': {'color': '#a384bf',\n",
" 'transfers': [['Z1', 'F16'],\n",
" ['Z1', 'G1'],\n",
" ['Z2', 'C4'],\n",
" ['Z2', 'G2'],\n",
" ['Z3', 'G4'],\n",
" ['Z4', 'Y16'],\n",
" ['Z4', 'N7'],\n",
" ['Z4', 'M13'],\n",
" ['Z4', 'G5'],\n",
" ['Z6', 'T7'],\n",
" ['Z8', 'M18'],\n",
" ['Z8', 'C11'],\n",
" ['Z8', 'T9'],\n",
" ['Z9', 'G12']],\n",
" 'travel_times': [['Z1', 'Z2', 3],\n",
" ['Z2', 'Z3', 2],\n",
" ['Z3', 'Z4', 2],\n",
" ['Z4', 'Z5', 2],\n",
" ['Z5', 'Z6', 2],\n",
" ['Z6', 'Z7', 2],\n",
" ['Z7', 'Z8', 2],\n",
" ['Z8', 'Z9', 2],\n",
" ['Z9', 'Z10', 3],\n",
" ['Z10', 'Z11', 3],\n",
" ['Z11', 'Z12', 3],\n",
" ['Z12', 'Z13', 2],\n",
" ['Z13', 'Z14', 2]]}}"
]
},
"execution_count": 217,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 rob staff 27K Mar 25 2018 tokyo-metro.json\n"
]
}
],
"source": [
"!ls -lh tokyo-metro.json"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data_pack = msgpack.packb(data)"
]
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# del data"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"bytes"
]
},
"execution_count": 221,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(data_pack)"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3021"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data_pack)"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"tokyo-metro.msgpack\", \"wb\") as f:\n",
" f.write(data_pack)"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 rob staff 3.0K May 6 16:12 tokyo-metro.msgpack\n"
]
}
],
"source": [
"!ls -lh tokyo-metro.msgpack"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Read the serialized bytes back from disk; decoding only needs the bytes,\n",
"# so the file can be closed before unpacking.\n",
"with open(\"tokyo-metro.msgpack\", \"rb\") as f:\n",
"    data_msgpack = f.read()\n",
"\n",
"# raw=False makes msgpack decode its raw strings as UTF-8 `str` objects, so the\n",
"# dictionary keys come back as 'C', 'G', ... exactly as they were before\n",
"# packing, rather than as bytes (b'C', b'G', ...), which is what the default\n",
"# raw=True of msgpack < 1.0 returns under Python 3.\n",
"data = msgpack.unpackb(data_msgpack, raw=False)"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[b'C', b'G', b'F', b'H', b'M', b'N', b'T', b'Y', b'Z']"
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(data.keys())"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"tokyo-metro.pickle\", \"wb\") as f:\n",
" pickle.dump(data, f)"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del data"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-r--r-- 1 rob staff 8.5K May 6 16:12 tokyo-metro.pickle\n"
]
}
],
"source": [
"!ls -lh tokyo-metro.pickle"
]
},
{
"cell_type": "code",
"execution_count": 230,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"tokyo-metro.pickle\", \"rb\") as f:\n",
" data = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys([b'C', b'G', b'F', b'H', b'M', b'N', b'T', b'Y', b'Z'])"
]
},
"execution_count": 231,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.keys()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Versions"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%reload_ext version_information"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
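"<table><tr><th>Software</th><th>Version</th></tr><tr><td>Python</td><td>3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]</td></tr><tr><td>IPython</td><td>7.5.0</td></tr><tr><td>OS</td><td>Darwin 18.2.0 x86_64 i386 64bit</td></tr><tr><td>numpy</td><td>1.16.3</td></tr><tr><td>pandas</td><td>0.24.2</td></tr><tr><td>csv</td><td>1.0</td></tr><tr><td>json</td><td>2.0.9</td></tr><tr><td>tables</td><td>3.5.1</td></tr><tr><td>h5py</td><td>2.9.0</td></tr><tr><td>msgpack</td><td>0.6.1</td></tr><tr><td colspan='2'>Mon May 06 16:13:03 2019 JST</td></tr></table>"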
],
"text/latex": [
"\\begin{tabular}{|l|l|}\\hline\n",
"{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n",
"Python & 3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE\\_401/final)] \\\\ \\hline\n",
"IPython & 7.5.0 \\\\ \\hline\n",
"OS & Darwin 18.2.0 x86\\_64 i386 64bit \\\\ \\hline\n",
"numpy & 1.16.3 \\\\ \\hline\n",
"pandas & 0.24.2 \\\\ \\hline\n",
"csv & 1.0 \\\\ \\hline\n",
"json & 2.0.9 \\\\ \\hline\n",
"tables & 3.5.1 \\\\ \\hline\n",
"h5py & 2.9.0 \\\\ \\hline\n",
"msgpack & 0.6.1 \\\\ \\hline\n",
"\\hline \\multicolumn{2}{|l|}{Mon May 06 16:13:03 2019 JST} \\\\ \\hline\n",
"\\end{tabular}\n"
],
"text/plain": [
"Software versions\n",
"Python 3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]\n",
"IPython 7.5.0\n",
"OS Darwin 18.2.0 x86_64 i386 64bit\n",
"numpy 1.16.3\n",
"pandas 0.24.2\n",
"csv 1.0\n",
"json 2.0.9\n",
"tables 3.5.1\n",
"h5py 2.9.0\n",
"msgpack 0.6.1\n",
"Mon May 06 16:13:03 2019 JST"
]
},
"execution_count": 233,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%version_information numpy, pandas, csv, json, tables, h5py, msgpack"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "py3.6",
"language": "python",
"name": "py3.6"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}