{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Chapter 18: Code listing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Robert Johansson\n", "\n", "Source code listings for [Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib](https://www.apress.com/us/book/9781484242452) (ISBN 978-1-484242-45-2)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from __future__ import print_function" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "np.random.seed(0)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import csv" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import json" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import h5py" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import tables" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pickle\n", "# import cPickle" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import msgpack" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# CSV" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting playerstats-2013-2014.csv\n" ] } ], "source": [ "%%writefile 
playerstats-2013-2014.csv\n", "# 2013-2014 / Regular Season / All Skaters / Summary / Points\n", "Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n", "1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n", "2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n", "3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n", "4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5\n", "5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting playerstats-2013-2014-top30.csv\n" ] } ], "source": [ "%%writefile playerstats-2013-2014-top30.csv\n", "# 2013-2014 / Regular Season / All Skaters / Summary / Points\n", "Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n", "1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n", "2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n", "3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n", "4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5\n", "5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0\n", "6,Phil Kessel,TOR,R,82,37,43,80,-5,27,8,20,0,0,6,0,305,12.1,20:39,24.5,14.3\n", "7,Taylor Hall,EDM,L,75,27,53,80,-15,44,7,17,0,1,1,1,250,10.8,20:00,25.4,45.7\n", "8,Alex Ovechkin,WSH,L,78,51,28,79,-35,48,24,39,0,1,10,3,386,13.2,20:32,21.8,66.7\n", "9,Joe Pavelski,SJS,C,82,41,38,79,+23,32,16,31,1,2,3,0,225,18.2,19:51,27.1,56.0\n", "10,Jamie Benn,DAL,L,81,34,45,79,+21,64,5,19,1,3,3,1,279,12.2,19:09,25.0,52.8\n", "11,Nicklas Backstrom,WSH,C,82,18,61,79,-20,54,6,44,1,1,1,0,196,9.2,19:48,23.3,50.4\n", "12,Patrick 
Sharp,CHI,L,82,34,44,78,+13,40,10,25,0,0,3,1,313,10.9,18:53,22.7,54.6\n", "13,Joe Thornton,SJS,C,82,11,65,76,+20,32,2,19,0,1,3,1,122,9.0,18:55,26.3,56.1\n", "14,Erik Karlsson,OTT,D,82,20,54,74,-15,36,5,31,0,0,1,0,257,7.8,27:04,28.6,0.0\n", "15,Evgeni Malkin,PIT,C,60,23,49,72,+10,62,7,30,0,0,3,0,191,12.0,20:03,21.4,48.8\n", "16,Patrick Marleau,SJS,L,82,33,37,70,+0,18,11,23,2,2,4,0,285,11.6,20:31,27.3,52.9\n", "17,Anze Kopitar,LAK,C,82,29,41,70,+34,24,10,23,0,0,9,2,200,14.5,20:53,25.4,53.3\n", "18,Matt Duchene,COL,C,71,23,47,70,+8,19,5,17,0,0,6,1,217,10.6,18:29,22.0,50.3\n", "19,Martin St. Louis,\"TBL, NYR\",R,81,30,39,69,+13,10,9,21,1,2,5,1,204,14.7,20:56,25.7,40.7\n", "20,Patrick Kane,CHI,R,69,29,40,69,+7,22,10,25,0,0,6,0,227,12.8,19:36,22.9,50.0\n", "21,Blake Wheeler,WPG,R,82,28,41,69,+4,63,8,19,0,0,4,2,225,12.4,18:41,24.0,37.5\n", "22,Kyle Okposo,NYI,R,71,27,42,69,-9,51,5,15,0,0,4,1,195,13.8,20:26,22.2,47.5\n", "23,David Krejci,BOS,C,80,19,50,69,+39,28,3,19,0,0,6,1,169,11.2,19:07,21.3,51.2\n", "24,Chris Kunitz,PIT,L,78,35,33,68,+25,66,13,22,0,0,8,0,218,16.1,19:09,22.2,75.0\n", "25,Jonathan Toews,CHI,C,76,28,40,68,+26,34,5,15,3,5,5,0,193,14.5,20:28,25.9,57.2\n", "26,Thomas Vanek,\"BUF, NYI, MTL\",L,78,27,41,68,+7,46,8,18,0,0,4,0,248,10.9,19:21,21.6,43.5\n", "27,Jaromir Jagr,NJD,R,82,24,43,67,+16,46,5,17,0,0,6,1,231,10.4,19:09,22.8,0.0\n", "28,John Tavares,NYI,C,59,24,42,66,-6,40,8,25,0,0,4,0,188,12.8,21:14,22.3,49.1\n", "29,Jason Spezza,OTT,C,75,23,43,66,-26,46,9,22,0,0,5,0,223,10.3,18:12,23.8,54.0\n", "30,Jordan Eberle,EDM,R,80,28,37,65,-11,18,7,20,1,1,4,1,200,14.0,19:32,25.4,38.1" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# 2013-2014 / Regular Season / All Skaters / Summary / Points\n", "Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%\n", "1,Sidney 
Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5\n", "2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0\n", "3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9\n" ] } ], "source": [ "!head -n 5 playerstats-2013-2014-top30.csv" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "rows = []" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "with open(\"playerstats-2013-2014.csv\") as f:\n", " csvreader = csv.reader(f)\n", " rows = [fields for fields in csvreader]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Player', 'Team', 'Pos', 'GP', 'G']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rows[1][1:6]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Sidney Crosby', 'PIT', 'C', '80', '36']" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rows[2][1:6]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = np.random.randn(100, 3)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "np.savetxt(\"data.csv\", data, delimiter=\",\", header=\"x, y, z\", comments=\"# Random x, y, z coordinates\\n\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# Random x, y, z coordinates\n", "x, y, z\n", "1.764052345967664026e+00,4.001572083672232938e-01,9.787379841057392005e-01\n", "2.240893199201457797e+00,1.867557990149967484e+00,-9.772778798764110153e-01\n", 
"9.500884175255893682e-01,-1.513572082976978872e-01,-1.032188517935578448e-01\n" ] } ], "source": [ "!head -n 5 data.csv" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data_load = np.loadtxt(\"data.csv\", skiprows=2, delimiter=\",\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([ 2.2408932 , 1.86755799, -0.97727788])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_load[1,:]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype('float64')" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_load.dtype" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(data == data_load).all()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = np.loadtxt(\"playerstats-2013-2014.csv\", skiprows=2, delimiter=\",\", dtype=bytes)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([b'Sidney Crosby', b'PIT', b'C', b'80', b'36'], dtype='|S13')" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[0][1:6]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 68., 104., 18.],\n", " [ 56., 87., 28.],\n", " [ 58., 86., 7.],\n", " [ 47., 84., 16.],\n", " [ 39., 82., 32.]])" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"np.loadtxt(\"playerstats-2013-2014.csv\", skiprows=2, delimiter=\",\", usecols=[6,7,8])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = pd.read_csv(\"playerstats-2013-2014.csv\", skiprows=1)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = df.set_index(\"Rank\")" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PlayerGPGAP
Rank
1Sidney Crosby803668104
2Ryan Getzlaf77315687
3Claude Giroux82285886
4Tyler Seguin80374784
5Corey Perry81433982
\n", "
" ], "text/plain": [ " Player GP G A P\n", "Rank \n", "1 Sidney Crosby 80 36 68 104\n", "2 Ryan Getzlaf 77 31 56 87\n", "3 Claude Giroux 82 28 58 86\n", "4 Tyler Seguin 80 37 47 84\n", "5 Corey Perry 81 43 39 82" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[[\"Player\", \"GP\", \"G\", \"A\", \"P\"]]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 5 entries, 1 to 5\n", "Data columns (total 20 columns):\n", "Player 5 non-null object\n", "Team 5 non-null object\n", "Pos 5 non-null object\n", "GP 5 non-null int64\n", "G 5 non-null int64\n", "A 5 non-null int64\n", "P 5 non-null int64\n", "+/- 5 non-null int64\n", "PIM 5 non-null int64\n", "PPG 5 non-null int64\n", "PPP 5 non-null int64\n", "SHG 5 non-null int64\n", "SHP 5 non-null int64\n", "GW 5 non-null int64\n", "OT 5 non-null int64\n", "S 5 non-null int64\n", "S% 5 non-null float64\n", "TOI/GP 5 non-null object\n", "Shift/GP 5 non-null float64\n", "FO% 5 non-null float64\n", "dtypes: float64(3), int64(13), object(4)\n", "memory usage: 840.0+ bytes\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df[[\"Player\", \"GP\", \"G\", \"A\", \"P\"]].to_csv(\"playerstats-2013-2014-subset.csv\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rank,Player,GP,G,A,P\n", "1,Sidney Crosby,80,36,68,104\n", "2,Ryan Getzlaf,77,31,56,87\n", "3,Claude Giroux,82,28,58,86\n", "4,Tyler Seguin,80,37,47,84\n" ] } ], "source": [ "!head -n 5 playerstats-2013-2014-subset.csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# HDF5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## h5py" ] }, { "cell_type": "code", "execution_count": 38, 
"metadata": { "collapsed": true }, "outputs": [], "source": [ "import h5py" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# mode = \"w\", \"r\", \"w-\", \"r+\", \"a\"" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f = h5py.File(\"data.h5\", \"w\")" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'r+'" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.mode" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f.flush()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f.close()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f = h5py.File(\"data.h5\", \"w\")" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/'" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.name" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grp1 = f.create_group(\"experiment1\")" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/experiment1'" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp1.name" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grp2_meas = f.create_group(\"experiment2/measurement\")" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/experiment2/measurement'" ] }, 
"execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp2_meas.name" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grp2_sim = f.create_group(\"experiment2/simulation\")" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/experiment2/simulation'" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp2_sim.name" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f[\"/experiment1\"]" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f[\"/experiment2/simulation\"]" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": true }, "outputs": [], "source": [ "grp_expr2 = f[\"/experiment2\"]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp_expr2['simulation']" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['experiment1', 'experiment2']" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(f.keys())" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('experiment1', ),\n", " ('experiment2', )]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(f.items())" ] 
}, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "experiment1\n", "experiment2\n", "experiment2/measurement\n", "experiment2/simulation\n" ] } ], "source": [ "f.visit(lambda x: print(x))" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "experiment1 \n", "experiment2 \n", "experiment2/measurement \n", "experiment2/simulation \n" ] } ], "source": [ "f.visititems(lambda name, value: print(name, value))" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"experiment1\" in f" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"simulation\" in f[\"experiment2\"]" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"experiment3\" in f" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f.flush()" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/ Group\n", "/experiment1 Group\n", "/experiment2 Group\n", "/experiment2/measurement Group\n", "/experiment2/simulation Group\n" ] } ], "source": [ "!h5ls -r data.h5" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data1 = np.arange(10)" ] }, { "cell_type": "code", 
"execution_count": 66, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data2 = np.random.randn(100, 100)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f[\"array1\"] = data1" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f[\"/experiment2/measurement/meas1\"] = data2" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "array1 \n", "experiment1 \n", "experiment2 \n", "experiment2/measurement \n", "experiment2/measurement/meas1 \n", "experiment2/simulation \n" ] } ], "source": [ "f.visititems(lambda name, value: print(name, value))" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds = f[\"array1\"]" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'/array1'" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.name" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype('int64')" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.dtype" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(10,)" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.shape" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "data": 
{ "text/plain": [ "10" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.len()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Dataset.value was deprecated and then removed in h5py 3.0; ds[()] reads the whole dataset\n", "ds[()]" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds = f[\"/experiment2/measurement/meas1\"]" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dtype(' 2.0" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "((100,), dtype('bool'))" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mask.shape, mask.dtype" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([2.04253623, 2.1041854 , 2.05689385])" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds[mask, 0]" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 2.04253623, -0.91946118, 0.11467003, -0.1374237 , 1.36552692],\n", " [ 2.1041854 ,
0.22725706, -1.1291663 , -0.28133197, -0.7394167 ],\n", " [ 2.05689385, 0.18041971, -0.06670925, -0.02835398, 0.48480475]])" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds[mask, :5]" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# create empty data sets, assign and update datasets" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds = f.create_dataset(\"array2\", data=np.random.randint(10, size=10))" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([0, 2, 2, 4, 7, 3, 7, 2, 4, 1])" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ds[()] reads the full dataset; Dataset.value was removed in h5py 3.0\n", "ds[()]" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds = f.create_dataset(\"/experiment2/simulation/data1\", shape=(5, 5), fillvalue=-1)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[-1., -1., -1., -1., -1.],\n", " [-1., -1., -1., -1., -1.],\n", " [-1., -1., -1., -1., -1.],\n", " [-1., -1., -1., -1., -1.],\n", " [-1., -1., -1., -1., -1.]], dtype=float32)" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ds[()] reads the full dataset; Dataset.value was removed in h5py 3.0\n", "ds[()]" ] }, { "cell_type": "code", "execution_count": 100, "metadata":
{ "collapsed": false }, "outputs": [], "source": [ "ds = f.create_dataset(\"/experiment1/simulation/data1\", shape=(5000, 5000, 5000),\n", " fillvalue=0, compression='gzip')" ] }, { "cell_type": "code", "execution_count": 101, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ds[:, 0, 0] = np.random.rand(5000)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ds[1, :, 0] += np.random.rand(5000)" ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[0.6939344 , 0. , 0. , 0. , 0. ],\n", " [1.4819994 , 0.01639538, 0.54387355, 0.11130908, 0.9928771 ]],\n", " dtype=float32)" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds[:2, :5, 0]" ] }, { "cell_type": "code", "execution_count": 105, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.0" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.fillvalue" ] }, { "cell_type": "code", "execution_count": 106, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "simulation \n", "simulation/data1 \n" ] } ], "source": [ "f[\"experiment1\"].visititems(lambda name, value: print(name, value))" ] }, { "cell_type": "code", "execution_count": 107, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "465.66128730773926" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "float(np.prod(ds.shape) * ds[0,0,0].nbytes) / (1024**3) # Gb" ] }, { "cell_type": "code", "execution_count": 108, "metadata": { "collapsed": 
true }, "outputs": [], "source": [ "f.flush()" ] }, { "cell_type": "code", "execution_count": 109, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'data.h5'" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.filename" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-r--r-- 1 rob staff 357K May 6 16:11 data.h5\n" ] } ], "source": [ "!ls -lh data.h5" ] }, { "cell_type": "code", "execution_count": 111, "metadata": { "collapsed": false }, "outputs": [], "source": [ "del f[\"/experiment1/simulation/data1\"]" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "simulation \n" ] } ], "source": [ "f[\"experiment1\"].visititems(lambda name, value: print(name, value))" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f.close()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# attributes" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Open read/write explicitly: h5py >= 3.0 defaults to read-only (\"r\"),\n", "# which would make the attribute assignments below fail.\n", "f = h5py.File(\"data.h5\", \"a\")" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.attrs" ] }, { "cell_type": "code", "execution_count": 117, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f.attrs[\"desc\"] = \"Result sets from experiments and simulations\"" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f[\"experiment1\"].attrs[\"date\"] = \"2015-1-1\"" ] }, { "cell_type": "code", "execution_count": 119,
"metadata": { "collapsed": true }, "outputs": [], "source": [ "f[\"experiment2\"].attrs[\"date\"] = \"2015-1-2\"" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f[\"experiment2/simulation/data1\"].attrs[\"k\"] = 1.5" ] }, { "cell_type": "code", "execution_count": 121, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f[\"experiment2/simulation/data1\"].attrs[\"T\"] = 1000" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['date']" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(f[\"experiment1\"].attrs.keys())" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('T', 1000), ('k', 1.5)]" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(f[\"experiment2/simulation/data1\"].attrs.items())" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"T\" in f[\"experiment2/simulation/data1\"].attrs" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": true }, "outputs": [], "source": [ "del f[\"experiment2/simulation/data1\"].attrs[\"T\"]" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"T\" in f[\"experiment2/simulation/data1\"].attrs" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f[\"experiment2/simulation/data1\"].attrs[\"t\"] = np.array([1, 2, 3])" ] }, { "cell_type": "code", 
"execution_count": 128, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([1, 2, 3])" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f[\"experiment2/simulation/data1\"].attrs[\"t\"]" ] }, { "cell_type": "code", "execution_count": 129, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## pytables" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = pd.read_csv(\"playerstats-2013-2014-top30.csv\", skiprows=1)\n", "df = df.set_index(\"Rank\")" ] }, { "cell_type": "code", "execution_count": 131, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PlayerPosGPPGAS%Shift/GP
Rank
1Sidney CrosbyC80104366813.924.0
2Ryan GetzlafC7787315615.225.2
3Claude GirouxC8286285812.625.1
4Tyler SeguinC8084374712.623.4
5Corey PerryR8182433915.423.2
\n", "
# %%
# Preview of the columns that are copied into the PyTables table below.
preview_columns = ["Player", "Pos", "GP", "P", "G", "A", "S%", "Shift/GP"]
df[preview_columns].head(5)

# %%
# mode="w" creates the file, replacing any existing file with that name.
f = tables.open_file("playerstats-2013-2014.h5", mode="w")

# %%
grp = f.create_group(
    where="/",
    name="season_2013_2014",
    title="NHL player statistics for the 2013/2014 season",
)

# %%
grp

# %%
f.root

# %%
class PlayerStat(tables.IsDescription):
    # Row layout of the "top30" table: one record per player.
    # No explicit `pos` is given for any column.
    player = tables.StringCol(itemsize=20, dflt="")      # fixed width: 20 bytes
    position = tables.StringCol(itemsize=1, dflt="C")    # single-letter position code
    games_played = tables.UInt8Col(dflt=0)
    points = tables.UInt16Col(dflt=0)
    goals = tables.UInt16Col(dflt=0)
    assists = tables.UInt16Col(dflt=0)
    shooting_percentage = tables.Float64Col(dflt=0.0)
    shifts_per_game_played = tables.Float64Col(dflt=0.0)

# %%
top30_table = f.create_table(
    where=grp,
    name="top30",
    description=PlayerStat,
    title="Top 30 point leaders",
)

# %%
# `top30_table.row` is the row buffer that is filled field by field and then
# appended to the table.
playerstat = top30_table.row

# %%
type(playerstat)

# %%
# (table field, DataFrame column) pairs, in the order the fields are assigned.
FIELD_TO_COLUMN = (
    ("player", "Player"),
    ("position", "Pos"),
    ("games_played", "GP"),
    ("points", "P"),
    ("goals", "G"),
    ("assists", "A"),
    ("shooting_percentage", "S%"),
    ("shifts_per_game_played", "Shift/GP"),
)

for _, season_row in df.iterrows():
    for field_name, column_name in FIELD_TO_COLUMN:
        playerstat[field_name] = season_row[column_name]
    playerstat.append()

# %%
# Write the buffered rows to the table.
top30_table.flush()

# %%
# Column-wise access: the first five player names (returned as bytes).
top30_table.cols.player[:5]

# %%
top30_table.cols.points[:5]
def print_playerstat(row):
    """Print one player record as a single tab-separated line.

    Parameters
    ----------
    row : mapping indexed by field name
        Must provide "player" (bytes, decoded as UTF-8), "points", "goals"
        and "assists".

    The player name is right-justified in a 20-character column (longer names
    are printed in full), followed by points, goals and assists.
    """
    player_name = row["player"].decode('UTF-8')
    columns = [player_name.rjust(20)]
    columns.extend(str(row[key]) for key in ("points", "goals", "assists"))
    print("\t".join(columns))
# %%
# Print every row of the table.
for stat_row in top30_table.iterrows():
    print_playerstat(stat_row)

# %%
# Rows can be selected with a condition string that refers to column names.
between_75_and_80_points = "(points > 75) & (points <= 80)"
for stat_row in top30_table.where(between_75_and_80_points):
    print_playerstat(stat_row)

# %%
# Conditions may combine several columns.
many_goals_under_80_points = "(goals > 40) & (points < 80)"
for stat_row in top30_table.where(many_goals_under_80_points):
    print_playerstat(stat_row)

# %%
# The repr of the file object lists its groups and the table description.
f

# %%
f.flush()

# %%
f.close()

# %%
# Expanded form of the IPython shell escape `!h5ls -rv playerstats-2013-2014.h5`;
# requires the HDF5 command-line tools on the PATH.
get_ipython().system("h5ls -rv playerstats-2013-2014.h5")
unsigned short\n", " Data: 0\n", " Attribute: FIELD_0_NAME scalar\n", " Type: 7-byte null-terminated UTF-8 string\n", " Data: \"assists\"\n", " Attribute: FIELD_1_FILL scalar\n", " Type: native unsigned char\n", " Data: 0\n", " Attribute: FIELD_1_NAME scalar\n", " Type: 12-byte null-terminated UTF-8 string\n", " Data: \"games_played\"\n", " Attribute: FIELD_2_FILL scalar\n", " Type: native unsigned short\n", " Data: 0\n", " Attribute: FIELD_2_NAME scalar\n", " Type: 5-byte null-terminated UTF-8 string\n", " Data: \"goals\"\n", " Attribute: FIELD_3_FILL scalar\n", " Type: 1-byte null-terminated ASCII string\n", " Data: \"\"\n", " Attribute: FIELD_3_NAME scalar\n", " Type: 6-byte null-terminated UTF-8 string\n", " Data: \"player\"\n", " Attribute: FIELD_4_FILL scalar\n", " Type: native unsigned short\n", " Data: 0\n", " Attribute: FIELD_4_NAME scalar\n", " Type: 6-byte null-terminated UTF-8 string\n", " Data: \"points\"\n", " Attribute: FIELD_5_FILL scalar\n", " Type: 1-byte null-terminated ASCII string\n", " Data: \"C\"\n", " Attribute: FIELD_5_NAME scalar\n", " Type: 8-byte null-terminated UTF-8 string\n", " Data: \"position\"\n", " Attribute: FIELD_6_FILL scalar\n", " Type: native double\n", " Data: 0\n", " Attribute: FIELD_6_NAME scalar\n", " Type: 22-byte null-terminated UTF-8 string\n", " Data: \"shifts_per_game_played\"\n", " Attribute: FIELD_7_FILL scalar\n", " Type: native double\n", " Data: 0\n", " Attribute: FIELD_7_NAME scalar\n", " Type: 19-byte null-terminated UTF-8 string\n", " Data: \"shooting_percentage\"\n", " Attribute: NROWS scalar\n", " Type: native long\n", " Data: 30\n", " Attribute: TITLE scalar\n", " Type: 20-byte null-terminated UTF-8 string\n", " Data: \"Top 30 point leaders\"\n", " Attribute: VERSION scalar\n", " Type: 3-byte null-terminated UTF-8 string\n", " Data: \"2.7\"\n", " Location: 1:2264\n", " Links: 1\n", " Chunks: {1489} 65516 bytes\n", " Storage: 1320 logical bytes, 65516 allocated bytes, 2.01% utilization\n", " Type: struct 
# %% [markdown]
# ## Pandas hdfstore

# %%
# pandas is already imported as `pd` in the import section at the top of the
# notebook, so it is not re-imported here.
store = pd.HDFStore('store.h5')

# %%
df = pd.DataFrame(np.random.rand(5,5))

# %%
# Frames are stored with dictionary-style assignment.
store["df1"] = df

# %%
# The first line of the CSV file is a comment line, hence skiprows=1.
df = pd.read_csv("playerstats-2013-2014-top30.csv", skiprows=1)

# %%
store["df2"] = df

# %%
store.keys()

# %%
'df2' in store

# %%
df = store["df1"]

# %%
store.root

# %%
store.close()

# %%
# Open read-only and say so explicitly: the implicit default mode is
# deprecated in h5py 2.x and differs between h5py 2.x and 3.x.
f = h5py.File("store.h5", "r")

# %%
# Print every group/dataset name, padded with tabs, next to its object.
f.visititems(lambda x, y: print(x, "\t" * int(3 - len(str(x))//8), y))

# %%
# `dataset[()]` reads the whole dataset; it replaces the `.value` property,
# which is deprecated in h5py 2.x and removed in h5py 3.0.
f["/df2/block0_items"][()]

# %%
f["/df2/block0_values"][:3]

# %%
f["/df2/block1_items"][()]

# %%
f["/df2/block1_values"][:3, :5]

# %%
# Release the HDF5 file handle; the name `f` is reused for plain file objects
# in the JSON section below.
f.close()

# %% [markdown]
# # JSON

# %%
data = ["string", 1.0, 2, None]

# %%
data_json = json.dumps(data)

# %%
data_json

# %%
data2 = json.loads(data_json)

# %%
# Show the decoded object (not the original list) to verify the round trip.
data2
# %%
# Index into the list decoded from JSON (`data2`), not the original `data`.
data2[0]

# %%
data = {"one": 1, "two": 2.0, "three": "three"}

# %%
data_json = json.dumps(data)

# %%
print(data_json)

# %%
data = json.loads(data_json)

# %%
data["two"]

# %%
data["three"]

# %%
data = {"one": [1],
        "two": [1, 2],
        "three": [1, 2, 3]}

# %%
# `indent` is the number of spaces per nesting level. The former `indent=True`
# produced the same output only because True == 1.
data_json = json.dumps(data, indent=1)

# %%
print(data_json)

# %%
# Tuples have no JSON counterpart: they are written as JSON arrays and come
# back as lists (see data_from_file["three"] below).
data = {"one": [1],
        "two": {"one": 1, "two": 2},
        "three": [(1,), (1, 2), (1, 2, 3)],
        "four": "a text string"}

# %%
with open("data.json", "w") as f:
    json.dump(data, f)

# %%
# Expanded form of the IPython shell escape `!cat data.json`.
get_ipython().system("cat data.json")

# %%
with open("data.json", "r") as f:
    data_from_file = json.load(f)

# %%
data_from_file["two"]

# %%
data_from_file["three"]

# %%
# tokyo-metro.json must be present in the working directory.
# Expanded form of `!head -n 20 tokyo-metro.json`.
get_ipython().system("head -n 20 tokyo-metro.json")

# %%
# Expanded form of `!wc tokyo-metro.json` (line, word and byte counts).
get_ipython().system("wc tokyo-metro.json")

# %%
with open("tokyo-metro.json", "r") as f:
    data = json.load(f)

# %%
# Top-level keys of the decoded JSON document.
data.keys()

# %%
data["C"].keys()

# %%
data["C"]["color"]

# %%
data["C"]["transfers"]

# %%
# Each travel_times entry is unpacked into three values; keep those whose
# third value (tt) equals 1.
[(s, e, tt) for s, e, tt in data["C"]["travel_times"] if tt == 1]
['G17', 'G18', 1],\n", " ['G18', 'G19', 2]]},\n", " 'F': {'color': '#b96528',\n", " 'transfers': [['F1', 'Y1'],\n", " ['F2', 'Y2'],\n", " ['F3', 'Y3'],\n", " ['F4', 'Y4'],\n", " ['F5', 'Y5'],\n", " ['F6', 'Y6'],\n", " ['F7', 'Y7'],\n", " ['F8', 'Y8'],\n", " ['F9', 'Y9'],\n", " ['F9', 'M25'],\n", " ['F13', 'M9'],\n", " ['F15', 'C3'],\n", " ['F16', 'Z1'],\n", " ['F16', 'G1']],\n", " 'travel_times': [['F1', 'F2', 3],\n", " ['F2', 'F3', 2],\n", " ['F3', 'F4', 3],\n", " ['F4', 'F5', 2],\n", " ['F5', 'F6', 2],\n", " ['F6', 'F7', 2],\n", " ['F7', 'F8', 2],\n", " ['F8', 'F9', 2],\n", " ['F9', 'F10', 3],\n", " ['F10', 'F11', 2],\n", " ['F11', 'F12', 2],\n", " ['F12', 'F13', 2],\n", " ['F13', 'F14', 3],\n", " ['F14', 'F15', 2],\n", " ['F15', 'F16', 2]]},\n", " 'H': {'color': '#9cacb5',\n", " 'transfers': [['H6', 'M15'],\n", " ['H6', 'C8'],\n", " ['H7', 'Y18'],\n", " ['H7', 'C9'],\n", " ['H8', 'M16'],\n", " ['H8', 'G9'],\n", " ['H12', 'T11'],\n", " ['H16', 'G15'],\n", " ['H17', 'G16'],\n", " ['H21', 'C18']],\n", " 'travel_times': [['H1', 'H2', 3],\n", " ['H2', 'H3', 3],\n", " ['H3', 'H4', 3],\n", " ['H4', 'H5', 3],\n", " ['H5', 'H6', 2],\n", " ['H6', 'H7', 3],\n", " ['H7', 'H8', 1],\n", " ['H8', 'H9', 2],\n", " ['H9', 'H10', 2],\n", " ['H10', 'H11', 2],\n", " ['H11', 'H12', 1],\n", " ['H12', 'H13', 3],\n", " ['H13', 'H14', 1],\n", " ['H14', 'H15', 2],\n", " ['H15', 'H16', 2],\n", " ['H16', 'H17', 1],\n", " ['H17', 'H18', 2],\n", " ['H18', 'H19', 2],\n", " ['H19', 'H20', 2],\n", " ['H20', 'H21', 3]]},\n", " 'M': {'color': '#ff0000',\n", " 'transfers': [['M9', 'F13'],\n", " ['M12', 'N8'],\n", " ['M13', 'G5'],\n", " ['M13', 'Y16'],\n", " ['M13', 'Z4'],\n", " ['M13', 'N7'],\n", " ['M14', 'C7'],\n", " ['M14', 'G6'],\n", " ['M14', 'N6'],\n", " ['M15', 'H6'],\n", " ['M15', 'C8'],\n", " ['M16', 'G9'],\n", " ['M16', 'H8'],\n", " ['M18', 'T9'],\n", " ['M18', 'C11'],\n", " ['M18', 'Z8'],\n", " ['M19', 'C12'],\n", " ['M22', 'N11'],\n", " ['M25', 'Y9'],\n", " ['M25', 'F9']],\n", " 
'travel_times': [['M1', 'M2', 2],\n", " ['M2', 'M3', 2],\n", " ['M3', 'M4', 2],\n", " ['M4', 'M5', 2],\n", " ['M5', 'M6', 2],\n", " ['M6', 'M7', 2],\n", " ['M7', 'M8', 2],\n", " ['M8', 'M9', 2],\n", " ['M9', 'M10', 1],\n", " ['M10', 'M11', 2],\n", " ['M11', 'M12', 2],\n", " ['M12', 'M13', 3],\n", " ['M13', 'M14', 2],\n", " ['M14', 'M15', 1],\n", " ['M15', 'M16', 3],\n", " ['M16', 'M17', 2],\n", " ['M17', 'M18', 2],\n", " ['M18', 'M19', 2],\n", " ['M19', 'M20', 1],\n", " ['M20', 'M21', 2],\n", " ['M21', 'M22', 2],\n", " ['M22', 'M23', 3],\n", " ['M23', 'M24', 2],\n", " ['M24', 'M25', 3],\n", " ['m3', 'm4', 2],\n", " ['m4', 'm5', 2],\n", " ['m5', 'M6', 2]]},\n", " 'N': {'color': '#1aaca9',\n", " 'transfers': [['N1', 'T1'],\n", " ['N2', 'T2'],\n", " ['N3', 'T3'],\n", " ['N6', 'G6'],\n", " ['N6', 'M14'],\n", " ['N6', 'C7'],\n", " ['N7', 'Y16'],\n", " ['N7', 'Z4'],\n", " ['N7', 'G5'],\n", " ['N7', 'M13'],\n", " ['N8', 'M12'],\n", " ['N9', 'Y14'],\n", " ['N10', 'Y13'],\n", " ['N10', 'T6'],\n", " ['N11', 'M22']],\n", " 'travel_times': [['N1', 'N2', 2],\n", " ['N2', 'N3', 2],\n", " ['N3', 'N4', 2],\n", " ['N4', 'N5', 2],\n", " ['N5', 'N6', 2],\n", " ['N6', 'N7', 2],\n", " ['N7', 'N8', 2],\n", " ['N8', 'N9', 2],\n", " ['N9', 'N10', 2],\n", " ['N10', 'N11', 2],\n", " ['N11', 'N12', 3],\n", " ['N12', 'N13', 2],\n", " ['N13', 'N14', 2],\n", " ['N14', 'N15', 3],\n", " ['N15', 'N16', 1],\n", " ['N16', 'N17', 3],\n", " ['N17', 'N18', 2],\n", " ['N18', 'N19', 2]]},\n", " 'T': {'color': '#1aa7d8',\n", " 'transfers': [['T6', 'N10'],\n", " ['T6', 'Y13'],\n", " ['T7', 'Z6'],\n", " ['T9', 'M18'],\n", " ['T9', 'C11'],\n", " ['T9', 'Z8'],\n", " ['T10', 'G11'],\n", " ['T11', 'H12']],\n", " 'travel_times': [['T1', 'T2', 0],\n", " ['T2', 'T3', 3],\n", " ['T3', 'T4', 6],\n", " ['T4', 'T5', 9],\n", " ['T5', 'T6', 11],\n", " ['T6', 'T7', 13],\n", " ['T7', 'T8', 14],\n", " ['T8', 'T9', 16],\n", " ['T9', 'T10', 18],\n", " ['T10', 'T11', 20],\n", " ['T11', 'T12', 21],\n", " ['T12', 'T13', 
24],\n", " ['T13', 'T14', 26],\n", " ['T14', 'T15', 27],\n", " ['T15', 'T16', 30],\n", " ['T16', 'T17', 33],\n", " ['T17', 'T18', 35],\n", " ['T18', 'T19', 37],\n", " ['T19', 'T20', 39],\n", " ['T20', 'T21', 41],\n", " ['T21', 'T22', 43],\n", " ['T22', 'T23', 46],\n", " ['T23', 'T24', 49]]},\n", " 'Y': {'color': '#ede7c3',\n", " 'transfers': [['Y1', 'F1'],\n", " ['Y2', 'F2'],\n", " ['Y3', 'F3'],\n", " ['Y4', 'F4'],\n", " ['Y5', 'F5'],\n", " ['Y6', 'F6'],\n", " ['Y7', 'F7'],\n", " ['Y8', 'F8'],\n", " ['Y9', 'F9'],\n", " ['Y9', 'M25'],\n", " ['Y13', 'T6'],\n", " ['Y13', 'N10'],\n", " ['Y14', 'N9'],\n", " ['Y16', 'Z4'],\n", " ['Y16', 'N7'],\n", " ['Y16', 'G5'],\n", " ['Y16', 'M13'],\n", " ['Y18', 'H7'],\n", " ['Y18', 'C9']],\n", " 'travel_times': [['Y1', 'Y2', 4],\n", " ['Y2', 'Y3', 2],\n", " ['Y3', 'Y4', 3],\n", " ['Y4', 'Y5', 2],\n", " ['Y5', 'Y6', 2],\n", " ['Y6', 'Y7', 2],\n", " ['Y7', 'Y8', 2],\n", " ['Y8', 'Y9', 3],\n", " ['Y9', 'Y10', 2],\n", " ['Y10', 'Y11', 2],\n", " ['Y11', 'Y12', 2],\n", " ['Y12', 'Y13', 3],\n", " ['Y13', 'Y14', 2],\n", " ['Y14', 'Y15', 2],\n", " ['Y15', 'Y16', 1],\n", " ['Y16', 'Y17', 2],\n", " ['Y17', 'Y18', 2],\n", " ['Y18', 'Y19', 2],\n", " ['Y19', 'Y20', 2],\n", " ['Y20', 'Y21', 2],\n", " ['Y21', 'Y22', 2],\n", " ['Y22', 'Y23', 3],\n", " ['Y23', 'Y24', 2]]},\n", " 'Z': {'color': '#a384bf',\n", " 'transfers': [['Z1', 'F16'],\n", " ['Z1', 'G1'],\n", " ['Z2', 'C4'],\n", " ['Z2', 'G2'],\n", " ['Z3', 'G4'],\n", " ['Z4', 'Y16'],\n", " ['Z4', 'N7'],\n", " ['Z4', 'M13'],\n", " ['Z4', 'G5'],\n", " ['Z6', 'T7'],\n", " ['Z8', 'M18'],\n", " ['Z8', 'C11'],\n", " ['Z8', 'T9'],\n", " ['Z9', 'G12']],\n", " 'travel_times': [['Z1', 'Z2', 3],\n", " ['Z2', 'Z3', 2],\n", " ['Z3', 'Z4', 2],\n", " ['Z4', 'Z5', 2],\n", " ['Z5', 'Z6', 2],\n", " ['Z6', 'Z7', 2],\n", " ['Z7', 'Z8', 2],\n", " ['Z8', 'Z9', 2],\n", " ['Z9', 'Z10', 3],\n", " ['Z10', 'Z11', 3],\n", " ['Z11', 'Z12', 3],\n", " ['Z12', 'Z13', 2],\n", " ['Z13', 'Z14', 2]]}}" ] }, "execution_count": 217, 
# %%
# Full dump of the Tokyo Metro data loaded from tokyo-metro.json (long output).
data

# %% [markdown]
# # Serialization

# %%
# Size of the JSON file on disk, for comparison with the msgpack and pickle
# files written below. Expanded form of `!ls -lh tokyo-metro.json`.
get_ipython().system("ls -lh tokyo-metro.json")

# %%
data_pack = msgpack.packb(data)

# %%
type(data_pack)

# %%
len(data_pack)

# %%
with open("tokyo-metro.msgpack", "wb") as f:
    f.write(data_pack)

# %%
get_ipython().system("ls -lh tokyo-metro.msgpack")

# %%
# raw=False decodes msgpack strings to `str`. Without it msgpack < 1.0 returns
# bytes, so the dictionary came back with keys b'C', b'G', ... instead of the
# original 'C', 'G', ... (raw=False is the default from msgpack 1.0 on).
with open("tokyo-metro.msgpack", "rb") as f:
    data_msgpack = f.read()
    data = msgpack.unpackb(data_msgpack, raw=False)

# %%
# The keys are str again, matching the data that was packed.
list(data.keys())

# %%
with open("tokyo-metro.pickle", "wb") as f:
    pickle.dump(data, f)

# %%
# Drop the in-memory copy so that the next load demonstrably comes from disk.
del data

# %%
get_ipython().system("ls -lh tokyo-metro.pickle")

# %%
# NOTE: unpickling can execute arbitrary code; only load pickle files from a
# trusted source (here: the file written a few cells above).
with open("tokyo-metro.pickle", "rb") as f:
    data = pickle.load(f)

# %%
data.keys()

# %% [markdown]
# # Versions

# %%
# Expanded form of the line magic `%reload_ext version_information`.
get_ipython().run_line_magic("reload_ext", "version_information")
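<table><tr><th>Software</th><th>Version</th></tr>
<tr><td>Python</td><td>3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]</td></tr>
<tr><td>IPython</td><td>7.5.0</td></tr>
<tr><td>OS</td><td>Darwin 18.2.0 x86_64 i386 64bit</td></tr>
<tr><td>numpy</td><td>1.16.3</td></tr>
<tr><td>pandas</td><td>0.24.2</td></tr>
<tr><td>csv</td><td>1.0</td></tr>
<tr><td>json</td><td>2.0.9</td></tr>
<tr><td>tables</td><td>3.5.1</td></tr>
<tr><td>h5py</td><td>2.9.0</td></tr>
<tr><td>msgpack</td><td>0.6.1</td></tr>
<tr><td colspan='2'>Mon May 06 16:13:03 2019 JST</td></tr></table>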
" ], "text/latex": [ "\\begin{tabular}{|l|l|}\\hline\n", "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n", "Python & 3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE\\_401/final)] \\\\ \\hline\n", "IPython & 7.5.0 \\\\ \\hline\n", "OS & Darwin 18.2.0 x86\\_64 i386 64bit \\\\ \\hline\n", "numpy & 1.16.3 \\\\ \\hline\n", "pandas & 0.24.2 \\\\ \\hline\n", "csv & 1.0 \\\\ \\hline\n", "json & 2.0.9 \\\\ \\hline\n", "tables & 3.5.1 \\\\ \\hline\n", "h5py & 2.9.0 \\\\ \\hline\n", "msgpack & 0.6.1 \\\\ \\hline\n", "\\hline \\multicolumn{2}{|l|}{Mon May 06 16:13:03 2019 JST} \\\\ \\hline\n", "\\end{tabular}\n" ], "text/plain": [ "Software versions\n", "Python 3.6.8 64bit [GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]\n", "IPython 7.5.0\n", "OS Darwin 18.2.0 x86_64 i386 64bit\n", "numpy 1.16.3\n", "pandas 0.24.2\n", "csv 1.0\n", "json 2.0.9\n", "tables 3.5.1\n", "h5py 2.9.0\n", "msgpack 0.6.1\n", "Mon May 06 16:13:03 2019 JST" ] }, "execution_count": 233, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%version_information numpy, pandas, csv, json, tables, h5py, msgpack" ] } ], "metadata": { "kernelspec": { "display_name": "py3.6", "language": "python", "name": "py3.6" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }