{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np # this module handles arrays, but here we need it for its NaN value\n", "import pandas as pd # this module contains a lot of tools for handling tabular data\n", "import re" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# define paths to the source files and eventual output file\n", "pathBottle='/ocean/eolson/MEOPAR/obs/NemcekHPLC/All 2015 SoG_bottle.xlsx'\n", "pathPhyto='/ocean/eolson/MEOPAR/obs/NemcekHPLC/2015-2019 SoG abs values New ALLO.xlsx'\n", "pathOut='/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def fmtCruise(istr):\n", " if re.fullmatch('[0-9]{4}-[0-9]{2}',istr): \n", " sp=re.split('-',istr)\n", " rstr=sp[0]+'-0'+sp[1]\n", " elif re.fullmatch('[0-9]{4}-[0-9]{3}',istr):\n", " rstr=istr\n", " else:\n", " raise ValueError('Input had unexpected format:',istr)\n", " return rstr" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading sheet 2015-17\n", "Reading sheet 2015-20\n", "Reading sheet 2015-18\n", "Reading sheet 2015-21\n", "Reading sheet 2015-19\n", "removed empty columns: {'Ammonium', 'Flag:Ammonium'}\n" ] } ], "source": [ "# load each sheet in the 2015 bottle Excel file and concatenate them together into one table\n", "dfbotlist=list()\n", "for sheet in ('2015-17','2015-20','2015-18','2015-21','2015-19'):\n", " df0=pd.read_excel(pathBottle,sheet_name=sheet,verbose=True,na_values=(-99,-99.9),engine='openpyxl',\n", " dtype={'Cruise':str,'Sample_Number':np.int64}) # read each sheet\n", " df0['Cruise']=fmtCruise(sheet) # create and populate Cruise column based on sheet name\n", " dfbotlist.append(df0) # append the sheet to a list\n", "dfbot=pd.concat(dfbotlist,ignore_index=True,sort=False) # concatenate the list into a single table\n", "# Drop columns with no data in them\n", "l1=set(dfbot.keys())\n", "dfbot.dropna(axis=1,how='all',inplace=True)\n", "print('removed empty columns:',l1-set(dfbot.keys()))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n", " 'LOC:EVENT_NUMBER', 'LOC:STATION', 'LOC:LATITUDE', 'LOC:LONGITUDE',\n", " 'LOC:WATER DEPTH', 'ADM:SCIENTIST', 'Sample_Number', 'Pressure',\n", " 'Temperature:Primary', 'Transmissivity', 'Fluorescence:URU:Seapoint',\n", " 'PAR', 'Salinity:T0:C0', 'Oxygen:Dissolved:SBE',\n", " 'Oxygen:Dissolved:SBE.1', 'pH:SBE:Nominal', 'Salinity:Bottle',\n", " 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n", " 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n", " 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n", " 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n", " 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Oxygen:Dissolved.1',\n", " 'Flag:Oxygen:Dissolved', 'Temperature:Secondary', 'Salinity:T1:C1',\n", " 'Number_of_bin_records'],\n", " dtype='object')\n" ] } ], "source": [ "# list the column names in the resulting table\n", "print(dfbot.keys())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def subval(idf,colList):\n", " # first value in colList should be the column you are going to keep\n", " # follow with other columns that will be used to fill in when that column is NaN\n", " # in order of precedence\n", " if len(colList)==2:\n", " idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n", " else r[colList[1]] for i,r in idf.iterrows()]\n", " elif len(colList)==3:\n", " idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n", " else r[colList[1]] if not pd.isna(r[colList[1]]) \\\n", " else r[colList[2]] for i,r in idf.iterrows()]\n", " else:\n", " raise NotImplementedError('Add to code to handle this case')\n", " idf.drop(columns=list(colList[1:]),inplace=True)\n", " return idf" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "dfbot=subval(dfbot,('Oxygen:Dissolved','Oxygen:Dissolved.1'))\n", "dfbot=subval(dfbot,('Temperature:Primary','Temperature:Secondary'))\n", "dfbot=subval(dfbot,('Salinity:T0:C0','Salinity:T1:C1'))\n", "dfbot=subval(dfbot,('Oxygen:Dissolved:SBE','Oxygen:Dissolved:SBE.1'))\n", "dfbot.rename(columns={'LOC:LATITUDE':'Lat',\n", " 'LOC:LONGITUDE':'Lon',\n", " 'Temperature:Primary':'Temperature',\n", " 'Oxygen:Dissolved:SBE':'Oxygen:Dissolved:CTD',\n", " 'Salinity:T0:C0':'Salinity',},inplace=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# define a function that will be applied to the values in the index column;\n", "# this makes it easier to drop non-data rows later\n", "def convertIndex(val):\n", " try:\n", " x =int(val)\n", " except ValueError:\n", " x=-999\n", " return x" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading sheet 2015\n" ] } ], "source": [ "# load the 2015 phytoplankton data with the following options:\n", "# sheet_name='2015 CHEMTAX abs results' -> choose the 2015 sheet\n", "# usecols='A:I,T:AC' -> read only columns A:I and T:AC from the Excel sheet\n", "# skiprows=2 -> start reading at the 3rd row of the sheet, \n", "# which contains the column headings\n", "# converters={'Index': convertIndex,} -> apply the function defined above to the Index column\n", "# verbose = True -> print extra information/ warnings/ errors \n", "#usecols='A:I,T:AC',\n", "dfPhyto=pd.read_excel(pathPhyto,sheet_name='2015',engine='openpyxl',\n", " skiprows=0,converters={'Sample#': convertIndex,},\n", " verbose=True)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CruiseDateStationSample#repdepthDiatoms-1Diatoms-2PrasinophytesCryptophytesDinoflagellates-1HaptophytesDictyoRaphidoCyanobacteriaTchlA
48CruiseDateStation-999repdepthDiatoms-1Diatoms-2PrasinophytesCryptophytesDinoflagellates-1HaptophytesDictyoRaphidoCyanobacteriaTchlA
492015-019SeptemberSI9mean0000.2620772.544790.5418011.61053000.004801524.964
502015-019September5923mean00.8242910.141730.08789560.2413630.02190830.06244040.004939410.03916620.01226571.436
512015-019September10238mean04.236971.28060.1385050.4523350.06267710.27376500.058146206.503
522015-019September7552mean01.972830.8515090.4153090.7121030.1005790.3391340.01474960.15978904.566
532015-019September7265mean00.821080.1892510.1666890.2582740.0508020.1344430.007176620.05658120.0007031351.685
542015-019September6977mean00.697040.1911740.1043550.1563050.0004299990.07105860.0119220.039715501.272
552015-019SeptemberADCP87mean00.7474730.2338910.07081160.2099120.02597960.08231370.001087470.034531801.406
562015-019September6597mean00.7346730.1527870.04141430.1692930.01560130.06994480.007100060.02170770.004478091.217
572015-019September63108mean00.6217560.1326320.03372930.1058310.01019630.03390160.0135030.02145100.973
582015-019September62119mean00.633920.1368180.03891490.1467250.008715050.008128580.022167800.0006107120.996
592015-019September56133mean01.164640.04815370.1934940.2821890.01089590.02487960.01612390.04171640.04890611.831
\n", "
" ], "text/plain": [ " Cruise Date Station Sample# rep depth Diatoms-1 Diatoms-2 \\\n", "48 Cruise Date Station -999 rep depth Diatoms-1 Diatoms-2 \n", "49 2015-019 September SI 9 mean 0 0 0 \n", "50 2015-019 September 59 23 mean 0 0.824291 0.14173 \n", "51 2015-019 September 102 38 mean 0 4.23697 1.2806 \n", "52 2015-019 September 75 52 mean 0 1.97283 0.851509 \n", "53 2015-019 September 72 65 mean 0 0.82108 0.189251 \n", "54 2015-019 September 69 77 mean 0 0.69704 0.191174 \n", "55 2015-019 September ADCP 87 mean 0 0.747473 0.233891 \n", "56 2015-019 September 65 97 mean 0 0.734673 0.152787 \n", "57 2015-019 September 63 108 mean 0 0.621756 0.132632 \n", "58 2015-019 September 62 119 mean 0 0.63392 0.136818 \n", "59 2015-019 September 56 133 mean 0 1.16464 0.0481537 \n", "\n", " Prasinophytes Cryptophytes Dinoflagellates-1 Haptophytes Dictyo \\\n", "48 Prasinophytes Cryptophytes Dinoflagellates-1 Haptophytes Dictyo \n", "49 0.262077 2.54479 0.541801 1.61053 0 \n", "50 0.0878956 0.241363 0.0219083 0.0624404 0.00493941 \n", "51 0.138505 0.452335 0.0626771 0.273765 0 \n", "52 0.415309 0.712103 0.100579 0.339134 0.0147496 \n", "53 0.166689 0.258274 0.050802 0.134443 0.00717662 \n", "54 0.104355 0.156305 0.000429999 0.0710586 0.011922 \n", "55 0.0708116 0.209912 0.0259796 0.0823137 0.00108747 \n", "56 0.0414143 0.169293 0.0156013 0.0699448 0.00710006 \n", "57 0.0337293 0.105831 0.0101963 0.0339016 0.013503 \n", "58 0.0389149 0.146725 0.00871505 0.00812858 0.0221678 \n", "59 0.193494 0.282189 0.0108959 0.0248796 0.0161239 \n", "\n", " Raphido Cyanobacteria TchlA \n", "48 Raphido Cyanobacteria TchlA \n", "49 0 0.00480152 4.964 \n", "50 0.0391662 0.0122657 1.436 \n", "51 0.0581462 0 6.503 \n", "52 0.159789 0 4.566 \n", "53 0.0565812 0.000703135 1.685 \n", "54 0.0397155 0 1.272 \n", "55 0.0345318 0 1.406 \n", "56 0.0217077 0.00447809 1.217 \n", "57 0.021451 0 0.973 \n", "58 0 0.000610712 0.996 \n", "59 0.0417164 0.0489061 1.831 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# display rows 48 to 59 of the resulting table\n", "dfPhyto[48:60]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# now, drop any rows from the table that have NaN values in either of the columns\n", "# 'Index' or 'TchlA (ug/L)'\n", "# This is why we applied a function to the Index column to make sure all \n", "# non-numeric Index values would have a consistent NaN entry, making them easy to identify\n", "# and remove\n", "dfPhyto.dropna(subset=['TchlA'],how='any',inplace=True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "dfPhyto.drop(dfPhyto[dfPhyto['Sample#']<0].index,inplace=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# pandas creates its own index, and after dropping rows I like to reset it -\n", "# this is just for convenience\n", "dfPhyto.reset_index(drop=True,inplace=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CruiseSample#
02015-0179
12015-01723
22015-01737
32015-01751
42015-01764
52015-01776
62015-01786
72015-01796
82015-017109
92015-017120
102015-017134
\n", "
" ], "text/plain": [ " Cruise Sample#\n", "0 2015-017 9\n", "1 2015-017 23\n", "2 2015-017 37\n", "3 2015-017 51\n", "4 2015-017 64\n", "5 2015-017 76\n", "6 2015-017 86\n", "7 2015-017 96\n", "8 2015-017 109\n", "9 2015-017 120\n", "10 2015-017 134" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# display part of the table, confirming that non-data rows have been removed\n", "dfPhyto.loc[:10,['Cruise','Sample#']]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CruiseSample_Number
02015-0171
12015-0172
22015-0173
32015-0174
42015-0175
52015-0176
62015-0177
72015-0178
82015-0179
92015-01710
102015-01711
112015-01712
122015-01713
132015-01714
142015-01715
152015-01716
162015-01717
172015-01718
182015-01719
192015-01720
202015-01721
\n", "
" ], "text/plain": [ " Cruise Sample_Number\n", "0 2015-017 1\n", "1 2015-017 2\n", "2 2015-017 3\n", "3 2015-017 4\n", "4 2015-017 5\n", "5 2015-017 6\n", "6 2015-017 7\n", "7 2015-017 8\n", "8 2015-017 9\n", "9 2015-017 10\n", "10 2015-017 11\n", "11 2015-017 12\n", "12 2015-017 13\n", "13 2015-017 14\n", "14 2015-017 15\n", "15 2015-017 16\n", "16 2015-017 17\n", "17 2015-017 18\n", "18 2015-017 19\n", "19 2015-017 20\n", "20 2015-017 21" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfbot.loc[:20,['Cruise','Sample_Number']]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# due to repeated column names in the original spreadsheet, '.1' was appended to the names\n", "# of the phytoplankton columns; \n", "# these lines correct the column names, removing the '.1':\n", "renameDict=dict()\n", "for colName in dfPhyto.keys():\n", " if colName.endswith('.1'):\n", " renameDict[colName]=colName.split('.1')[0]\n", "dfPhyto.rename(columns=renameDict,inplace=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CruiseDateStationSample#repdepthDiatoms-1Diatoms-2PrasinophytesCryptophytesDinoflagellates-1HaptophytesDictyoRaphidoCyanobacteriaTchlA
02015-017AprilSI9mean023.58052.41835000000.163169026.162
12015-017April5923mean00.2717990.030300200.05212750.004673480.08010000.439
22015-017April10237mean02.033340.8097830.04429650.180860.02451260.004100680.004831890.02336980.003907023.129
32015-017April7551mean02.722431.093460.03879710.259750.02946290.01855520.01365850.03289070.01000364.219
42015-017April7264mean03.341391.591860.1729970.9248370.32951400.002136070.022266206.385
...................................................
652015-019September9248mean00.5585970.2077410.6056810.4334240.0009549120.2007970.0312960.07393150.1385772.251
662015-019September6261mean00.2561060.2548450.5569410.8267065.79139e-050.3782960.06197770.07076570.1243042.53
672015-019SeptemberBS11269mean09.836010.01712240.01098730.3449130.001971830.033415000.02558410.27
682015-019September3285mean00.5745240.1407870.347850.6972510.007857980.3093060.02531540.09671670.1713912.371
692015-019September2300mean01.6384400.713150.4598140.002999310.2705790.01994150.1882520.1698283.463
\n", "

70 rows × 16 columns

\n", "
" ], "text/plain": [ " Cruise Date Station Sample# rep depth Diatoms-1 Diatoms-2 \\\n", "0 2015-017 April SI 9 mean 0 23.5805 2.41835 \n", "1 2015-017 April 59 23 mean 0 0.271799 0.0303002 \n", "2 2015-017 April 102 37 mean 0 2.03334 0.809783 \n", "3 2015-017 April 75 51 mean 0 2.72243 1.09346 \n", "4 2015-017 April 72 64 mean 0 3.34139 1.59186 \n", ".. ... ... ... ... ... ... ... ... \n", "65 2015-019 September 9 248 mean 0 0.558597 0.207741 \n", "66 2015-019 September 6 261 mean 0 0.256106 0.254845 \n", "67 2015-019 September BS11 269 mean 0 9.83601 0.0171224 \n", "68 2015-019 September 3 285 mean 0 0.574524 0.140787 \n", "69 2015-019 September 2 300 mean 0 1.63844 0 \n", "\n", " Prasinophytes Cryptophytes Dinoflagellates-1 Haptophytes Dictyo \\\n", "0 0 0 0 0 0 \n", "1 0 0.0521275 0.00467348 0.0801 0 \n", "2 0.0442965 0.18086 0.0245126 0.00410068 0.00483189 \n", "3 0.0387971 0.25975 0.0294629 0.0185552 0.0136585 \n", "4 0.172997 0.924837 0.329514 0 0.00213607 \n", ".. ... ... ... ... ... \n", "65 0.605681 0.433424 0.000954912 0.200797 0.031296 \n", "66 0.556941 0.826706 5.79139e-05 0.378296 0.0619777 \n", "67 0.0109873 0.344913 0.00197183 0.033415 0 \n", "68 0.34785 0.697251 0.00785798 0.309306 0.0253154 \n", "69 0.71315 0.459814 0.00299931 0.270579 0.0199415 \n", "\n", " Raphido Cyanobacteria TchlA \n", "0 0.163169 0 26.162 \n", "1 0 0 0.439 \n", "2 0.0233698 0.00390702 3.129 \n", "3 0.0328907 0.0100036 4.219 \n", "4 0.0222662 0 6.385 \n", ".. ... ... ... \n", "65 0.0739315 0.138577 2.251 \n", "66 0.0707657 0.124304 2.53 \n", "67 0 0.025584 10.27 \n", "68 0.0967167 0.171391 2.371 \n", "69 0.188252 0.169828 3.463 \n", "\n", "[70 rows x 16 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfPhyto" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# This is the important step- join the two tables ('left' and 'right'), \n", "# matching the cruise IDs and sample numbers\n", "# how='outer' -> all rows from both the left and the right tables will be included, \n", "# even if they cannot be matched; this makes it easy to check for \n", "# unmatched data later\n", "# left_on specifies the name of the column to match in the left table (dfbot) \n", "# right_on specifies the name of the column to match in the right table (dfPhyto)\n", "dfout = pd.merge(dfbot, dfPhyto, how='outer', \n", " left_on=['Cruise','Sample_Number'], right_on = ['Cruise','Sample#'])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n", " 'LOC:EVENT_NUMBER', 'LOC:STATION', 'Lat', 'Lon', 'LOC:WATER DEPTH',\n", " 'ADM:SCIENTIST', 'Sample_Number', 'Pressure', 'Temperature',\n", " 'Transmissivity', 'Fluorescence:URU:Seapoint', 'PAR', 'Salinity',\n", " 'Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle',\n", " 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n", " 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n", " 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n", " 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n", " 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved',\n", " 'Number_of_bin_records', 'Date', 'Station', 'Sample#', 'rep', 'depth',\n", " 'Diatoms-1', 'Diatoms-2', 'Prasinophytes', 'Cryptophytes',\n", " 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',\n", " 'Cyanobacteria', 'TchlA'],\n", " dtype='object')" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# show the column names in the resulting table\n", "dfout.keys()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checks" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Identify cases where phytoplankton data were matched to multiple samples in bottle data:\n", "dftest=pd.merge(dfbot, dfPhyto,how='right', left_on=['Cruise','Sample_Number'],right_on = ['Cruise','Sample#'])" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "temp=dftest.groupby(['Cruise','Sample#']).agg({'Cruise':['count']})\n", "temp.columns = ['icount']" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.unique(temp.icount)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
icount
CruiseSample#
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [icount]\n", "Index: []" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check for Phyto samples matched to multiple bottle samples:\n", "temp.loc[temp.icount>1]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
icount
CruiseSample#
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [icount]\n", "Index: []" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check for phyto samples not matched to bottle samples:\n", "temp.loc[temp.icount==0]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
icount
CruiseSample_Number
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [icount]\n", "Index: []" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp2=dfout.groupby(['Cruise','Sample_Number']).agg({'Cruise':['count']})\n", "temp2.columns = ['icount']\n", "# this will catch phyto matched to multiple bottle but also bottle with duplicate sample numbers per cruise:\n", "temp2.loc[temp2.icount>1]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
icount
CruiseSample#
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [icount]\n", "Index: []" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check for phyto samples not matched to bottle samples:\n", "temp.loc[temp.icount==0]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(938, 70, 938)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# if the output table is longer than either of the input tables, some columns were not matched\n", "len(dfout), len(dfPhyto), len(dfbot)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
File NameZoneFIL:START TIME YYYY/MM/DD HH:MM:SSLOC:EVENT_NUMBERLOC:STATIONLatLonLOC:WATER DEPTHADM:SCIENTISTSample_Number...Diatoms-1Diatoms-2PrasinophytesCryptophytesDinoflagellates-1HaptophytesDictyoRaphidoCyanobacteriaTchlA
02015-17-0001.cheUTC2015-04-01 23:42:171SI48.65233-123.50183187Chandler P.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
12015-17-0001.cheUTC2015-04-01 23:42:171SI48.65233-123.50183187Chandler P.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
22015-17-0001.cheUTC2015-04-01 23:42:171SI48.65233-123.50183187Chandler P.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
32015-17-0001.cheUTC2015-04-01 23:42:171SI48.65233-123.50183187Chandler P.4...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
42015-17-0001.cheUTC2015-04-01 23:42:171SI48.65233-123.50183187Chandler P.5...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
9332015-19-0091.cheUTC2015-10-04 00:30:5591249.40167-124.15600279Chandler P.296...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
9342015-19-0091.cheUTC2015-10-04 00:30:5591249.40167-124.15600279Chandler P.297...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
9352015-19-0091.cheUTC2015-10-04 00:30:5591249.40167-124.15600279Chandler P.298...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
9362015-19-0091.cheUTC2015-10-04 00:30:5591249.40167-124.15600279Chandler P.299...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
9372015-19-0091.cheUTC2015-10-04 00:30:5591249.40167-124.15600279Chandler P.300...1.6384400.713150.4598140.002999310.2705790.01994150.1882520.1698283.463
\n", "

938 rows × 49 columns

\n", "
" ], "text/plain": [ " File Name Zone FIL:START TIME YYYY/MM/DD HH:MM:SS \\\n", "0 2015-17-0001.che UTC 2015-04-01 23:42:17 \n", "1 2015-17-0001.che UTC 2015-04-01 23:42:17 \n", "2 2015-17-0001.che UTC 2015-04-01 23:42:17 \n", "3 2015-17-0001.che UTC 2015-04-01 23:42:17 \n", "4 2015-17-0001.che UTC 2015-04-01 23:42:17 \n", ".. ... ... ... \n", "933 2015-19-0091.che UTC 2015-10-04 00:30:55 \n", "934 2015-19-0091.che UTC 2015-10-04 00:30:55 \n", "935 2015-19-0091.che UTC 2015-10-04 00:30:55 \n", "936 2015-19-0091.che UTC 2015-10-04 00:30:55 \n", "937 2015-19-0091.che UTC 2015-10-04 00:30:55 \n", "\n", " LOC:EVENT_NUMBER LOC:STATION Lat Lon LOC:WATER DEPTH \\\n", "0 1 SI 48.65233 -123.50183 187 \n", "1 1 SI 48.65233 -123.50183 187 \n", "2 1 SI 48.65233 -123.50183 187 \n", "3 1 SI 48.65233 -123.50183 187 \n", "4 1 SI 48.65233 -123.50183 187 \n", ".. ... ... ... ... ... \n", "933 91 2 49.40167 -124.15600 279 \n", "934 91 2 49.40167 -124.15600 279 \n", "935 91 2 49.40167 -124.15600 279 \n", "936 91 2 49.40167 -124.15600 279 \n", "937 91 2 49.40167 -124.15600 279 \n", "\n", " ADM:SCIENTIST Sample_Number ... Diatoms-1 Diatoms-2 Prasinophytes \\\n", "0 Chandler P. 1 ... NaN NaN NaN \n", "1 Chandler P. 2 ... NaN NaN NaN \n", "2 Chandler P. 3 ... NaN NaN NaN \n", "3 Chandler P. 4 ... NaN NaN NaN \n", "4 Chandler P. 5 ... NaN NaN NaN \n", ".. ... ... ... ... ... ... \n", "933 Chandler P. 296 ... NaN NaN NaN \n", "934 Chandler P. 297 ... NaN NaN NaN \n", "935 Chandler P. 298 ... NaN NaN NaN \n", "936 Chandler P. 299 ... NaN NaN NaN \n", "937 Chandler P. 300 ... 1.63844 0 0.71315 \n", "\n", " Cryptophytes Dinoflagellates-1 Haptophytes Dictyo Raphido \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", ".. ... ... ... ... ... \n", "933 NaN NaN NaN NaN NaN \n", "934 NaN NaN NaN NaN NaN \n", "935 NaN NaN NaN NaN NaN \n", "936 NaN NaN NaN NaN NaN \n", "937 0.459814 0.00299931 0.270579 0.0199415 0.188252 \n", "\n", " Cyanobacteria TchlA \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", ".. ... ... \n", "933 NaN NaN \n", "934 NaN NaN \n", "935 NaN NaN \n", "936 NaN NaN \n", "937 0.169828 3.463 \n", "\n", "[938 rows x 49 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfout" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(70, 70)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check that the number of cells with data in the 'Cyanobacteria' column is \n", "# the same for the input and output tables:\n", "np.sum(dfPhyto['Cyanobacteria']>=0), np.sum(dfout['Cyanobacteria']>=0)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Series([], Name: Sample#, dtype: float64)\n" ] } ], "source": [ "# If there were data rows from the phytoplankton table that were not matched to \n", "# rows from the bottle table, their indices from the phytoplankton table would be \n", "# displayed below (the series [] would not be empty)\n", "print(dfout.loc[dfout['ADM:SCIENTIST'].isna()]['Sample#'])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n", " 'LOC:EVENT_NUMBER', 'LOC:STATION', 'Lat', 'Lon', 'LOC:WATER DEPTH',\n", " 'ADM:SCIENTIST', 'Sample_Number', 'Pressure', 'Temperature',\n", " 'Transmissivity', 'Fluorescence:URU:Seapoint', 'PAR', 'Salinity',\n", " 'Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle',\n", " 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n", " 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n", " 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n", " 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n", " 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved',\n", " 'Number_of_bin_records', 'Date', 'Station', 'Sample#', 'rep', 'depth',\n", " 'Diatoms-1', 'Diatoms-2', 'Prasinophytes', 'Cryptophytes',\n", " 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',\n", " 'Cyanobacteria', 'TchlA'],\n", " dtype='object')" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfout.keys()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# drop repetetive/unecessary columns: 'Index','Bin #', 'Subgroup', 'Month',\n", "dfout.drop(labels=['Date', 'Station', 'Sample#', 'rep',\n", " 'depth',],axis=1,inplace=True)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# truncate phyto group values to 3 decimal places:\n", "for col in ('Cyanobacteria', 'Prasinophytes', 'Cryptophytes', 'Diatoms-1',\n", " 'Diatoms-2', 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',\n", " 'TchlA'):\n", " dfout[col]=[np.round(ii,decimals=3) for ii in dfout[col]] # use list comprehension to set values for entire column" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
File NameZoneFIL:START TIME YYYY/MM/DD HH:MM:SSLOC:EVENT_NUMBERLOC:STATIONLatLonLOC:WATER DEPTHADM:SCIENTISTSample_Number...Diatoms-1Diatoms-2PrasinophytesCryptophytesDinoflagellates-1HaptophytesDictyoRaphidoCyanobacteriaTchlA
\n", "

0 rows × 44 columns

\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [File Name, Zone, FIL:START TIME YYYY/MM/DD HH:MM:SS, LOC:EVENT_NUMBER, LOC:STATION, Lat, Lon, LOC:WATER DEPTH, ADM:SCIENTIST, Sample_Number, Pressure, Temperature, Transmissivity, Fluorescence:URU:Seapoint, PAR, Salinity, Oxygen:Dissolved:CTD, pH:SBE:Nominal, Salinity:Bottle, Flag:Salinity:Bottle, Chlorophyll:Extracted, Flag:Chlorophyll:Extracted, Nitrate_plus_Nitrite, Flag:Nitrate_plus_Nitrite, Silicate, Flag:Silicate, Phosphate, Flag:Phosphate, Cruise, Temperature:Draw, Phaeo-Pigment:Extracted, Oxygen:Dissolved, Flag:Oxygen:Dissolved, Number_of_bin_records, Diatoms-1, Diatoms-2, Prasinophytes, Cryptophytes, Dinoflagellates-1, Haptophytes, Dictyo, Raphido, Cyanobacteria, TchlA]\n", "Index: []\n", "\n", "[0 rows x 44 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check for unexpected depths\n", "dfout.loc[((dfout['Pressure']>10))&(dfout['Diatoms-1']>=0)]#,\n", "# ['FIL:START TIME YYYY/MM/DD HH:MM:SS','Pressure [decibar]','Depth [metres]','Diatoms-1']]" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n", " 'LOC:EVENT_NUMBER', 'LOC:STATION', 'Lat', 'Lon', 'LOC:WATER DEPTH',\n", " 'ADM:SCIENTIST', 'Sample_Number', 'Pressure', 'Temperature',\n", " 'Transmissivity', 'Fluorescence:URU:Seapoint', 'PAR', 'Salinity',\n", " 'Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle',\n", " 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n", " 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n", " 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n", " 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n", " 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved',\n", " 'Number_of_bin_records', 'Diatoms-1', 'Diatoms-2', 'Prasinophytes',\n", " 'Cryptophytes', 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',\n", " 'Cyanobacteria', 'TchlA'],\n", " dtype='object')" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfout.keys()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# now write the output table to a .csv file:\n", "dfout.to_csv(pathOut, index=False) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python (py39)", "language": "python", "name": "py39" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 4 }