{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np # this module handles arrays, but here we need it for its NaN value\n",
"import pandas as pd # this module contains a lot of tools for handling tabular data\n",
"from matplotlib import pyplot as plt\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# define paths to the source files and eventual output file\n",
"pathBottle='/ocean/eolson/MEOPAR/obs/NemcekHPLC//All 2015 SoG_bottle.xlsx'\n",
"pathPhyto='/ocean/eolson/MEOPAR/obs/NemcekHPLC/2015-2018 Abs phyto groupsCorrected.xlsx'\n",
"pathOut='/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015.csv'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading sheet 2015-17\n",
"Reading sheet 2015-20\n",
"Reading sheet 2015-18\n",
"Reading sheet 2015-21\n",
"Reading sheet 2015-19\n",
"removed empty columns: {'Ammonium', 'Flag:Ammonium'}\n"
]
}
],
"source": [
"# load each sheet in the 2015 bottle Excel file and concatenate them together into one table\n",
"dfbotlist=list()\n",
"for sheet in ('2015-17','2015-20','2015-18','2015-21','2015-19'):\n",
" df0=pd.read_excel(pathBottle,sheet_name=sheet,verbose=True,na_values=(-99,-99.9)) # read each sheet\n",
" df0['Cruise']=sheet # create and populate Cruise column based on sheet name\n",
" dfbotlist.append(df0) # append the sheet to a list\n",
"dfbot=pd.concat(dfbotlist,ignore_index=True,sort=False) # concatenate the list into a single table\n",
"# Drop columns with no data in them\n",
"l1=set(dfbot.keys())\n",
"dfbot.dropna(axis=1,how='all',inplace=True)\n",
"print('removed empty columns:',l1-set(dfbot.keys()))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n",
" 'LOC:EVENT_NUMBER', 'LOC:STATION', 'LOC:LATITUDE', 'LOC:LONGITUDE',\n",
" 'LOC:WATER DEPTH', 'ADM:SCIENTIST', 'Sample_Number', 'Pressure',\n",
" 'Temperature:Primary', 'Transmissivity', 'Fluorescence:URU:Seapoint',\n",
" 'PAR', 'Salinity:T0:C0', 'Oxygen:Dissolved:SBE',\n",
" 'Oxygen:Dissolved:SBE.1', 'pH:SBE:Nominal', 'Salinity:Bottle',\n",
" 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n",
" 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n",
" 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n",
" 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n",
" 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Oxygen:Dissolved.1',\n",
" 'Flag:Oxygen:Dissolved', 'Temperature:Secondary', 'Salinity:T1:C1',\n",
" 'Number_of_bin_records'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# list the column names in the resulting table\n",
"print(dfbot.keys())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def subval(idf,colList):\n",
" # first value in colList should be the column you are going to keep\n",
" # follow with other columns that will be used to fill in when that column is NaN\n",
" # in order of precedence\n",
" if len(colList)==2:\n",
" idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n",
" else r[colList[1]] for i,r in idf.iterrows()]\n",
" elif len(colList)==3:\n",
" idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n",
" else r[colList[1]] if not pd.isna(r[colList[1]]) \\\n",
" else r[colList[2]] for i,r in idf.iterrows()]\n",
" else:\n",
" raise NotImplementedError('Add to code to handle this case')\n",
" idf.drop(columns=list(colList[1:]),inplace=True)\n",
" return idf"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"dfbot=subval(dfbot,('Oxygen:Dissolved','Oxygen:Dissolved.1'))\n",
"dfbot=subval(dfbot,('Temperature:Primary','Temperature:Secondary'))\n",
"dfbot=subval(dfbot,('Salinity:T0:C0','Salinity:T1:C1'))\n",
"dfbot=subval(dfbot,('Oxygen:Dissolved:SBE','Oxygen:Dissolved:SBE.1'))\n",
"dfbot.rename(columns={'LOC:LATITUDE':'Lat',\n",
" 'LOC:LONGITUDE':'Lon',\n",
" 'Temperature:Primary':'Temperature',\n",
" 'Oxygen:Dissolved:SBE':'Oxygen:Dissolved:CTD',\n",
" 'Salinity:T0:C0':'Salinity',},inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# define a function that will be applied to the values in the index column;\n",
"# this makes it easier to drop non-data rows later\n",
"def convertIndex(val):\n",
" try:\n",
" x =int(val)\n",
" except ValueError:\n",
" x=np.nan\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading sheet 2015 CHEMTAX abs results\n"
]
}
],
"source": [
"# load the 2015 phytoplankton data with the following options:\n",
"# sheet_name='2015 CHEMTAX abs results' -> choose the 2015 sheet\n",
"# usecols='A:I,T:AC' -> read only columns A:I and T:AC from the Excel sheet\n",
"# skiprows=2 -> start reading at the 3rd row of the sheet, \n",
"# which contains the column headings\n",
"# converters={'Index': convertIndex,} -> apply the function defined above to the Index column\n",
"# verbose = True -> print extra information/ warnings/ errors\n",
"dfPhyto=pd.read_excel(pathPhyto,sheet_name='2015 CHEMTAX abs results',usecols='A:I,T:AC',\n",
" skiprows=2,converters={'Index': convertIndex,},\n",
" verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Bin # | \n",
" Index | \n",
" Subgroup | \n",
" Cruise | \n",
" Month | \n",
" Station | \n",
" Sample# | \n",
" rep | \n",
" depth | \n",
" Cyanobacteria.1 | \n",
" Prasinophytes.1 | \n",
" Cryptophytes.1 | \n",
" Diatoms-1.1 | \n",
" Diatoms-2.1 | \n",
" Dinoflagellates-1.1 | \n",
" Haptophytes.1 | \n",
" Dictyo.1 | \n",
" Raphido.1 | \n",
" TchlA (ug/L) | \n",
"
\n",
" \n",
" \n",
" \n",
" 48 | \n",
" 12 | \n",
" 49.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 22 | \n",
" 303 | \n",
" A | \n",
" 0 | \n",
" 0 | \n",
" 0.0915533 | \n",
" 0.0933184 | \n",
" 9.81634 | \n",
" 1.17253 | \n",
" 0.00630329 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 11.1801 | \n",
"
\n",
" \n",
" 49 | \n",
" 12 | \n",
" 50.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 22 | \n",
" 303 | \n",
" B | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50 | \n",
" Absolute Pigment Compositions - Bin # 16 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" From Sheet: OutR4 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 51 | \n",
" Tchl_a | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 52 | \n",
" Bin # | \n",
" NaN | \n",
" Subgroup | \n",
" Cruise | \n",
" Month | \n",
" Station | \n",
" Sample# | \n",
" rep | \n",
" depth | \n",
" Cyanobacteria | \n",
" Prasinophytes | \n",
" Cryptophytes | \n",
" Diatoms-1 | \n",
" Diatoms-2 | \n",
" Dinoflagellates-1 | \n",
" Haptophytes | \n",
" Dictyo | \n",
" Raphido | \n",
" TchlA | \n",
"
\n",
" \n",
" 53 | \n",
" 16 | \n",
" 51.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" CPF2 | \n",
" 396 | \n",
" A | \n",
" 0 | \n",
" 0.0672112 | \n",
" 0.10427 | \n",
" 0.0489969 | \n",
" 0 | \n",
" 0.0163572 | \n",
" 0 | \n",
" 0.184355 | \n",
" 0 | \n",
" 0.0422233 | \n",
" 0.463414 | \n",
"
\n",
" \n",
" 54 | \n",
" 16 | \n",
" 52.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" CPF2 | \n",
" 396 | \n",
" B | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 55 | \n",
" 16 | \n",
" 53.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" CPF1 | \n",
" 397 | \n",
" A | \n",
" 0 | \n",
" 0.0043579 | \n",
" 0.495659 | \n",
" 0.462661 | \n",
" 0.692599 | \n",
" 0 | \n",
" 0.0954998 | \n",
" 0.246477 | \n",
" 0.0565426 | \n",
" 0.0569377 | \n",
" 2.11073 | \n",
"
\n",
" \n",
" 56 | \n",
" 16 | \n",
" 54.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" CPF1 | \n",
" 397 | \n",
" B | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 57 | \n",
" 16 | \n",
" 55.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" 22 | \n",
" 398 | \n",
" A | \n",
" 0 | \n",
" 0.0140808 | \n",
" 0.169415 | \n",
" 0.087069 | \n",
" 0.392301 | \n",
" 0.0331626 | \n",
" 3.15016e-06 | \n",
" 0.0847585 | \n",
" 0 | \n",
" 0.00837901 | \n",
" 0.789169 | \n",
"
\n",
" \n",
" 58 | \n",
" 16 | \n",
" 56.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" 22 | \n",
" 398 | \n",
" B | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 59 | \n",
" 16 | \n",
" 57.0 | \n",
" 2 | \n",
" 2015-20 | \n",
" June | \n",
" 24 | \n",
" 399 | \n",
" A | \n",
" 0 | \n",
" 0.0200784 | \n",
" 0.101276 | \n",
" 0.0692901 | \n",
" 0.406107 | \n",
" 0.00241722 | \n",
" 0.000152693 | \n",
" 0.00213347 | \n",
" 6.5546e-05 | \n",
" 0 | \n",
" 0.60152 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Bin # Index Subgroup Cruise Month \\\n",
"48 12 49.0 1 2015-17 April \n",
"49 12 50.0 1 2015-17 April \n",
"50 Absolute Pigment Compositions - Bin # 16 NaN NaN NaN NaN \n",
"51 Tchl_a NaN NaN NaN NaN \n",
"52 Bin # NaN Subgroup Cruise Month \n",
"53 16 51.0 2 2015-20 June \n",
"54 16 52.0 2 2015-20 June \n",
"55 16 53.0 2 2015-20 June \n",
"56 16 54.0 2 2015-20 June \n",
"57 16 55.0 2 2015-20 June \n",
"58 16 56.0 2 2015-20 June \n",
"59 16 57.0 2 2015-20 June \n",
"\n",
" Station Sample# rep depth Cyanobacteria.1 Prasinophytes.1 \\\n",
"48 22 303 A 0 0 0.0915533 \n",
"49 22 303 B 0 NaN NaN \n",
"50 From Sheet: OutR4 NaN NaN NaN NaN NaN \n",
"51 NaN NaN NaN NaN NaN NaN \n",
"52 Station Sample# rep depth Cyanobacteria Prasinophytes \n",
"53 CPF2 396 A 0 0.0672112 0.10427 \n",
"54 CPF2 396 B 0 NaN NaN \n",
"55 CPF1 397 A 0 0.0043579 0.495659 \n",
"56 CPF1 397 B 0 NaN NaN \n",
"57 22 398 A 0 0.0140808 0.169415 \n",
"58 22 398 B 0 NaN NaN \n",
"59 24 399 A 0 0.0200784 0.101276 \n",
"\n",
" Cryptophytes.1 Diatoms-1.1 Diatoms-2.1 Dinoflagellates-1.1 Haptophytes.1 \\\n",
"48 0.0933184 9.81634 1.17253 0.00630329 0 \n",
"49 NaN NaN NaN NaN NaN \n",
"50 NaN NaN NaN NaN NaN \n",
"51 NaN NaN NaN NaN NaN \n",
"52 Cryptophytes Diatoms-1 Diatoms-2 Dinoflagellates-1 Haptophytes \n",
"53 0.0489969 0 0.0163572 0 0.184355 \n",
"54 NaN NaN NaN NaN NaN \n",
"55 0.462661 0.692599 0 0.0954998 0.246477 \n",
"56 NaN NaN NaN NaN NaN \n",
"57 0.087069 0.392301 0.0331626 3.15016e-06 0.0847585 \n",
"58 NaN NaN NaN NaN NaN \n",
"59 0.0692901 0.406107 0.00241722 0.000152693 0.00213347 \n",
"\n",
" Dictyo.1 Raphido.1 TchlA (ug/L) \n",
"48 0 0 11.1801 \n",
"49 NaN NaN NaN \n",
"50 NaN NaN NaN \n",
"51 NaN NaN NaN \n",
"52 Dictyo Raphido TchlA \n",
"53 0 0.0422233 0.463414 \n",
"54 NaN NaN NaN \n",
"55 0.0565426 0.0569377 2.11073 \n",
"56 NaN NaN NaN \n",
"57 0 0.00837901 0.789169 \n",
"58 NaN NaN NaN \n",
"59 6.5546e-05 0 0.60152 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display rows 48 to 59 of the resulting table\n",
"dfPhyto[48:60]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# now, drop any rows from the table that have NaN values in either of the columns\n",
"# 'Index' or 'TchlA (ug/L)'\n",
"# This is why we applied a function to the Index column to make sure all \n",
"# non-numeric Index values would have a consistent NaN entry, making them easy to identify\n",
"# and remove\n",
"dfPhyto.dropna(subset=['Index', 'TchlA (ug/L)'],how='any',inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# pandas creates its own index, and after dropping rows I like to reset it -\n",
"# this is just for convenience\n",
"dfPhyto.reset_index(drop=True,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Bin # | \n",
" Index | \n",
" Subgroup | \n",
" Cruise | \n",
" Month | \n",
" Station | \n",
" Sample# | \n",
" rep | \n",
" depth | \n",
" Cyanobacteria.1 | \n",
" Prasinophytes.1 | \n",
" Cryptophytes.1 | \n",
" Diatoms-1.1 | \n",
" Diatoms-2.1 | \n",
" Dinoflagellates-1.1 | \n",
" Haptophytes.1 | \n",
" Dictyo.1 | \n",
" Raphido.1 | \n",
" TchlA (ug/L) | \n",
"
\n",
" \n",
" \n",
" \n",
" 48 | \n",
" 7 | \n",
" 95.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 2 | \n",
" 202 | \n",
" A | \n",
" 0 | \n",
" 0.1201 | \n",
" 0.0876395 | \n",
" 0.151612 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.360452 | \n",
" 0 | \n",
" 0.0861721 | \n",
" 0.805976 | \n",
"
\n",
" \n",
" 49 | \n",
" 7 | \n",
" 97.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 3 | \n",
" 218 | \n",
" A | \n",
" 0 | \n",
" 0.108716 | \n",
" 0.100695 | \n",
" 0.113358 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.437597 | \n",
" 0 | \n",
" 0.00902126 | \n",
" 0.769387 | \n",
"
\n",
" \n",
" 50 | \n",
" 7 | \n",
" 99.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 6 | \n",
" 249 | \n",
" A | \n",
" 0 | \n",
" 0.0899446 | \n",
" 0.0257313 | \n",
" 0.0671602 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.347255 | \n",
" 0 | \n",
" 0 | \n",
" 0.530091 | \n",
"
\n",
" \n",
" 51 | \n",
" 7 | \n",
" 101.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 9 | \n",
" 261 | \n",
" A | \n",
" 0 | \n",
" 0.0929619 | \n",
" 0.0873561 | \n",
" 0.102004 | \n",
" 0.00641763 | \n",
" 0 | \n",
" 0 | \n",
" 0.279578 | \n",
" 0 | \n",
" 0 | \n",
" 0.568318 | \n",
"
\n",
" \n",
" 52 | \n",
" 7 | \n",
" 103.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 12 | \n",
" 277 | \n",
" A | \n",
" 0 | \n",
" 0.0786977 | \n",
" 0.1356 | \n",
" 0.168394 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.444634 | \n",
" 0 | \n",
" 0.0214413 | \n",
" 0.848767 | \n",
"
\n",
" \n",
" 53 | \n",
" 7 | \n",
" 105.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 14 | \n",
" 292 | \n",
" A | \n",
" 0 | \n",
" 0.0520265 | \n",
" 0.135979 | \n",
" 0.161225 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.933363 | \n",
" 0 | \n",
" 0 | \n",
" 1.28259 | \n",
"
\n",
" \n",
" 54 | \n",
" 7 | \n",
" 107.0 | \n",
" 3 | \n",
" 2015-18 | \n",
" June | \n",
" 16 | \n",
" 303 | \n",
" A | \n",
" 0 | \n",
" 0.0162557 | \n",
" 0.0659037 | \n",
" 0.131307 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.79041 | \n",
" 0 | \n",
" 0 | \n",
" 1.00388 | \n",
"
\n",
" \n",
" 55 | \n",
" 1 | \n",
" 109.0 | \n",
" 4 | \n",
" 2015-21 | \n",
" Sept | \n",
" 14 | \n",
" 327 | \n",
" A | \n",
" 0 | \n",
" 0.103725 | \n",
" 0.411755 | \n",
" 0.541998 | \n",
" 0.644463 | \n",
" 0.161644 | \n",
" 0.0743051 | \n",
" 0.794115 | \n",
" 0.0455306 | \n",
" 0.0427693 | \n",
" 2.82031 | \n",
"
\n",
" \n",
" 56 | \n",
" 1 | \n",
" 111.0 | \n",
" 4 | \n",
" 2015-21 | \n",
" Sept | \n",
" 11 | \n",
" 328 | \n",
" A | \n",
" 0 | \n",
" 0.473666 | \n",
" 0.863616 | \n",
" 0.922717 | \n",
" 0.198315 | \n",
" 0.0325998 | \n",
" 0.186888 | \n",
" 1.37823 | \n",
" 0.0334639 | \n",
" 0.106627 | \n",
" 4.19612 | \n",
"
\n",
" \n",
" 57 | \n",
" 1 | \n",
" 113.0 | \n",
" 4 | \n",
" 2015-21 | \n",
" Sept | \n",
" CPF2 | \n",
" 329 | \n",
" A | \n",
" 0 | \n",
" 0.270905 | \n",
" 1.43341 | \n",
" 0.910451 | \n",
" 0.195409 | \n",
" 0.61877 | \n",
" 0 | \n",
" 0.4964 | \n",
" 0.127837 | \n",
" 0.387924 | \n",
" 4.4411 | \n",
"
\n",
" \n",
" 58 | \n",
" 1 | \n",
" 115.0 | \n",
" 4 | \n",
" 2015-21 | \n",
" Sept | \n",
" CPF1 | \n",
" 330 | \n",
" A | \n",
" 0 | \n",
" 0.369211 | \n",
" 1.56642 | \n",
" 0.752309 | \n",
" 0.207375 | \n",
" 0.392154 | \n",
" 0.000738665 | \n",
" 0.142002 | \n",
" 0.130842 | \n",
" 0.4017 | \n",
" 3.96275 | \n",
"
\n",
" \n",
" 59 | \n",
" 1 | \n",
" 117.0 | \n",
" 4 | \n",
" 2015-21 | \n",
" Sept | \n",
" 22 | \n",
" 331 | \n",
" A | \n",
" 0 | \n",
" 0.21602 | \n",
" 1.21956 | \n",
" 0.471939 | \n",
" 0.421989 | \n",
" 0.256469 | \n",
" 0.00168525 | \n",
" 0.402357 | \n",
" 0.0810156 | \n",
" 0.180523 | \n",
" 3.25155 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Bin # Index Subgroup Cruise Month Station Sample# rep depth \\\n",
"48 7 95.0 3 2015-18 June 2 202 A 0 \n",
"49 7 97.0 3 2015-18 June 3 218 A 0 \n",
"50 7 99.0 3 2015-18 June 6 249 A 0 \n",
"51 7 101.0 3 2015-18 June 9 261 A 0 \n",
"52 7 103.0 3 2015-18 June 12 277 A 0 \n",
"53 7 105.0 3 2015-18 June 14 292 A 0 \n",
"54 7 107.0 3 2015-18 June 16 303 A 0 \n",
"55 1 109.0 4 2015-21 Sept 14 327 A 0 \n",
"56 1 111.0 4 2015-21 Sept 11 328 A 0 \n",
"57 1 113.0 4 2015-21 Sept CPF2 329 A 0 \n",
"58 1 115.0 4 2015-21 Sept CPF1 330 A 0 \n",
"59 1 117.0 4 2015-21 Sept 22 331 A 0 \n",
"\n",
" Cyanobacteria.1 Prasinophytes.1 Cryptophytes.1 Diatoms-1.1 Diatoms-2.1 \\\n",
"48 0.1201 0.0876395 0.151612 0 0 \n",
"49 0.108716 0.100695 0.113358 0 0 \n",
"50 0.0899446 0.0257313 0.0671602 0 0 \n",
"51 0.0929619 0.0873561 0.102004 0.00641763 0 \n",
"52 0.0786977 0.1356 0.168394 0 0 \n",
"53 0.0520265 0.135979 0.161225 0 0 \n",
"54 0.0162557 0.0659037 0.131307 0 0 \n",
"55 0.103725 0.411755 0.541998 0.644463 0.161644 \n",
"56 0.473666 0.863616 0.922717 0.198315 0.0325998 \n",
"57 0.270905 1.43341 0.910451 0.195409 0.61877 \n",
"58 0.369211 1.56642 0.752309 0.207375 0.392154 \n",
"59 0.21602 1.21956 0.471939 0.421989 0.256469 \n",
"\n",
" Dinoflagellates-1.1 Haptophytes.1 Dictyo.1 Raphido.1 TchlA (ug/L) \n",
"48 0 0.360452 0 0.0861721 0.805976 \n",
"49 0 0.437597 0 0.00902126 0.769387 \n",
"50 0 0.347255 0 0 0.530091 \n",
"51 0 0.279578 0 0 0.568318 \n",
"52 0 0.444634 0 0.0214413 0.848767 \n",
"53 0 0.933363 0 0 1.28259 \n",
"54 0 0.79041 0 0 1.00388 \n",
"55 0.0743051 0.794115 0.0455306 0.0427693 2.82031 \n",
"56 0.186888 1.37823 0.0334639 0.106627 4.19612 \n",
"57 0 0.4964 0.127837 0.387924 4.4411 \n",
"58 0.000738665 0.142002 0.130842 0.4017 3.96275 \n",
"59 0.00168525 0.402357 0.0810156 0.180523 3.25155 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display part of the table, confirming that non-data rows have been removed\n",
"dfPhyto[48:60]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# due to repeated column names in the original spreadsheet, '.1' was appended to the names\n",
"# of the phytoplankton columns; \n",
"# these lines correct the column names, removing the '.1':\n",
"renameDict=dict()\n",
"for colName in dfPhyto.keys():\n",
" if colName.endswith('.1'):\n",
" renameDict[colName]=colName.split('.1')[0]\n",
"dfPhyto.rename(columns=renameDict,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Bin # | \n",
" Index | \n",
" Subgroup | \n",
" Cruise | \n",
" Month | \n",
" Station | \n",
" Sample# | \n",
" rep | \n",
" depth | \n",
" Cyanobacteria | \n",
" Prasinophytes | \n",
" Cryptophytes | \n",
" Diatoms-1 | \n",
" Diatoms-2 | \n",
" Dinoflagellates-1 | \n",
" Haptophytes | \n",
" Dictyo | \n",
" Raphido | \n",
" TchlA (ug/L) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 12 | \n",
" 1.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" SI | \n",
" 9 | \n",
" A | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 24.4709 | \n",
" 1.60687 | \n",
" 0 | \n",
" 0.0143717 | \n",
" 0 | \n",
" 0.0695617 | \n",
" 26.1618 | \n",
"
\n",
" \n",
" 1 | \n",
" 12 | \n",
" 3.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 59 | \n",
" 23 | \n",
" A | \n",
" 0 | \n",
" 3.57707e-05 | \n",
" 4.01063e-05 | \n",
" 0 | \n",
" 0.337957 | \n",
" 0.039984 | \n",
" 0 | \n",
" 0.0447033 | \n",
" 2.42373e-05 | \n",
" 0.000804232 | \n",
" 0.423549 | \n",
"
\n",
" \n",
" 2 | \n",
" 12 | \n",
" 5.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 102 | \n",
" 37 | \n",
" A | \n",
" 0 | \n",
" 0.00264467 | \n",
" 0.021158 | \n",
" 0.0450918 | \n",
" 2.40675 | \n",
" 0.610975 | \n",
" 0.00463661 | \n",
" 0.0242601 | \n",
" 0.00118509 | \n",
" 0.0118163 | \n",
" 3.12852 | \n",
"
\n",
" \n",
" 3 | \n",
" 12 | \n",
" 7.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 75 | \n",
" 51 | \n",
" A | \n",
" 0 | \n",
" 0.00488975 | \n",
" 0.00514994 | \n",
" 0.0671581 | \n",
" 3.17296 | \n",
" 0.881844 | \n",
" 0.00563953 | \n",
" 0.0438563 | \n",
" 0.0151278 | \n",
" 0.0224263 | \n",
" 4.21906 | \n",
"
\n",
" \n",
" 4 | \n",
" 12 | \n",
" 9.0 | \n",
" 1 | \n",
" 2015-17 | \n",
" April | \n",
" 72 | \n",
" 64 | \n",
" A | \n",
" 0 | \n",
" 0 | \n",
" 0.130189 | \n",
" 0.326276 | \n",
" 4.42956 | \n",
" 0.991716 | \n",
" 0.50695 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 6.38469 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 83 | \n",
" 16 | \n",
" 165.0 | \n",
" 5 | \n",
" 2015-19 | \n",
" Sept | \n",
" 9 | \n",
" 248 | \n",
" A | \n",
" 0 | \n",
" 0.0903742 | \n",
" 0.844792 | \n",
" 0.15371 | \n",
" 0.552866 | \n",
" 0.20032 | \n",
" 0.000378653 | \n",
" 0.314222 | \n",
" 0.0342132 | \n",
" 0.0465154 | \n",
" 2.23739 | \n",
"
\n",
" \n",
" 84 | \n",
" 16 | \n",
" 167.0 | \n",
" 5 | \n",
" 2015-19 | \n",
" Sept | \n",
" 6 | \n",
" 261 | \n",
" A | \n",
" 0 | \n",
" 0.0838546 | \n",
" 0.81372 | \n",
" 0.308694 | \n",
" 0.291579 | \n",
" 0.32757 | \n",
" 8.07944e-05 | \n",
" 0.561729 | \n",
" 0.0746783 | \n",
" 0.0588162 | \n",
" 2.52072 | \n",
"
\n",
" \n",
" 85 | \n",
" 16 | \n",
" 169.0 | \n",
" 5 | \n",
" 2015-19 | \n",
" Sept | \n",
" BS11 | \n",
" 269 | \n",
" A | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.125485 | \n",
" 10.1442 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 10.2697 | \n",
"
\n",
" \n",
" 86 | \n",
" 16 | \n",
" 171.0 | \n",
" 5 | \n",
" 2015-19 | \n",
" Sept | \n",
" 3 | \n",
" 285 | \n",
" A | \n",
" 0 | \n",
" 0.12782 | \n",
" 0.515151 | \n",
" 0.262525 | \n",
" 0.634216 | \n",
" 0.170571 | \n",
" 0.000441982 | \n",
" 0.48741 | \n",
" 0.033452 | \n",
" 0.1269 | \n",
" 2.35849 | \n",
"
\n",
" \n",
" 87 | \n",
" 16 | \n",
" 173.0 | \n",
" 5 | \n",
" 2015-19 | \n",
" Sept | \n",
" 2 | \n",
" 300 | \n",
" A | \n",
" 0 | \n",
" 0.111882 | \n",
" 1.00504 | \n",
" 0.165712 | \n",
" 1.52858 | \n",
" 0.0134512 | \n",
" 0 | \n",
" 0.396609 | \n",
" 0.0252189 | \n",
" 0.216654 | \n",
" 3.46315 | \n",
"
\n",
" \n",
"
\n",
"
88 rows × 19 columns
\n",
"
"
],
"text/plain": [
" Bin # Index Subgroup Cruise Month Station Sample# rep depth \\\n",
"0 12 1.0 1 2015-17 April SI 9 A 0 \n",
"1 12 3.0 1 2015-17 April 59 23 A 0 \n",
"2 12 5.0 1 2015-17 April 102 37 A 0 \n",
"3 12 7.0 1 2015-17 April 75 51 A 0 \n",
"4 12 9.0 1 2015-17 April 72 64 A 0 \n",
".. ... ... ... ... ... ... ... .. ... \n",
"83 16 165.0 5 2015-19 Sept 9 248 A 0 \n",
"84 16 167.0 5 2015-19 Sept 6 261 A 0 \n",
"85 16 169.0 5 2015-19 Sept BS11 269 A 0 \n",
"86 16 171.0 5 2015-19 Sept 3 285 A 0 \n",
"87 16 173.0 5 2015-19 Sept 2 300 A 0 \n",
"\n",
" Cyanobacteria Prasinophytes Cryptophytes Diatoms-1 Diatoms-2 \\\n",
"0 0 0 0 24.4709 1.60687 \n",
"1 3.57707e-05 4.01063e-05 0 0.337957 0.039984 \n",
"2 0.00264467 0.021158 0.0450918 2.40675 0.610975 \n",
"3 0.00488975 0.00514994 0.0671581 3.17296 0.881844 \n",
"4 0 0.130189 0.326276 4.42956 0.991716 \n",
".. ... ... ... ... ... \n",
"83 0.0903742 0.844792 0.15371 0.552866 0.20032 \n",
"84 0.0838546 0.81372 0.308694 0.291579 0.32757 \n",
"85 0 0 0.125485 10.1442 0 \n",
"86 0.12782 0.515151 0.262525 0.634216 0.170571 \n",
"87 0.111882 1.00504 0.165712 1.52858 0.0134512 \n",
"\n",
" Dinoflagellates-1 Haptophytes Dictyo Raphido TchlA (ug/L) \n",
"0 0 0.0143717 0 0.0695617 26.1618 \n",
"1 0 0.0447033 2.42373e-05 0.000804232 0.423549 \n",
"2 0.00463661 0.0242601 0.00118509 0.0118163 3.12852 \n",
"3 0.00563953 0.0438563 0.0151278 0.0224263 4.21906 \n",
"4 0.50695 0 0 0 6.38469 \n",
".. ... ... ... ... ... \n",
"83 0.000378653 0.314222 0.0342132 0.0465154 2.23739 \n",
"84 8.07944e-05 0.561729 0.0746783 0.0588162 2.52072 \n",
"85 0 0 0 0 10.2697 \n",
"86 0.000441982 0.48741 0.033452 0.1269 2.35849 \n",
"87 0 0.396609 0.0252189 0.216654 3.46315 \n",
"\n",
"[88 rows x 19 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfPhyto"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# This is the important step- join the two tables ('left' and 'right'), \n",
"# matching the cruise IDs and sample numbers\n",
"# how='outer' -> all rows from both the left and the right tables will be included, \n",
"# even if they cannot be matched; this makes it easy to check for \n",
"# unmatched data later\n",
"# left_on specifies the name of the column to match in the left table (dfbot) \n",
"# right_on specifies the name of the column to match in the right table (dfPhyto)\n",
"dfout = pd.merge(dfbot, dfPhyto, how='outer', \n",
" left_on=['Cruise','Sample_Number'], right_on = ['Cruise','Sample#'])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n",
" 'LOC:EVENT_NUMBER', 'LOC:STATION', 'Lat', 'Lon', 'LOC:WATER DEPTH',\n",
" 'ADM:SCIENTIST', 'Sample_Number', 'Pressure', 'Temperature',\n",
" 'Transmissivity', 'Fluorescence:URU:Seapoint', 'PAR', 'Salinity',\n",
" 'Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle',\n",
" 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n",
" 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n",
" 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n",
" 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n",
" 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved',\n",
" 'Number_of_bin_records', 'Bin #', 'Index', 'Subgroup', 'Month',\n",
" 'Station', 'Sample#', 'rep', 'depth', 'Cyanobacteria', 'Prasinophytes',\n",
" 'Cryptophytes', 'Diatoms-1', 'Diatoms-2', 'Dinoflagellates-1',\n",
" 'Haptophytes', 'Dictyo', 'Raphido', 'TchlA (ug/L)'],\n",
" dtype='object')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# show the column names in the resulting table\n",
"dfout.keys()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checks"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Identify cases where phytoplankton data were matched to multiple samples in bottle data:\n",
"dftest=pd.merge(dfbot, dfPhyto,how='right', left_on=['Cruise','Sample_Number'],right_on = ['Cruise','Sample#'])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"temp=dftest.groupby(['Cruise','Sample#']).agg({'Cruise':['count']})\n",
"temp.columns = ['icount']"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.unique(temp.icount)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" icount | \n",
"
\n",
" \n",
" Cruise | \n",
" Sample# | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [icount]\n",
"Index: []"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check for Phyto samples matched to multiple bottle samples:\n",
"temp.loc[temp.icount>1]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" icount | \n",
"
\n",
" \n",
" Cruise | \n",
" Sample# | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [icount]\n",
"Index: []"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check for phyto samples not matched to bottle samples:\n",
"temp.loc[temp.icount==0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" icount | \n",
"
\n",
" \n",
" Cruise | \n",
" Sample_Number | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [icount]\n",
"Index: []"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp2=dfout.groupby(['Cruise','Sample_Number']).agg({'Cruise':['count']})\n",
"temp2.columns = ['icount']\n",
"# this will catch phyto matched to multiple bottle but also bottle with duplicate sample numbers per cruise:\n",
"temp2.loc[temp2.icount>1]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" icount | \n",
"
\n",
" \n",
" Cruise | \n",
" Sample# | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [icount]\n",
"Index: []"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check for phyto samples not matched to bottle samples:\n",
"temp.loc[temp.icount==0]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(938, 88, 938)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# if the output table is longer than either of the input tables, some columns were not matched\n",
"len(dfout), len(dfPhyto), len(dfbot)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(88, 88)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check that the number of cells with data in the 'Cyanobacteria' column is \n",
"# the same for the input and output tables:\n",
"np.sum(dfPhyto['Cyanobacteria']>=0), np.sum(dfout['Cyanobacteria']>=0)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Series([], Name: Index, dtype: float64)\n"
]
}
],
"source": [
"# If there were data rows from the phytoplankton table that were not matched to \n",
"# rows from the bottle table, their indices from the phytoplankton table would be \n",
"# displayed below (the series [] would not be empty)\n",
"print(dfout.loc[dfout['ADM:SCIENTIST'].isna()]['Index'])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# drop repetetive/unecessary columns:\n",
"dfout.drop(labels=['Bin #', 'Index', 'Subgroup', 'Month', 'Station', 'Sample#', 'rep',\n",
" 'depth',],axis=1,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# truncate phyto group values to 3 decimal places:\n",
"for col in ('Cyanobacteria', 'Prasinophytes', 'Cryptophytes', 'Diatoms-1',\n",
" 'Diatoms-2', 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',\n",
" 'TchlA (ug/L)'):\n",
" dfout[col]=[np.round(ii,decimals=3) for ii in dfout[col]] # use list comprehension to set values for entire column"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',\n",
" 'LOC:EVENT_NUMBER', 'LOC:STATION', 'Lat', 'Lon', 'LOC:WATER DEPTH',\n",
" 'ADM:SCIENTIST', 'Sample_Number', 'Pressure', 'Temperature',\n",
" 'Transmissivity', 'Fluorescence:URU:Seapoint', 'PAR', 'Salinity',\n",
" 'Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle',\n",
" 'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',\n",
" 'Flag:Chlorophyll:Extracted', 'Nitrate_plus_Nitrite',\n",
" 'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',\n",
" 'Flag:Phosphate', 'Cruise', 'Temperature:Draw',\n",
" 'Phaeo-Pigment:Extracted', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved',\n",
" 'Number_of_bin_records', 'Cyanobacteria', 'Prasinophytes',\n",
" 'Cryptophytes', 'Diatoms-1', 'Diatoms-2', 'Dinoflagellates-1',\n",
" 'Haptophytes', 'Dictyo', 'Raphido', 'TchlA (ug/L)'],\n",
" dtype='object')"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfout.keys()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# now write the output table to a .csv file:\n",
"dfout.to_csv(pathOut, index=False) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (py38_ct)",
"language": "python",
"name": "py38_ct"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}