{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "papermill": { "duration": 1.348164, "end_time": "2020-11-16T18:42:06.722640", "exception": false, "start_time": "2020-11-16T18:42:05.374476", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import numpy as np # this module handles arrays, but here we need it for its NaN value\n", "import pandas as pd # this module contains a lot of tools for handling tabular data\n", "from matplotlib import pyplot as plt\n", "from salishsea_tools import evaltools as et\n", "import datetime as dt\n", "import os\n", "import gsw\n", "import pickle\n", "import netCDF4 as nc\n", "import cmocean\n", "from scipy import stats as spst\n", "from pandas.plotting import register_matplotlib_converters\n", "register_matplotlib_converters()\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "papermill": { "duration": 0.021239, "end_time": "2020-11-16T18:42:06.762623", "exception": false, "start_time": "2020-11-16T18:42:06.741384", "status": "completed" }, "tags": [ "parameters" ] }, "outputs": [], "source": [ "modSourceDir= '/results/SalishSea/nowcast-green.201812/'\n", "modver='201812'\n", "Chl_N=1.8 # Chl:N ratio\n", "fname='compHPLCModelFirstLook-Regress-Base.ipynb'\n", "startYMD=(2015,1,1)\n", "endYMD=(2018,12,31)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "papermill": { "duration": 0.020979, "end_time": "2020-11-16T18:42:06.800993", "exception": false, "start_time": "2020-11-16T18:42:06.780014", "status": "completed" }, "tags": [ "injected-parameters" ] }, "outputs": [], "source": [ "# Parameters\n", "modSourceDir = '/data/eolson/results/MEOPAR/SS36runs/GrahamRuns/2018ES_LF/'\n", "modver = '2018ES_LF'\n", "start_date = dt.datetime(2017,1,1)\n", "end_date = dt.datetime(2017, 6, 30)\n", "Chl_N = 1.8\n", "#fname = \"compHPLCModelFirstLook-Regress-2018ES_LF-2015.ipynb\"\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { 
"papermill": { "duration": 0.021188, "end_time": "2020-11-16T18:42:06.878359", "exception": false, "start_time": "2020-11-16T18:42:06.857171", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "datestr='_'+start_date.strftime('%Y%m%d')+'_'+end_date.strftime('%Y%m%d')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "papermill": { "duration": 0.023253, "end_time": "2020-11-16T18:42:06.919365", "exception": false, "start_time": "2020-11-16T18:42:06.896112", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def subval(idf,colList):\n", " # first value in colList should be the column you are going to keep\n", " # follow with other columns that will be used to fill in when that column is NaN\n", " # in order of precedence\n", " if len(colList)==2:\n", " idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n", " else r[colList[1]] for i,r in idf.iterrows()]\n", " elif len(colList)==3:\n", " idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \\\n", " else r[colList[1]] if not pd.isna(r[colList[1]]) \\\n", " else r[colList[2]] for i,r in idf.iterrows()]\n", " else:\n", " raise NotImplementedError('Add to code to handle this case')\n", " idf.drop(columns=list(colList[1:]),inplace=True)\n", " return idf" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "papermill": { "duration": 0.031836, "end_time": "2020-11-16T18:42:06.968660", "exception": false, "start_time": "2020-11-16T18:42:06.936824", "status": "completed" }, "scrolled": false, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv ['ADM:SCIENTIST', 'Chlorophyll:Extracted', 'Cruise', 'Cryptophytes', 'Cyanobacteria', 'Diatoms-1', 'Diatoms-2', 'Dictyo', 'Dinoflagellates-1', 'FIL:START TIME YYYY/MM/DD HH:MM:SS', 'File Name', 'Flag:Chlorophyll:Extracted', 'Flag:Nitrate_plus_Nitrite', 'Flag:Oxygen:Dissolved', 'Flag:Phosphate', 
'Flag:Salinity:Bottle', 'Flag:Silicate', 'Fluorescence:URU:Seapoint', 'Haptophytes', 'LOC:EVENT_NUMBER', 'LOC:STATION', 'LOC:WATER DEPTH', 'Lat', 'Lon', 'Nitrate_plus_Nitrite', 'Number_of_bin_records', 'Oxygen:Dissolved', 'Oxygen:Dissolved:CTD', 'PAR', 'Phaeo-Pigment:Extracted', 'Phosphate', 'Prasinophytes', 'Pressure', 'Raphido', 'Salinity', 'Salinity:Bottle', 'Sample_Number', 'Silicate', 'TchlA', 'Temperature', 'Temperature:Draw', 'Transmissivity', 'Zone', 'pH:SBE:Nominal']\n", "/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv ['ADM:PROJECT', 'ADM:SCIENTIST', 'Bottle:Firing_Sequence', 'Bottle_Number', 'Chlorophyll:Extracted [mg/m^3]', 'Cruise', 'Cryptophytes', 'Cyanobacteria', 'Diatoms-1', 'Diatoms-2', 'Dictyo', 'Dinoflagellates-1', 'FIL:START TIME YYYY/MM/DD HH:MM:SS', 'File Name', 'Flag:Chlorophyll:Extracted', 'Flag:Nitrate_plus_Nitrite', 'Flag:Oxygen:Dissolved', 'Flag:Phosphate', 'Flag:Salinity:Bottle', 'Flag:Silicate', 'Fluorescence:URU:Seapoint [mg/m^3]', 'Haptophytes', 'LOC:EVENT_NUMBER', 'LOC:LATITUDE', 'LOC:LONGITUDE', 'LOC:STATION', 'LOC:WATER DEPTH', 'Nitrate_plus_Nitrite [umol/L]', 'Number_of_bin_records', 'Oxygen:Dissolved [mL/L]', 'Oxygen:Dissolved [umol/kg]', 'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]', 'PAR [uE/m^2/sec]', 'PAR:Reference [uE/m^2/sec]', 'Phaeo-Pigment:Extracted', 'Phaeo-Pigment:Extracted [mg/m^3]', 'Phosphate', 'Phosphate [umol/L]', 'Prasinophytes', 'Pressure [decibar]', 'Raphido', 'Salinity:Bottle', 'Salinity:Bottle [PSS-78]', 'Salinity:T1:C1 [PSS-78]', 'Sample_Number', 'Silicate [umol/L]', 'TchlA', 'Temperature:Draw', 'Temperature:Secondary [deg C (ITS90)]', 'Transmissivity [*/metre]', 'Zone', 'pH:SBE:Nominal']\n", "/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv ['ADM:MISSION', 'ADM:PROJECT', 'ADM:SCIENTIST', 'Bottle:Firing_Sequence', 'Bottle_Number', 'Chlorophyll:Extracted [mg/m^3]', 'Comments by sample_numbeR', 'Cruise', 'Cryptophytes', 'Cyanobacteria', 'Depth 
[metres]', 'Diatoms-1', 'Diatoms-2', 'Dictyo', 'Dinoflagellates-1', 'FIL:START TIME YYYY/MM/DD HH:MM:SS', 'Flag:Chlorophyll:Extracted', 'Flag:Nitrate_plus_Nitrite', 'Flag:Oxygen:Dissolved', 'Flag:Phosphate', 'Flag:Salinity:Bottle', 'Flag:Silicate', 'Fluorescence:URU:Seapoint [mg/m^3]', 'Haptophytes', 'LOC:EVENT_NUMBER', 'LOC:LATITUDE', 'LOC:LONGITUDE', 'LOC:STATION', 'LOC:WATER DEPTH', 'Nitrate_plus_Nitrite [umol/L]', 'Number_of_bin_records', 'Oxygen:Dissolved [mL/L]', 'Oxygen:Dissolved [umol/kg]', 'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]', 'PAR [uE/m^2/sec]', 'Phaeo-Pigment:Extracted [mg/m^3]', 'Phosphate [umol/L]', 'Prasinophytes', 'Pressure [decibar]', 'Raphido', 'Salinity [PSS-78]', 'Salinity:Bottle [PSS-78]', 'Sample_Number', 'Silicate [umol/L]', 'TchlA', 'Temperature [deg C (ITS90)]', 'Temperature:Draw [deg C (ITS90)]', 'Transmissivity [*/metre]', 'Zone']\n", "/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv ['ADM:MISSION', 'ADM:PROJECT', 'ADM:SCIENTIST', 'Alkalinity:Total [umol/L]', 'Bottle:Firing_Sequence', 'Bottle_Number', 'Carbon:Dissolved:Inorganic [umol/kg]', 'Chlorophyll:Extracted [mg/m^3]', 'Comments by sample_numbeR', 'Cruise', 'Cryptophytes', 'Cyanobacteria', 'Depth [metres]', 'Depth:Nominal [metres]', 'Diatoms-1', 'Diatoms-2', 'Dictyo', 'Dinoflagellates-1', 'FIL:START TIME YYYY/MM/DD HH:MM:SS', 'File Name', 'Flag:Alkalinity:Total', 'Flag:Carbon:Dissolved:Inorganic', 'Flag:Chlorophyll:Extracted', 'Flag:Nitrate_plus_Nitrite', 'Flag:Oxygen:Dissolved', 'Flag:Phosphate', 'Flag:Salinity:Bottle', 'Flag:Silicate', 'Fluorescence [mg/m^3]', 'Haptophytes', 'LOC:EVENT_NUMBER', 'LOC:LATITUDE', 'LOC:LONGITUDE', 'LOC:STATION', 'LOC:WATER DEPTH', 'Nitrate_plus_Nitrite [umol/L]', 'Oxygen:Dissolved [mL/L]', 'Oxygen:Dissolved [umol/kg]', 'Oxygen:Dissolved:CTD [mL/L]', 'Oxygen:Dissolved:CTD [umol/kg]', 'PAR [uE/m^2/sec]', 'PAR:Reference [uE/m^2/sec]', 'Phaeo-Pigment:Extracted [mg/m^3]', 'Phosphate [umol/L]', 'Prasinophytes', 
'Pressure [decibar]', 'Raphido', 'Salinity [PSS-78]', 'Salinity:Bottle [PSS-78]', 'Sample_Number', 'Silicate [umol/L]', 'TchlA', 'Temperature [deg C (ITS90)]', 'Temperature:Draw [deg C (ITS90)]', 'Transmissivity [*/metre]', 'Zone', 'Zone.1', 'pH:SBE:Nominal']\n" ] }, { "ename": "Exception", "evalue": "no file found including date 2017-01-01 00:00:00 of form:\n /data/eolson/results/MEOPAR/SS36runs/GrahamRuns/2018ES_LF/31dec16/SalishSea_1h_20161231_20161231_ptrc_T.nc", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/data/eolson/results/MEOPAR/tools/SalishSeaTools/salishsea_tools/evaltools.py\u001b[0m in \u001b[0;36mindex_model_files\u001b[0;34m(start, end, basedir, nam_fmt, flen, ftype, tres)\u001b[0m\n\u001b[1;32m 511\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 512\u001b[0;31m iifstr=glob.glob(os.path.join(basedir,stencil.format(iits.strftime(dfmt).lower(),\n\u001b[0m\u001b[1;32m 513\u001b[0m iits.strftime(ffmt),iite.strftime(ffmt),iits.strftime(wfmt))),recursive=True)[0]\n", "\u001b[0;31mIndexError\u001b[0m: list index out of range", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mfdict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'ptrc_T'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'grid_T'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m 
\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0met\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatchData\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilemap\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfdict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mstart_date\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mend_date\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnamfmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mPATH\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mflen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matched_'\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mmodver\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdatestr\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m'._NewALLOpkl'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/data/eolson/results/MEOPAR/tools/SalishSeaTools/salishsea_tools/evaltools.py\u001b[0m in \u001b[0;36mmatchData\u001b[0;34m(data, filemap, fdict, mod_start, mod_end, mod_nam_fmt, mod_basedir, mod_flen, method, deltat, deltad, meshPath, maskName, wrapSearch, wrapTol, e3tvar, fid, sdim, quiet, preIndexed)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mflist\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mift\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mftypes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 182\u001b[0;31m 
\u001b[0mflist\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mift\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex_model_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod_start\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmod_end\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmod_basedir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmod_nam_fmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmod_flen\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mift\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfdict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mift\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'bin'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/data/eolson/results/MEOPAR/tools/SalishSeaTools/salishsea_tools/evaltools.py\u001b[0m in \u001b[0;36mindex_model_files\u001b[0;34m(start, end, basedir, nam_fmt, flen, ftype, tres)\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0miite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0miits\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdays\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflen\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 521\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnday\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0mflen\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 522\u001b[0;31m raise Exception('no file found including date '+str(start)+\\\n\u001b[0m\u001b[1;32m 523\u001b[0m ' of form:\\n '+os.path.join(basedir,stencil.format(iits.strftime(dfmt).lower(),\n\u001b[1;32m 524\u001b[0m 
iits.strftime(ffmt),iite.strftime(ffmt),iits.strftime(wfmt))))\n", "\u001b[0;31mException\u001b[0m: no file found including date 2017-01-01 00:00:00 of form:\n /data/eolson/results/MEOPAR/SS36runs/GrahamRuns/2018ES_LF/31dec16/SalishSea_1h_20161231_20161231_ptrc_T.nc"
     ]
    }
   ],
   "source": [
    "# Cache file holding the matched model/observation table for this model version and date range.\n",
    "# The same name is used for the existence check, the load and the save.\n",
    "matchedFile='matched_'+modver+datestr+'_NewALLO.pkl'\n",
    "if os.path.isfile(matchedFile):\n",
    "    # NOTE: unpickling can execute arbitrary code; only load cache files written by this notebook\n",
    "    with open(matchedFile,'rb') as f:\n",
    "        data=pickle.load(f)\n",
    "else:\n",
    "    # define paths to the source files and eventual output file\n",
    "    flist=('/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv',\n",
    "           '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv',\n",
    "           '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv',\n",
    "           '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv')#,\n",
    "           #'/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2019.csv')\n",
    "\n",
    "    dfs=list()\n",
    "    for obsFile in flist: # not called fname: that would overwrite the notebook parameter of that name\n",
    "        idf=pd.read_csv(obsFile)\n",
    "        print(obsFile,sorted(idf.keys()))\n",
    "        dfs.append(idf)\n",
    "    df=pd.concat(dfs,ignore_index=True,sort=False) # concatenate the list into a single table\n",
    "\n",
    "    df.drop(labels=['ADM:MISSION','ADM:PROJECT','ADM:SCIENTIST','Zone','Zone.1','Temperature:Draw',\n",
    "                    'Temperature:Draw [deg C (ITS90)]','Bottle:Firing_Sequence','Comments by sample_numbeR',\n",
    "                    'File Name','LOC:EVENT_NUMBER','Number_of_bin_records'\n",
    "                   ],axis=1,inplace=True)\n",
    "\n",
    "    # The *_NewALLO files only carry the short group names (see the column lists printed by this cell),\n",
    "    # while the analysis cells further down index the long names; rename so those lookups succeed.\n",
    "    df.rename(columns={'Dictyo':'Dictyochophytes','Dinoflagellates-1':'Dinoflagellates',\n",
    "                       'Raphido':'Raphidophytes','TchlA':'TchlA (ug/L)'},inplace=True)\n",
    "\n",
    "    # merge the differently-labelled versions of each variable into a single column\n",
    "    df=subval(df,('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted'))\n",
    "    df=subval(df,('Fluorescence [mg/m^3]','Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint'))\n",
    "    df=subval(df,('Lat','LOC:LATITUDE'))\n",
    "    df=subval(df,('Lon','LOC:LONGITUDE'))\n",
    "    df=subval(df,('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite'))\n",
    "    df=subval(df,('PAR [uE/m^2/sec]','PAR'))\n",
    "    df=subval(df,('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted'))\n",
    "    df=subval(df,('Phosphate [umol/L]','Phosphate'))\n",
    "    df=subval(df,('Pressure [decibar]','Pressure'))\n",
    "    df=subval(df,('Salinity','Salinity [PSS-78]','Salinity:T1:C1 [PSS-78]'))\n",
    "    df=subval(df,('Salinity:Bottle','Salinity:Bottle [PSS-78]'))\n",
    "    df=subval(df,('Silicate [umol/L]','Silicate'))\n",
    "    df=subval(df,('Temperature','Temperature [deg C (ITS90)]','Temperature:Secondary [deg C (ITS90)]'))\n",
    "    df=subval(df,('Transmissivity [*/metre]','Transmissivity'))\n",
    "\n",
    "    # depth and pressure: use whichever was reported and compute the other from it\n",
    "    df['Z']=np.where(pd.isna(df['Depth [metres]']),\n",
    "                     -1*gsw.z_from_p(df['Pressure [decibar]'],df['Lat']),\n",
    "                     df['Depth [metres]'])\n",
    "    df['p']=np.where(pd.isna(df['Pressure [decibar]']),\n",
    "                     gsw.p_from_z(-1*df['Depth [metres]'],df['Lat']),\n",
    "                     df['Pressure [decibar]'])\n",
    "    df['SA']=gsw.SA_from_SP(df['Salinity'],df['p'],df['Lon'],df['Lat'])\n",
    "    df['CT']=gsw.CT_from_t(df['SA'],df['Temperature'],df['p'])\n",
    "\n",
    "    # rows whose time stamp is not a string get NaN instead of a datetime\n",
    "    df['dtUTC']=[dt.datetime.strptime(ii,'%Y-%m-%d %H:%M:%S') if isinstance(ii,str) else np.nan\n",
    "                 for ii in df['FIL:START TIME YYYY/MM/DD HH:MM:SS'] ]\n",
    "\n",
    "    PATH= modSourceDir\n",
    "\n",
    "    flen=1\n",
    "    namfmt='nowcast'\n",
    "    #varmap={'N':'nitrate','Si':'silicon','Ammonium':'ammonium'}\n",
    "    filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T','ciliates':'ptrc_T','flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'}\n",
    "    #gridmap={'nitrate':'tmask','silicon':'tmask','ammonium':'tmask'}\n",
    "    fdict={'ptrc_T':1,'grid_T':1}\n",
    "\n",
    "    data=et.matchData(df,filemap,fdict,start_date,end_date,namfmt,PATH,flen)\n",
    "\n",
    "    with open(matchedFile,'wb') as f:\n",
    "        pickle.dump(data,f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "papermill": { "duration": 0.025087, "end_time": "2020-11-16T18:42:07.011178", "exception": false, "start_time": 
"2020-11-16T18:42:06.986091", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "data['other']=0.0\n", "for el in ('Cryptophytes', 'Cyanobacteria', 'Dictyochophytes', 'Dinoflagellates',\n", " 'Haptophytes', 'Prasinophytes', 'Raphidophytes'):\n", " data['other']=data['other']+data[el]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.524902, "end_time": "2020-11-16T18:42:07.553449", "exception": false, "start_time": "2020-11-16T18:42:07.028547", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "fig,ax=plt.subplots(2,2,figsize=(12,8))\n", "ax=ax.flatten()\n", "ax[0].plot(data['Diatoms-1']+data['Diatoms-2'],Chl_N*data['mod_diatoms'],'k.')\n", "ax[0].set_title('Diatoms (mg Chl/m$^3$)')\n", "ax[0].set_xlabel('HPLC')\n", "ax[0].set_ylabel('model')\n", "ax[0].plot((0,18),(0,18),'r-',alpha=.3)\n", "\n", "ax[1].plot(data['other'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']),'k.')\n", "ax[1].set_title('non-Diatoms (mg Chl/m$^3$)')\n", "ax[1].set_xlabel('HPLC')\n", "ax[1].set_ylabel('model')\n", "ax[1].plot((0,12),(0,12),'r-',alpha=.3)\n", "\n", "ax[2].plot(data['Diatoms-1']+data['Diatoms-2'],Chl_N*data['mod_diatoms'],'k.')\n", "ax[2].set_title('Diatoms (mg Chl/m$^3$)')\n", "ax[2].set_xlabel('HPLC')\n", "ax[2].set_ylabel('model')\n", "ax[2].plot((0,25),(0,25),'r-',alpha=.3)\n", "ax[2].set_xlim((0,25))\n", "ax[2].set_ylim((0,25))\n", "\n", "ax[3].plot(data['other'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']),'k.')\n", "ax[3].set_title('non-Diatoms (mg Chl/m$^3$)')\n", "ax[3].set_xlabel('HPLC')\n", "ax[3].set_ylabel('model')\n", "ax[3].plot((0,12),(0,12),'r-',alpha=.3)\n", "ax[3].set_xlim((0,8))\n", "ax[3].set_ylim((0,8))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.023194, "end_time": "2020-11-16T18:42:07.597505", "exception": false, "start_time": "2020-11-16T18:42:07.574311", "status": "completed" }, "tags": [] }, "outputs": [], 
"source": [ "def logt(x):\n", " return np.log10(x+.001)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.218565, "end_time": "2020-11-16T18:42:07.835336", "exception": false, "start_time": "2020-11-16T18:42:07.616771", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "fig,ax=plt.subplots(1,2,figsize=(12,5))\n", "ax[0].plot(logt(data['Diatoms-1']+data['Diatoms-2']),logt(Chl_N*data['mod_diatoms']),'k.')\n", "ax[0].set_title('log10[ Diatoms (mg Chl/m$^3$) + 0.001]',fontsize=18)\n", "ax[0].set_xlabel('HPLC',fontsize=18)\n", "ax[0].set_ylabel('model',fontsize=18)\n", "ax[0].set_xlim(-3.1,2)\n", "ax[0].set_ylim(-3.1,2)\n", "ax[0].plot((-6,3),(-6,3),'r-',alpha=.3)\n", "\n", "ax[1].plot(logt(data['other']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates'])),'k.')\n", "ax[1].set_title('log10[ non-Diatoms (mg Chl/m$^3$) + 0.001]',fontsize=18)\n", "ax[1].set_xlabel('HPLC',fontsize=18)\n", "ax[1].set_ylabel('model',fontsize=18)\n", "ax[1].plot((-6,3),(-6,3),'r-',alpha=.3)\n", "ax[1].set_xlim(-3.1,2)\n", "ax[1].set_ylim(-3.1,2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.126194, "end_time": "2020-11-16T18:42:07.982104", "exception": false, "start_time": "2020-11-16T18:42:07.855910", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "plt.plot(data['TchlA (ug/L)'],Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms']),'k.')\n", "plt.title('Total Chlorophyll (ug/L)')\n", "plt.xlabel('HPLC')\n", "plt.ylabel('model')\n", "plt.plot((0,20),(0,20),'r-',alpha=.3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.122428, "end_time": "2020-11-16T18:42:08.125797", "exception": false, "start_time": "2020-11-16T18:42:08.003369", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "fig,ax=plt.subplots(1,1,figsize=(5,5))\n", "ax.plot(logt(data['TchlA 
(ug/L)']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms'])),'k.')\n", "ax.set_title('log10[ Total Chlorophyll (ug/L) + 0.001]')\n", "ax.set_xlabel('HPLC')\n", "ax.set_ylabel('model')\n", "ax.plot((-6,5),(-6,5),'r-',alpha=.3)\n", "ax.set_xlim(-1,2)\n", "ax.set_ylim(-1,2);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.103263, "end_time": "2020-11-16T18:42:08.251557", "exception": false, "start_time": "2020-11-16T18:42:08.148294", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def yd(idt):\n", " if type(idt)==dt.datetime:\n", " yd=(idt-dt.datetime(idt.year-1,12,31)).days\n", " else: # assume array or pandas\n", " yd=[(ii-dt.datetime(ii.year-1,12,31)).days for ii in idt]\n", " return yd\n", "\n", "data['yd']=yd(data['dtUTC'])\n", "data['Year']=[ii.year for ii in data['dtUTC']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.288668, "end_time": "2020-11-16T18:42:08.562106", "exception": false, "start_time": "2020-11-16T18:42:08.273438", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "fig,ax=plt.subplots(1,1,figsize=(5,5))\n", "m=ax.scatter(logt(data['TchlA (ug/L)']),logt(Chl_N*(data['mod_flagellates']+data['mod_ciliates']+data['mod_diatoms'])),\n", " c=data['yd'],cmap=cmocean.cm.phase)\n", "ax.set_title('log10[ Total Chlorophyll (ug/L) + 0.001] By Year Day')\n", "ax.set_xlabel('HPLC')\n", "ax.set_ylabel('model')\n", "ax.plot((-6,5),(-6,5),'r-',alpha=.3)\n", "ax.set_xlim(-1,2)\n", "ax.set_ylim(-1,2);\n", "fig.colorbar(m)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.055199, "end_time": "2020-11-16T18:42:08.641905", "exception": false, "start_time": "2020-11-16T18:42:08.586706", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "with nc.Dataset('/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc') as mesh:\n", " 
tmask=np.copy(mesh.variables['tmask'][0,:,:,:])\n", " navlat=np.copy(mesh.variables['nav_lat'][:,:])\n", " navlon=np.copy(mesh.variables['nav_lon'][:,:])" ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.02282, "end_time": "2020-11-16T18:42:08.688297", "exception": false, "start_time": "2020-11-16T18:42:08.665477", "status": "completed" }, "tags": [] }, "source": [ "## Multiple Linear Regression\n", "\n", "#### Diatoms" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.091078, "end_time": "2020-11-16T18:42:08.802299", "exception": false, "start_time": "2020-11-16T18:42:08.711221", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&(~pd.isnull(data['Cyanobacteria']))&\\\n", " (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\\\n", " (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes']))\n", "A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'],\n", " data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],data.loc[ii]['Dinoflagellates'],\n", " data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T\n", "b=data.loc[ii]['mod_diatoms']\n", "m=np.linalg.lstsq(A,b,rcond=None)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.028183, "end_time": "2020-11-16T18:42:08.853634", "exception": false, "start_time": "2020-11-16T18:42:08.825451", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "m" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.02893, "end_time": "2020-11-16T18:42:08.905700", "exception": false, "start_time": 
"2020-11-16T18:42:08.876770", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes',\n", " 'Dictyochophytes','Dinoflagellates','Raphidophytes','ones')\n", "for c, mm in zip(clist,m):\n", " print(c,mm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.037899, "end_time": "2020-11-16T18:42:08.967198", "exception": false, "start_time": "2020-11-16T18:42:08.929299", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&\\\n", " (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Raphidophytes']))\n", "A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cryptophytes'],\n", " data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T\n", "b=data.loc[ii]['mod_diatoms']\n", "m=np.linalg.lstsq(A,b,rcond=None)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.028564, "end_time": "2020-11-16T18:42:09.018990", "exception": false, "start_time": "2020-11-16T18:42:08.990426", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "clist=('Diatoms-1','Diatoms-2','Cryptophytes','Raphidophytes','ones')\n", "for c, mm in zip(clist,m):\n", " print(c,mm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.035767, "end_time": "2020-11-16T18:42:09.078749", "exception": false, "start_time": "2020-11-16T18:42:09.042982", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ii=(~pd.isnull(data['mod_diatoms']))&(~pd.isnull(data['Diatoms-1']))&\\\n", " (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Raphidophytes']))\n", "A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Cryptophytes'],\n", " 
data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T\n", "b=data.loc[ii]['mod_diatoms']\n", "m=np.linalg.lstsq(A,b,rcond=None)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.028517, "end_time": "2020-11-16T18:42:09.130966", "exception": false, "start_time": "2020-11-16T18:42:09.102449", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "clist=('Diatoms-1','Cryptophytes','Raphidophytes','ones')\n", "for c, mm in zip(clist,m):\n", " print(c,mm)" ] }, { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.023654, "end_time": "2020-11-16T18:42:09.178735", "exception": false, "start_time": "2020-11-16T18:42:09.155081", "status": "completed" }, "tags": [] }, "source": [ "#### flagellates" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.040516, "end_time": "2020-11-16T18:42:09.242573", "exception": false, "start_time": "2020-11-16T18:42:09.202057", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ii=(~pd.isnull(data['mod_flagellates']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Diatoms-2']))&(~pd.isnull(data['Cyanobacteria']))&\\\n", " (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\\\n", " (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes']))\n", "A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Diatoms-2'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'],\n", " data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],data.loc[ii]['Dinoflagellates'],\n", " data.loc[ii]['Raphidophytes'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T\n", "b=data.loc[ii]['mod_flagellates']\n", "m=np.linalg.lstsq(A,b,rcond=None)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.028463, "end_time": 
"2020-11-16T18:42:09.295206", "exception": false, "start_time": "2020-11-16T18:42:09.266743", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "m" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.029606, "end_time": "2020-11-16T18:42:09.348559", "exception": false, "start_time": "2020-11-16T18:42:09.318953", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes',\n", " 'Dictyochophytes','Dinoflagellates','Raphidophytes','ones')\n", "for c, mm in zip(clist,m):\n", " print(c,mm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.039195, "end_time": "2020-11-16T18:42:09.411593", "exception": false, "start_time": "2020-11-16T18:42:09.372398", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ii=(~pd.isnull(data['mod_flagellates']))&(~pd.isnull(data['Diatoms-1']))&(~pd.isnull(data['Cyanobacteria']))&\\\n", " (~pd.isnull(data['Cryptophytes']))&(~pd.isnull(data['Prasinophytes']))&(~pd.isnull(data['Haptophytes']))&\\\n", " (~pd.isnull(data['Dictyochophytes']))&(~pd.isnull(data['Dinoflagellates']))&(~pd.isnull(data['Raphidophytes']))\n", "A=np.vstack([data.loc[ii]['Diatoms-1'],data.loc[ii]['Cyanobacteria'],data.loc[ii]['Cryptophytes'],\n", " data.loc[ii]['Prasinophytes'],data.loc[ii]['Haptophytes'],data.loc[ii]['Dictyochophytes'],\n", " data.loc[ii]['Dinoflagellates'],np.ones(np.shape(data.loc[ii]['Diatoms-1']))]).T\n", "b=data.loc[ii]['mod_flagellates']\n", "m=np.linalg.lstsq(A,b,rcond=None)[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.029498, "end_time": "2020-11-16T18:42:09.465405", "exception": false, "start_time": "2020-11-16T18:42:09.435907", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ 
"# Labels for the coefficients of the fit in the previous cell, paired by position.\n",
"clist=('Diatoms-1','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes',\n",
"       'Dictyochophytes','Dinoflagellates','ones')\n",
"for name,coef in zip(clist,m):\n",
"    print(name,coef)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.037213, "end_time": "2020-11-16T18:42:09.527008", "exception": false, "start_time": "2020-11-16T18:42:09.489795", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Least-squares fit of mod_flagellates on five HPLC groups plus a constant column.\n",
"regressors=['Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes','Dictyochophytes']\n",
"# Rows are kept only when the target, every regressor and Raphidophytes are non-null.\n",
"# NOTE(review): Raphidophytes is required here although it is not a regressor -- confirm intended.\n",
"ii=data[['mod_flagellates']+regressors+['Raphidophytes']].notna().all(axis=1)\n",
"# one column per regressor, with the column of ones last\n",
"A=np.column_stack([data.loc[ii,regressors].values,np.ones(ii.sum())])\n",
"b=data.loc[ii,'mod_flagellates']\n",
"m=np.linalg.lstsq(A,b,rcond=None)[0]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.028887, "end_time": "2020-11-16T18:42:09.580969", "exception": false, "start_time": "2020-11-16T18:42:09.552082", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Labels for the coefficients of the fit in the previous cell, paired by position.\n",
"clist=('Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes',\n",
"       'Dictyochophytes','ones')\n",
"for name,coef in zip(clist,m):\n",
"    print(name,coef)" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.023822, "end_time": "2020-11-16T18:42:09.628983", "exception": false, "start_time": "2020-11-16T18:42:09.605161", "status": "completed" }, "tags": [] }, "source": [ "#### M. rubrum" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.041124, "end_time": "2020-11-16T18:42:09.693845", "exception": false, "start_time": "2020-11-16T18:42:09.652721", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Least-squares fit of mod_ciliates on all nine HPLC groups plus a constant column.\n",
"regressors=['Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes',\n",
"            'Haptophytes','Dictyochophytes','Dinoflagellates','Raphidophytes']\n",
"# keep rows where the target and every regressor are non-null\n",
"ii=data[['mod_ciliates']+regressors].notna().all(axis=1)\n",
"# one column per regressor, with the column of ones last\n",
"A=np.column_stack([data.loc[ii,regressors].values,np.ones(ii.sum())])\n",
"b=data.loc[ii,'mod_ciliates']\n",
"m=np.linalg.lstsq(A,b,rcond=None)[0]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.029538, "end_time": "2020-11-16T18:42:09.748410", "exception": false, "start_time": "2020-11-16T18:42:09.718872", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Labels for the coefficients of the fit in the previous cell, paired by position.\n",
"clist=('Diatoms-1','Diatoms-2','Cyanobacteria','Cryptophytes','Prasinophytes','Haptophytes',\n",
"       'Dictyochophytes','Dinoflagellates','Raphidophytes','ones')\n",
"for name,coef in zip(clist,m):\n",
"    print(name,coef)" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.024232, "end_time": "2020-11-16T18:42:09.797479", "exception": false, "start_time": "2020-11-16T18:42:09.773247", "status": "completed" }, "tags": [] }, "source": [
"Diatoms:\n",
"- Cryptophytes 0.38\n",
"- Diatoms-1 0.16\n",
"- Raphidophytes 0.09\n",
"\n",
"Flagellates:\n",
"- Cyanobacteria 0.9248939785743201\n",
"- Cryptophytes 0.592769155988474\n",
"- Prasinophytes 0.18975008147644779\n",
"- Haptophytes 0.2794204846195845\n",
"- Dictyochophytes 0.058845777018170356\n",
"\n",
"M. rubrum:\n",
"- Cyanobacteria 0.2710109702166438\n",
"\n",
"None:\n",
"- Diatoms-2\n",
"- Dinoflagellates" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.772311, "end_time": "2020-11-16T18:42:10.594089", "exception": false, "start_time": "2020-11-16T18:42:09.821778", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# One panel per HPLC column: model diatoms (blue) and model flagellates (red) against that column.\n",
"fig,ax=plt.subplots(2,5,figsize=(15,7))\n",
"fig.subplots_adjust(wspace=.4)\n",
"ax=ax.flatten()\n",
"\n",
"chplc=('Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', \n",
"       'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','TchlA (ug/L)')\n",
"\n",
"# model classes scaled by the Chl:N ratio\n",
"mvar1=Chl_N*data['mod_diatoms']\n",
"mvar2=Chl_N*data['mod_flagellates']\n",
"\n",
"# The loop variables are deliberately not named ii: that name holds the boolean row mask\n",
"# built by the regression cells and must not be replaced by an integer index here.\n",
"for iax,col in zip(ax,chplc):\n",
"    obs=logt(data[col])\n",
"    iax.plot(obs,logt(mvar1),'.',color='blue',label='Diatoms')\n",
"    iax.plot(obs,logt(mvar2),'.',color='red',label='Flagellates')\n",
"    iax.set_ylabel('Model Class')\n",
"    iax.set_xlabel(col)\n",
"    iax.set_title('log10[Chl(mg/m3)+.001]')\n",
"    # 1:1 reference line\n",
"    iax.plot((-3,1.5),(-3,1.5),'k-',alpha=.2)\n",
"    iax.set_xlim((-3,1.5))\n",
"    iax.set_ylim((-3,1.5))\n",
"    iax.set_aspect(1)\n",
"ax[0].legend()\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.268209, "end_time": "2020-11-16T18:42:10.891252", "exception": false, "start_time": "2020-11-16T18:42:10.623043", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"fig,ax=plt.subplots(1,3,figsize=(12,3))\n",
"fig.subplots_adjust(wspace=.5)\n",
"#ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']+.5*data['Cryptophytes']),logt(data['mod_diatoms']),'r.')\n",
"ax[0].plot(logt(data['Diatoms-1']+data['Raphidophytes']),logt(data['mod_diatoms']),'k.')\n",
"# Every panel of this figure shares the same axis range, 1:1 reference line and title.\n",
"# NOTE(review): the model values on the y axes are not multiplied by Chl_N although the\n",
"# panel titles are in Chl units -- confirm this is intended.\n",
"lims=(-3,1.5)\n",
"ttl='log10[Chl(mg/m3)+.001]'\n",
"ax[0].set_xlabel('Diatoms1+Raphido')\n",
"ax[0].set_ylabel('Model diatoms')\n",
"ax[0].set_title(ttl)\n",
"ax[0].plot(lims,lims,'k-',alpha=.2)\n",
"ax[0].set_xlim(lims)\n",
"ax[0].set_ylim(lims)\n",
"\n",
"flagObs=data['Cyanobacteria']+data['Cryptophytes']+data['Haptophytes']\n",
"ax[1].plot(logt(flagObs),logt(data['mod_flagellates']),'k.')\n",
"ax[1].set_xlabel('Cyano+Crypto+Hapto')\n",
"ax[1].set_ylabel('Model flagellates')\n",
"ax[1].set_title(ttl)\n",
"ax[1].plot(lims,lims,'k-',alpha=.2)\n",
"ax[1].set_xlim(lims)\n",
"ax[1].set_ylim(lims)\n",
"\n",
"\n",
"ax[2].plot(logt(data['Cyanobacteria']),logt(data['mod_ciliates']),'k.')\n",
"ax[2].set_xlabel('Cyano')\n",
"ax[2].set_ylabel('Model M. rubrum')\n",
"ax[2].set_title(ttl)\n",
"ax[2].plot(lims,lims,'k-',alpha=.2)\n",
"ax[2].set_xlim(lims)\n",
"ax[2].set_ylim(lims)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.041143, "end_time": "2020-11-16T18:42:10.962296", "exception": false, "start_time": "2020-11-16T18:42:10.921153", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Scale each model class by the Chl:N ratio and store it as a *_chl column.\n",
"for grp in ('diatoms','flagellates','ciliates'):\n",
"    data['mod_'+grp+'_chl']=Chl_N*data['mod_'+grp]\n",
"# Plain + is used for the sums so that a NaN in any term gives NaN in the total.\n",
"data['mod_TChl']=(data['mod_diatoms_chl']\n",
"                  +data['mod_flagellates_chl']\n",
"                  +data['mod_ciliates_chl'])\n",
"data['CCPH']=(data['Cyanobacteria']+data['Cryptophytes']\n",
"              +data['Prasinophytes']+data['Haptophytes'])\n",
"data['DD']=data['Diatoms-1']+data['Diatoms-2']\n",
"# Columns summarised by the covariance / correlation tables below.\n",
"hplcCols=['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', \n",
"          'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes']\n",
"modCols=['mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']\n",
"dfVars=data.loc[:,hplcCols+['DD','CCPH','TchlA (ug/L)']+modCols]" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.027945, "end_time":
"2020-11-16T18:42:11.019030", "exception": false, "start_time": "2020-11-16T18:42:10.991085", "status": "completed" }, "tags": [] }, "source": [ "### Variance-Covariance Matrix" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.047712, "end_time": "2020-11-16T18:42:11.095141", "exception": false, "start_time": "2020-11-16T18:42:11.047429", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dfVars.cov()" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.029346, "end_time": "2020-11-16T18:42:11.153923", "exception": false, "start_time": "2020-11-16T18:42:11.124577", "status": "completed" }, "tags": [] }, "source": [ "### Correlation Coefficient Matrix" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.046859, "end_time": "2020-11-16T18:42:11.230398", "exception": false, "start_time": "2020-11-16T18:42:11.183539", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dfVars.corr()" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.029736, "end_time": "2020-11-16T18:42:11.290301", "exception": false, "start_time": "2020-11-16T18:42:11.260565", "status": "completed" }, "tags": [] }, "source": [ "### Cov matrix with log transformed values:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.056868, "end_time": "2020-11-16T18:42:11.377162", "exception": false, "start_time": "2020-11-16T18:42:11.320294", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# Log-transform the same columns that dfVars holds, so the log tables line up with the\n",
"# untransformed ones. 'DD' is included because the model-obs comparison that follows\n",
"# plots logt(data['DD']).\n",
"logCols=['Diatoms-1', 'Diatoms-2','Cyanobacteria','Cryptophytes', 'Prasinophytes', \n",
"         'Haptophytes', 'Dictyochophytes','Dinoflagellates','Raphidophytes','DD','CCPH','TchlA (ug/L)',\n",
"         'mod_diatoms_chl','mod_flagellates_chl','mod_ciliates_chl','mod_TChl']\n",
"# one log-transformed column per entry, in the order listed\n",
"dflog=pd.DataFrame({el:logt(data[el]) for el in logCols})\n",
"dflog.cov()" ] },
{ "cell_type": "markdown", "metadata": { "papermill": {
"duration": 0.030888, "end_time": "2020-11-16T18:42:11.438922", "exception": false, "start_time": "2020-11-16T18:42:11.408034", "status": "completed" }, "tags": [] }, "source": [ "### Corr Coeff matrix with log transformed values:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.047288, "end_time": "2020-11-16T18:42:11.517448", "exception": false, "start_time": "2020-11-16T18:42:11.470160", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dflog.corr()" ] },
{ "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.031356, "end_time": "2020-11-16T18:42:11.580122", "exception": false, "start_time": "2020-11-16T18:42:11.548766", "status": "completed" }, "tags": [] }, "source": [ "### New groups Model-Obs Comparison:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.459833, "end_time": "2020-11-16T18:42:12.072069", "exception": false, "start_time": "2020-11-16T18:42:11.612236", "status": "completed" }, "tags": [] }, "outputs": [], "source": [
"# thresh: offset of the grey lines from the 1:1 line (in log10 units); msize: marker size.\n",
"thresh=.8\n",
"msize=20\n",
"fig,ax=plt.subplots(1,2,figsize=(9,4))\n",
"# (observed column, model column, axis limits, x label, y label) for each panel\n",
"panels=(('DD','mod_diatoms_chl',(-3,2),'Diatoms 1 + Diatoms 2','Model Diatoms'),\n",
"        ('CCPH','mod_flagellates_chl',(-3,1.2),'cyano+crypto+prasino+hapto','Model flagellates'))\n",
"for iax,(obsCol,modCol,lims,xlab,ylab) in zip(ax,panels):\n",
"    # points are coloured by data['yd']; m ends up holding the last scatter drawn,\n",
"    # which the colorbar call at the end of this cell uses\n",
"    m=iax.scatter(logt(data[obsCol]),logt(data[modCol]),\n",
"                  c=data['yd'],s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n",
"    iax.set_xlim(*lims)\n",
"    iax.set_ylim(*lims)\n",
"    iax.set_xlabel(xlab)\n",
"    iax.set_ylabel(ylab)\n",
"    iax.set_title('log10[Chl (mg/m$^3$)+.001]')\n",
"# grey lines at +thresh and -thresh from the 1:1 line on the diatom panel\n",
"for sgn in (1,-1):\n",
"    ax[0].plot((-3,2),(-3+sgn*thresh,2+sgn*thresh),'-',color='grey')\n",
"ax[1].plot((-3,2),(-3+thresh,2+thresh),'-',color='grey')\n", "ax[1].plot((-3,2),(-3-thresh,2-thresh),'-',color='grey')\n", "ax[0].plot((-3,2),(-3,2),'k-')\n", "ax[1].plot((-3,1.2),(-3,1.2),'k-')\n", "plt.tight_layout()\n", "fig.colorbar(m,ax=ax[1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "papermill": { "duration": 0.456893, "end_time": "2020-11-16T18:42:12.563423", "exception": false, "start_time": "2020-11-16T18:42:12.106530", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "print('Diatoms/DD')\n", "fig,ax=plt.subplots(1,4,figsize=(12,4),gridspec_kw={'width_ratios': [1,1,1,.1],'wspace':.5},)\n", "for iax in ax[0:3]:\n", " iax.contour(navlon,navlat,tmask[0,:,:],levels=[0.5,],colors='gray')\n", " iax.set_xlim(-125.3,-122.5)\n", " iax.set_ylim(48,50.5)\n", "ihi=logt(data['mod_diatoms_chl'])>(logt(data['DD'])+thresh)\n", "ilo=logt(data['mod_diatoms_chl'])<(logt(data['DD'])-thresh)\n", "idata=data.loc[(data.DD>=0)&ihi]\n", "ax[0].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[0].set_title('High\\n log(mod)>log(obs)+'+str(thresh))\n", "idata=data.loc[(data.DD>=0)&(~ihi)&(~ilo)]\n", "ax[1].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[1].set_title('Medium\\n log(obs)-'+str(thresh)+'=0)&ilo]\n", "m=ax[2].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[2].set_title('Low\\n log(mod)(logt(data['CCPH'])+thresh)\n", "ilo=logt(data['mod_flagellates_chl'])<(logt(data['CCPH'])-thresh)\n", "idata=data.loc[(data.CCPH>=0)&ihi]\n", "ax[0].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[0].set_title('High\\n log(mod)>log(obs)+'+str(thresh))\n", "idata=data.loc[(data.CCPH>=0)&(~ihi)&(~ilo)]\n", "ax[1].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[1].set_title('Medium\\n log(obs)-'+str(thresh)+'=0)&ilo]\n", 
"m=ax[2].scatter(idata.Lon,idata.Lat,c=idata.yd,s=msize,cmap=cmocean.cm.phase,vmin=0,vmax=366)\n", "ax[2].set_title('Low\\n log(mod)