{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "import matplotlib.pyplot as plt\n", "import datetime as dt\n", "from salishsea_tools import evaltools as et" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# read in stored station info (links station names to locations)\n", "dir0 = '/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology/'\n", "sta_df = pd.read_pickle(dir0 + 'sta_df.p')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "bottle_fn = dir0 + 'raw/ParkerMacCready2019CTDDataFeb2020.xlsx'\n", "sheet_name = '2018-2019NutrientData'\n", "sheet_chl = '2018-2019ChlaLabData'\n", "bot = pd.read_excel(bottle_fn, sheet_name=sheet_name,engine='openpyxl')\n", "chl = pd.read_excel(bottle_fn, sheet_name=sheet_chl, engine='openpyxl')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "bot.dropna(how='all',inplace=True) # drop rows where all values are NaN\n", "chl.dropna(how='all',inplace=True)\n", "bot.dropna(subset=['Sampling Depth'],inplace=True) # drop rows with NaN sampling depths because nominal depths do not appear reliable\n", "chl.dropna(subset=['Sampling Depth'],inplace=True)\n", "# drop rows with no useful data:\n", "bot.dropna(how='all',subset=['NH4_Lab','NO2_Lab','NO3_Lab','PO4_Lab','SiOH4_Lab'],inplace=True)\n", "chl.dropna(how='all',subset=['Chla_Lab'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# drop rows that appear to have misaligned Sampling depths\n", "#Date\tStation\tNiskin\tNomdepth\tSampling Depth\tCTD Cast Rep\tChla_Lab\tChla_QC\t\n", " #2019-02-26\tDNA001\t12.0\t30\t1.136\t1.0\t1.9216\t2.0\t\n", "chl.drop(chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')&\\\n", " (chl.Nomdepth==30)&(chl['Sampling Depth']==1.9216)].index,inplace=True)\n", "# also remove from chl: \n", "# 2018-02-01 HCB010 9.0 30 10.545 sample is probably actually 30 m based on nuts\n", "# 2018-03-09 CRR001 9.0 30 101.554 \"\n", "chl.drop(chl.loc[(chl.Date==dt.datetime(2018,2,1))&(chl.Station=='HCB010')&\\\n", " (chl.Nomdepth==30)&(chl['Sampling Depth']==10.545)].index,inplace=True)\n", "chl.drop(chl.loc[(chl.Date==dt.datetime(2018,3,9))&(chl.Station=='CRR001')&\\\n", " (chl.Nomdepth==30)&(chl['Sampling Depth']==101.554)].index,inplace=True)\n", "#remove from chl and nuts:\n", "# Date:2019-10-30 Station: ADM001 all 3 values (Nom depth 0,10,30 but sampling depth all ~126.7)\n", "chl.drop(chl.loc[(chl.Date==dt.datetime(2019,10,30))&(chl.Station=='ADM001')].index,inplace=True)\n", "bot.drop(chl.loc[(chl.Date==dt.datetime(2019,10,30))&(chl.Station=='ADM001')].index,inplace=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# average over rows having identical (Date,Station,Niskin,Sampling Depth) [replicates]\n", "# ignore Nomdepth because it seems to not always be accurate\n", "chl2=pd.DataFrame(chl.groupby(['Date','Station','Niskin','Sampling Depth'],as_index=False).mean())" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateStationNiskinSampling DepthCTD Cast RepChla_LabChla_QCChla_QAChla_SampleFieldReplicateNumber
02018-01-10PSS019931.1961.00.07102.03.01.0
12018-01-10PSS0191111.4041.00.19602.03.01.0
22018-01-10PSS019121.1871.00.66732.03.01.0
32018-01-10SAR003931.0571.00.05302.03.01.0
42018-01-10SAR0031111.1461.00.20972.03.01.0
..............................
17872019-12-19NSQ002930.6761.00.31542.02.01.0
17882019-12-19NSQ0021010.8711.00.36402.02.01.0
17892019-12-19NSQ002121.2141.00.43972.02.01.0
17902019-12-19OAK0041111.1061.00.57652.02.02.0
17912019-12-19OAK004121.1891.00.57652.02.01.0
\n", "

1792 rows × 9 columns

\n", "
" ], "text/plain": [ " Date Station Niskin Sampling Depth CTD Cast Rep Chla_Lab \\\n", "0 2018-01-10 PSS019 9 31.196 1.0 0.0710 \n", "1 2018-01-10 PSS019 11 11.404 1.0 0.1960 \n", "2 2018-01-10 PSS019 12 1.187 1.0 0.6673 \n", "3 2018-01-10 SAR003 9 31.057 1.0 0.0530 \n", "4 2018-01-10 SAR003 11 11.146 1.0 0.2097 \n", "... ... ... ... ... ... ... \n", "1787 2019-12-19 NSQ002 9 30.676 1.0 0.3154 \n", "1788 2019-12-19 NSQ002 10 10.871 1.0 0.3640 \n", "1789 2019-12-19 NSQ002 12 1.214 1.0 0.4397 \n", "1790 2019-12-19 OAK004 11 11.106 1.0 0.5765 \n", "1791 2019-12-19 OAK004 12 1.189 1.0 0.5765 \n", "\n", " Chla_QC Chla_QA Chla_SampleFieldReplicateNumber \n", "0 2.0 3.0 1.0 \n", "1 2.0 3.0 1.0 \n", "2 2.0 3.0 1.0 \n", "3 2.0 3.0 1.0 \n", "4 2.0 3.0 1.0 \n", "... ... ... ... \n", "1787 2.0 2.0 1.0 \n", "1788 2.0 2.0 1.0 \n", "1789 2.0 2.0 1.0 \n", "1790 2.0 2.0 2.0 \n", "1791 2.0 2.0 1.0 \n", "\n", "[1792 rows x 9 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chl2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "bot2=pd.DataFrame(bot.groupby(['Date','Station','Niskin','Sampling Depth'],as_index=False).mean())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "len(allbot):2357, len(chl2):1792, len(bot2):2348\n" ] } ], "source": [ "allbot = pd.merge(left=bot2,right=chl2,how='outer',\n", " left_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth'],\n", " right_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth'])\n", "print(f'len(allbot):{len(allbot)}, len(chl2):{len(chl2)}, len(bot2):{len(bot2)}')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "565 9\n" ] } ], "source": [ "nutNoChl=len(allbot.loc[(~pd.isnull(allbot.NH4_Lab))&(pd.isnull(allbot.Chla_Lab))])\n", "chlNoNut=len(allbot.loc[(pd.isnull(allbot.NH4_Lab))&(~pd.isnull(allbot.Chla_Lab))])\n", "print(nutNoChl,chlNoNut)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2357 2357\n" ] } ], "source": [ "# if these equal the total length, things are making sense:\n", "print(len(bot2)+chlNoNut,len(chl2)+nutNoChl)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Date', 'Station', 'Niskin', 'Sampling Depth', 'CTD Cast Rep',\n", " 'NH4_Lab', 'NH4_QC', 'NH4_QA', 'NH4_SampleFieldReplicateNumber',\n", " 'NO2_Lab', 'NO2_QC', 'NO2_QA', 'NO2_SampleFieldReplicateNumber',\n", " 'NO3_Lab', 'NO3_QC', 'NO3_QA', 'NO3_SampleFieldReplicateNumber',\n", " 'PO4_Lab', 'PO4_QC', 'PO4_QA', 'PO4_SampleFieldReplicateNumber',\n", " 'SiOH4_Lab', 'SiOH4_QC', 'SiOH4_QA', 'SiOH4_SampleFieldReplicateNumber',\n", " 'Unnamed: 36', 'Chla_Lab', 'Chla_QC', 'Chla_QA',\n", " 'Chla_SampleFieldReplicateNumber'],\n", " dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allbot.keys()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Now add date/times" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "dfTime=pd.read_excel('/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx',\n", " engine='openpyxl',sheet_name='EventDateTime')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "590 2001-07-09 DNA001 17:09:00\n", "591 2001-07-09 DNA001 17:09:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "948 2003-06-25 BUD005 17:00:00\n", "949 2003-06-25 BUD005 17:00:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "1362 2005-08-16 BUD005 10:43:00\n", "1363 2005-08-16 BUD005 10:43:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "1429 2005-11-07 BUD005 11:55:00\n", "1430 2005-11-07 BUD005 11:55:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "1446 2005-12-05 BUD005 11:58:00\n", "1447 2005-12-05 BUD005 11:58:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2104 2008-08-21 SJF000 09:59:00\n", "2105 2008-08-21 SJF000 11:05:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2129 2008-09-29 SJF000 10:00:00\n", "2130 2008-09-29 SJF000 10:15:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2155 2008-11-19 SJF000 10:29:00\n", "2156 2008-11-19 SJF000 10:45:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2182 2009-03-26 SJF000 09:44:00\n", "2183 2009-03-26 SJF000 09:57:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2208 2009-04-16 SJF000 10:04:00\n", "2209 2009-04-16 SJF000 10:19:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2261 2009-06-10 SJF002 10:40:00\n", "2262 2009-06-10 SJF002 11:00:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2287 2009-07-21 SJF000 10:20:00\n", "2289 2009-07-21 SJF000 10:30:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2374 2009-10-24 SJF000 10:02:00\n", "2375 2009-10-24 SJF000 10:15:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2383 2009-11-24 SJF000 10:09:00\n", "2384 2009-11-24 SJF000 10:19:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2476 2010-03-16 SJF000 09:48:00\n", "2477 2010-03-16 SJF000 09:54:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2534 2010-06-01 SJF000 09:43:00\n", "2536 2010-06-01 SJF000 09:55:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2561 2010-06-28 SJF000 09:58:00\n", "2562 2010-06-28 SJF000 10:09:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2596 2010-08-12 SJF000 11:07:00\n", "2597 2010-08-12 SJF000 11:16:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2623 2010-09-15 SJF000 10:28:00\n", "2624 2010-09-15 SJF000 10:38:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2646 2010-10-15 SJF000 10:26:00\n", "2647 2010-10-15 SJF000 10:35:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2688 2010-11-28 SJF000 10:10:00\n", "2689 2010-11-28 SJF000 10:20:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2756 2011-03-14 SJF000 10:12:00\n", "2757 2011-03-14 SJF000 10:20:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2772 2011-04-18 SJF000 10:00:00\n", "2773 2011-04-18 SJF000 10:07:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2807 2011-05-16 SJF000 09:24:00\n", "2808 2011-05-16 SJF000 09:33:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2853 2011-07-27 SJF000 10:59:00\n", "2854 2011-07-27 SJF000 11:13:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2879 2011-08-30 SJF000 09:30:00\n", "2880 2011-08-30 SJF000 09:36:00\n", " FlightDate SiteCode TimeDown \\n(Local - PST or PDT)\n", "2962 2011-12-21 SJF000 08:41:00\n", "2963 2011-12-21 SJF000 08:48:00\n" ] } ], "source": [ "## duplicate Station/Date entries with different times seem to be always within a couple of hours, \n", "# so just take the first (next cell)\n", "test=dfTime.groupby(['FlightDate','SiteCode'])['TimeDown \\n(Local - PST or PDT)'].count()\n", "for date, loc in test[test>1].index:\n", " print(dfTime.loc[(dfTime.FlightDate==date)&(dfTime.SiteCode==loc),['FlightDate','SiteCode','TimeDown \\n(Local - PST or PDT)']])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['FlightYear', 'FlightMonth', 'FlightDate', 'SiteCode', 'Sampled',\n", " 'TimeDown \\n(Local - PST or PDT)', 'FieldComment'],\n", " dtype='object')\n" ] } ], "source": [ "# drop duplicate rows\n", "dfTime.drop_duplicates(subset=['FlightDate','SiteCode'],keep='first',inplace=True)\n", "print(dfTime.keys())" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "dfTime['dtPac']=[dt.datetime.combine(idate, itime) for idate, itime \\\n", " in zip(dfTime['FlightDate'],dfTime['TimeDown \\n(Local - PST or PDT)'])]\n", "dfTime['dtUTC']=[et.pac_to_utc(ii) for ii in dfTime['dtPac']]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "allbot['dtUTC']=np.nan # create column and set all values to nan" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# use loop to set dtUTC values, where available (where dates are unavailable, left as NaN for now)\n", "for ind, row in allbot.iterrows():\n", " ix=(dfTime.FlightDate==row['Date'])&(dfTime.SiteCode==row['Station'])\n", " if np.sum(ix)==1:\n", " idate,itime=dfTime.loc[ix,['FlightDate','TimeDown \\n(Local - PST or PDT)']].values[0]\n", " allbot.loc[ind,['UTCDateTime']]=et.pac_to_utc(dt.datetime.combine(idate,itime))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Now add lat/lons" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# PROCESS STATION LOCATION INFO (based on Parker's code)\n", "sta_fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'\n", "sheetname='Site Info'\n", "sta_df =pd.read_excel(sta_fn,engine='openpyxl',sheet_name=sheetname)\n", "sta_df.dropna(how='any',subset=['Lat_NAD83 (deg / dec_min)','Long_NAD83 (deg / dec_min)','Station'],inplace=True)\n", "sta_df = sta_df.set_index('Station')\n", "# get locations in decimal degrees\n", "for sta in sta_df.index:\n", " lat_str = sta_df.loc[sta, 'Lat_NAD83 (deg / dec_min)']\n", " lat_deg = float(lat_str.split()[0]) + float(lat_str.split()[1])/60\n", " sta_df.loc[sta,'Lat'] = lat_deg\n", " #\n", " lon_str = sta_df.loc[sta, 'Long_NAD83 (deg / dec_min)']\n", " lon_deg = float(lon_str.split()[0]) + float(lon_str.split()[1])/60\n", " sta_df.loc[sta,'Lon'] = -lon_deg \n", "sta_df.pop('Lat_NAD83 (deg / dec_min)');\n", "sta_df.pop('Long_NAD83 (deg / dec_min)');" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "allbotFinal=pd.merge(left=sta_df,right=allbot,how='right',\n", " left_on='Station',right_on='Station')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StationDesigDescripBasin*Max_DepthLatLonDateNiskinSampling Depth...SiOH4_QCSiOH4_QASiOH4_SampleFieldReplicateNumberUnnamed: 36Chla_LabChla_QCChla_QAChla_SampleFieldReplicateNumberdtUTCUTCDateTime
0HCB013NaNNaNNaNNaNNaNNaN2018-01-10326.820...2.03.01.0NaNNaNNaNNaNNaNNaNNaN
1HCB013NaNNaNNaNNaNNaNNaN2018-01-101111.252...2.03.01.0NaNNaNNaNNaNNaNNaNNaN
2HCB013NaNNaNNaNNaNNaNNaN2018-01-10121.336...2.03.01.0NaNNaNNaNNaNNaNNaNNaN
3PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-10398.380...2.03.01.0NaNNaNNaNNaNNaNNaN2018-01-10 20:34:00
4PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-10931.196...2.03.01.0NaN0.07102.03.01.0NaN2018-01-10 20:34:00
..................................................................
2352SKG003CSkagit Bay - Str. Point Red BuoyWhidbey Basin25.048.296488-122.4896052018-09-05910.848...NaNNaNNaNNaN1.23932.03.01.0NaN2018-09-05 19:48:00
2353OAK004COakland Bay - Near Eagle PointSouth Basin28.047.213427-123.0776502018-12-10100.880...NaNNaNNaNNaN1.09452.03.02.0NaN2018-12-10 22:14:00
2354ELB015CElliott Bay - E. of Duwamish HeadMain Basin91.047.596487-122.3695722019-03-15120.897...NaNNaNNaNNaN0.85002.02.01.0NaN2019-03-15 19:15:00
2355HCB004CHood Canal - Gt. Bend, Sisters PointHood Canal Basin53.047.356205-123.0248732019-08-16426.098...NaNNaNNaNNaN0.74602.02.01.0NaN2019-08-16 20:47:00
2356ADM003CAdmiralty Inlet (south)Admiralty Inlet118.047.878983-122.4831952019-11-14120.379...NaNNaNNaNNaN0.74042.02.01.0NaN2019-11-14 20:06:00
\n", "

2357 rows × 38 columns

\n", "
" ], "text/plain": [ " Station Desig Descrip Basin \\\n", "0 HCB013 NaN NaN NaN \n", "1 HCB013 NaN NaN NaN \n", "2 HCB013 NaN NaN NaN \n", "3 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "4 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "... ... ... ... ... \n", "2352 SKG003 C Skagit Bay - Str. Point Red Buoy Whidbey Basin \n", "2353 OAK004 C Oakland Bay - Near Eagle Point South Basin \n", "2354 ELB015 C Elliott Bay - E. of Duwamish Head Main Basin \n", "2355 HCB004 C Hood Canal - Gt. Bend, Sisters Point Hood Canal Basin \n", "2356 ADM003 C Admiralty Inlet (south) Admiralty Inlet \n", "\n", " *Max_Depth Lat Lon Date Niskin Sampling Depth \\\n", "0 NaN NaN NaN 2018-01-10 3 26.820 \n", "1 NaN NaN NaN 2018-01-10 11 11.252 \n", "2 NaN NaN NaN 2018-01-10 12 1.336 \n", "3 107.0 48.010927 -122.301250 2018-01-10 3 98.380 \n", "4 107.0 48.010927 -122.301250 2018-01-10 9 31.196 \n", "... ... ... ... ... ... ... \n", "2352 25.0 48.296488 -122.489605 2018-09-05 9 10.848 \n", "2353 28.0 47.213427 -123.077650 2018-12-10 10 0.880 \n", "2354 91.0 47.596487 -122.369572 2019-03-15 12 0.897 \n", "2355 53.0 47.356205 -123.024873 2019-08-16 4 26.098 \n", "2356 118.0 47.878983 -122.483195 2019-11-14 12 0.379 \n", "\n", " ... SiOH4_QC SiOH4_QA SiOH4_SampleFieldReplicateNumber Unnamed: 36 \\\n", "0 ... 2.0 3.0 1.0 NaN \n", "1 ... 2.0 3.0 1.0 NaN \n", "2 ... 2.0 3.0 1.0 NaN \n", "3 ... 2.0 3.0 1.0 NaN \n", "4 ... 2.0 3.0 1.0 NaN \n", "... ... ... ... ... ... \n", "2352 ... NaN NaN NaN NaN \n", "2353 ... NaN NaN NaN NaN \n", "2354 ... NaN NaN NaN NaN \n", "2355 ... NaN NaN NaN NaN \n", "2356 ... NaN NaN NaN NaN \n", "\n", " Chla_Lab Chla_QC Chla_QA Chla_SampleFieldReplicateNumber dtUTC \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 0.0710 2.0 3.0 1.0 NaN \n", "... ... ... ... ... ... \n", "2352 1.2393 2.0 3.0 1.0 NaN \n", "2353 1.0945 2.0 3.0 2.0 NaN \n", "2354 0.8500 2.0 2.0 1.0 NaN \n", "2355 0.7460 2.0 2.0 1.0 NaN \n", "2356 0.7404 2.0 2.0 1.0 NaN \n", "\n", " UTCDateTime \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 2018-01-10 20:34:00 \n", "4 2018-01-10 20:34:00 \n", "... ... \n", "2352 2018-09-05 19:48:00 \n", "2353 2018-12-10 22:14:00 \n", "2354 2019-03-15 19:15:00 \n", "2355 2019-08-16 20:47:00 \n", "2356 2019-11-14 20:06:00 \n", "\n", "[2357 rows x 38 columns]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allbotFinal" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['BLL040', 'HCB013'], dtype=object)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.unique(allbotFinal.loc[np.isnan(allbotFinal.Lat),['Station']])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# There are two station designations that are not pressent in the station list. Drop them.\n", "allbotFinal.dropna(how='any',subset=['Lat','Lon'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# rename Sample Depth to Z\n", "allbotFinal.rename(columns={'Sampling Depth':'Z'},inplace=True)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StationDesigDescripBasin*Max_DepthLatLonDateNiskinZ...SiOH4_QCSiOH4_QASiOH4_SampleFieldReplicateNumberUnnamed: 36Chla_LabChla_QCChla_QAChla_SampleFieldReplicateNumberdtUTCUTCDateTime
3PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-10398.380...2.03.01.0NaNNaNNaNNaNNaNNaN2018-01-10 20:34:00
4PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-10931.196...2.03.01.0NaN0.07102.03.01.0NaN2018-01-10 20:34:00
5PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-101111.404...2.03.01.0NaN0.19602.03.01.0NaN2018-01-10 20:34:00
6PSS019CPossession Sound - Gedney IslandWhidbey Basin107.048.010927-122.3012502018-01-10121.187...2.03.01.0NaN0.66732.03.01.0NaN2018-01-10 20:34:00
7SAR003CSaratoga Passage - East PointWhidbey Basin123.048.107595-122.4915422018-01-103152.993...2.03.01.0NaNNaNNaNNaNNaNNaN2018-01-10 19:41:00
..................................................................
2352SKG003CSkagit Bay - Str. Point Red BuoyWhidbey Basin25.048.296488-122.4896052018-09-05910.848...NaNNaNNaNNaN1.23932.03.01.0NaN2018-09-05 19:48:00
2353OAK004COakland Bay - Near Eagle PointSouth Basin28.047.213427-123.0776502018-12-10100.880...NaNNaNNaNNaN1.09452.03.02.0NaN2018-12-10 22:14:00
2354ELB015CElliott Bay - E. of Duwamish HeadMain Basin91.047.596487-122.3695722019-03-15120.897...NaNNaNNaNNaN0.85002.02.01.0NaN2019-03-15 19:15:00
2355HCB004CHood Canal - Gt. Bend, Sisters PointHood Canal Basin53.047.356205-123.0248732019-08-16426.098...NaNNaNNaNNaN0.74602.02.01.0NaN2019-08-16 20:47:00
2356ADM003CAdmiralty Inlet (south)Admiralty Inlet118.047.878983-122.4831952019-11-14120.379...NaNNaNNaNNaN0.74042.02.01.0NaN2019-11-14 20:06:00
\n", "

2309 rows × 38 columns

\n", "
" ], "text/plain": [ " Station Desig Descrip Basin \\\n", "3 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "4 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "5 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "6 PSS019 C Possession Sound - Gedney Island Whidbey Basin \n", "7 SAR003 C Saratoga Passage - East Point Whidbey Basin \n", "... ... ... ... ... \n", "2352 SKG003 C Skagit Bay - Str. Point Red Buoy Whidbey Basin \n", "2353 OAK004 C Oakland Bay - Near Eagle Point South Basin \n", "2354 ELB015 C Elliott Bay - E. of Duwamish Head Main Basin \n", "2355 HCB004 C Hood Canal - Gt. Bend, Sisters Point Hood Canal Basin \n", "2356 ADM003 C Admiralty Inlet (south) Admiralty Inlet \n", "\n", " *Max_Depth Lat Lon Date Niskin Z ... \\\n", "3 107.0 48.010927 -122.301250 2018-01-10 3 98.380 ... \n", "4 107.0 48.010927 -122.301250 2018-01-10 9 31.196 ... \n", "5 107.0 48.010927 -122.301250 2018-01-10 11 11.404 ... \n", "6 107.0 48.010927 -122.301250 2018-01-10 12 1.187 ... \n", "7 123.0 48.107595 -122.491542 2018-01-10 3 152.993 ... \n", "... ... ... ... ... ... ... ... \n", "2352 25.0 48.296488 -122.489605 2018-09-05 9 10.848 ... \n", "2353 28.0 47.213427 -123.077650 2018-12-10 10 0.880 ... \n", "2354 91.0 47.596487 -122.369572 2019-03-15 12 0.897 ... \n", "2355 53.0 47.356205 -123.024873 2019-08-16 4 26.098 ... \n", "2356 118.0 47.878983 -122.483195 2019-11-14 12 0.379 ... \n", "\n", " SiOH4_QC SiOH4_QA SiOH4_SampleFieldReplicateNumber Unnamed: 36 \\\n", "3 2.0 3.0 1.0 NaN \n", "4 2.0 3.0 1.0 NaN \n", "5 2.0 3.0 1.0 NaN \n", "6 2.0 3.0 1.0 NaN \n", "7 2.0 3.0 1.0 NaN \n", "... ... ... ... ... \n", "2352 NaN NaN NaN NaN \n", "2353 NaN NaN NaN NaN \n", "2354 NaN NaN NaN NaN \n", "2355 NaN NaN NaN NaN \n", "2356 NaN NaN NaN NaN \n", "\n", " Chla_Lab Chla_QC Chla_QA Chla_SampleFieldReplicateNumber dtUTC \\\n", "3 NaN NaN NaN NaN NaN \n", "4 0.0710 2.0 3.0 1.0 NaN \n", "5 0.1960 2.0 3.0 1.0 NaN \n", "6 0.6673 2.0 3.0 1.0 NaN \n", "7 NaN NaN NaN NaN NaN \n", "... ... ... ... ... ... \n", "2352 1.2393 2.0 3.0 1.0 NaN \n", "2353 1.0945 2.0 3.0 2.0 NaN \n", "2354 0.8500 2.0 2.0 1.0 NaN \n", "2355 0.7460 2.0 2.0 1.0 NaN \n", "2356 0.7404 2.0 2.0 1.0 NaN \n", "\n", " UTCDateTime \n", "3 2018-01-10 20:34:00 \n", "4 2018-01-10 20:34:00 \n", "5 2018-01-10 20:34:00 \n", "6 2018-01-10 20:34:00 \n", "7 2018-01-10 19:41:00 \n", "... ... \n", "2352 2018-09-05 19:48:00 \n", "2353 2018-12-10 22:14:00 \n", "2354 2019-03-15 19:15:00 \n", "2355 2019-08-16 20:47:00 \n", "2356 2019-11-14 20:06:00 \n", "\n", "[2309 rows x 38 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "allbotFinal" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### here is an example loading the new chlorophyll data (includes all years)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'\n", "sheetname='LabChlaPheo'\n", "chlPheo =pd.read_excel(fn,engine='openpyxl',sheet_name=sheetname)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "chlPheo.dropna(how='any',subset=['Date','Station','SamplingDepth'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# average over replicates\n", "chlPheo2=pd.DataFrame(chlPheo.groupby(['Date','Station','SamplingDepth'],as_index=False).mean())" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# join to station info (lat/lon)\n", "chlPheo3=pd.merge(left=sta_df,right=chlPheo2,how='right',\n", " left_on='Station',right_on='Station')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# join to date/time\n", "dfTime['dtUTC']=[et.pac_to_utc(dt.datetime.combine(idate,itime)) for idate,itime in \\\n", " zip(dfTime['FlightDate'],dfTime['TimeDown \\n(Local - PST or PDT)'])]\n", "dfTime2=dfTime.loc[:,['FlightDate','SiteCode','dtUTC']]\n", "chlPheoFinal=pd.merge(left=chlPheo3,right=dfTime2,how='left',\n", " left_on=['Date','Station'],right_on=['FlightDate','SiteCode'])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(11469, 11469, 5186)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(chlPheoFinal),len(chlPheo3),len(dfTime2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stuff that helped id problems in matching chl and nuts:" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "def intna(x):\n", " try: \n", " y=int(x)\n", " except:\n", " y=np.nan\n", " return y" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.21348314606741572" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.abs(140-178)/178" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "nd=[intna(ii) for ii in chl['Nomdepth']]\n", "crit=[np.abs(isd-ind)>max(5,0.3*isd) for ind, isd in zip(nd,chl['Sampling Depth'])]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',\n", " 'CTD Cast Rep', 'NH4_Lab', 'NH4_QC', 'NH4_QF', 'NH4_QA', 'NH4_Comment',\n", " 'NH4_SampleFieldReplicateNumber', 'NO2_Lab', 'NO2_QC', 'NO2_QF',\n", " 'NO2_QA', 'NO2_Comment', 'NO2_SampleFieldReplicateNumber', 'NO3_Lab',\n", " 'NO3_QC', 'NO3_QF', 'NO3_QA', 'NO3_Comment',\n", " 'NO3_SampleFieldReplicateNumber', 'PO4_Lab', 'PO4_QC', 'PO4_QF',\n", " 'PO4_QA', 'PO4_Comment', 'PO4_SampleFieldReplicateNumber', 'SiOH4_Lab',\n", " 'SiOH4_QC', 'SiOH4_QF', 'SiOH4_QA', 'SiOH4_Comment',\n", " 'SiOH4_SampleFieldReplicateNumber', 'Unnamed: 36'],\n", " dtype='object')" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bot.keys()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "119 2019-02-26 DNA001 12 0 1.136 1 \n", "120 2019-02-26 DNA001 10 10 9.702 1 \n", "121 2019-02-26 DNA001 12 30 1.136 1 \n", "\n", " Chla_Lab \n", "119 2.2268 \n", "120 1.9555 \n", "121 1.9216 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "163 2019-02-26 DNA001 12 0 1.136 1 \n", "164 2019-02-26 DNA001 10 10 9.702 1 \n", "165 2019-02-26 DNA001 4 30 29.815 1 \n", "\n", " NH4_Lab NO3_Lab \n", "163 0.3302 27.5842 \n", "164 0.3106 27.6943 \n", "165 0.2201 27.8900 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "425 2019-05-08 SJF002 12 0 0.713 1 \n", "426 2019-05-08 SJF002 4 140 79.942 1 \n", "\n", " Chla_Lab \n", "425 1.8423 \n", "426 0.5294 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "531 2019-05-08 SJF002 12 0 0.713 1 \n", "532 2019-05-08 SJF002 4 140 79.942 1 \n", "533 2019-05-08 SJF002 4 140 79.942 1 \n", "534 2019-05-08 SJF002 4 140 79.942 1 \n", "\n", " NH4_Lab NO3_Lab \n", "531 1.0515 17.6690 \n", "532 0.1721 29.6922 \n", "533 0.1963 29.6108 \n", "534 0.1745 29.4867 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "776 2019-09-11 WPA003 12 0 1.140 1 \n", "777 2019-09-11 WPA003 11 10 4.075 1 \n", "\n", " Chla_Lab \n", "776 4.5214 \n", "777 4.2727 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "966 2019-09-11 WPA003 12 0 1.140 1 \n", "967 2019-09-11 WPA003 11 10 4.075 1 \n", "\n", " NH4_Lab NO3_Lab \n", "966 1.1365 3.1884 \n", "967 0.9785 2.9332 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "791 2019-09-24 ADM002 10 0 0.580 1 \n", "792 2019-09-24 ADM002 6 30 30.052 1 \n", "793 2019-09-24 ADM002 4 80 60.837 1 \n", "\n", " Chla_Lab \n", "791 1.5883 \n", "792 0.6645 \n", "793 0.3328 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "981 2019-09-24 ADM002 10 0 0.580 1 \n", "982 2019-09-24 ADM002 8 10 10.043 1 \n", "983 2019-09-24 ADM002 6 30 30.052 1 \n", "984 2019-09-24 ADM002 4 80 60.837 1 \n", "\n", " NH4_Lab NO3_Lab \n", "981 0.3724 24.7083 \n", "982 0.3567 24.8101 \n", "983 0.3554 26.3724 \n", "984 0.1539 28.6972 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "895 2019-10-29 WPA008 12 0 1.295 1 \n", "896 2019-10-29 WPA008 11 10 3.239 1 \n", "\n", " Chla_Lab \n", "895 2.9615 \n", "896 2.9050 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1104 2019-10-29 WPA008 12 0 1.295 1 \n", "1105 2019-10-29 WPA008 11 10 3.239 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1104 3.0141 9.0886 \n", "1105 3.0663 9.3822 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "997 2019-12-05 HCB003 12 0 1.054 1 \n", "998 2019-12-05 HCB003 11 10 10.603 1 \n", "999 2019-12-05 HCB003 9 30 123.116 1 \n", "\n", " Chla_Lab \n", "997 0.8365 \n", "998 0.3594 \n", "999 0.0690 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1231 2019-12-05 HCB003 12 0 1.054 1 \n", "1232 2019-12-05 HCB003 11 10 10.603 1 \n", "1233 2019-12-05 HCB003 9 30 123.116 1 \n", "1234 2019-12-05 HCB003 3 NB 125.111 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1231 0.0 27.0123 \n", "1232 0.0 28.4125 \n", "1233 0.0 28.4254 \n", "1234 0.0 28.4172 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1156 2018-02-07 PTH005 8 0 0.623 1 \n", "1157 2018-02-07 PTH005 6 10 10.019 1 \n", "1158 2018-02-07 PTH005 4 30 22.011 1 \n", "\n", " Chla_Lab \n", "1156 0.4131 \n", "1157 0.3311 \n", "1158 0.1691 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1428 2018-02-07 PTH005 8 0 0.623 1 \n", "1429 2018-02-07 PTH005 6 10 10.019 1 \n", "1430 2018-02-07 PTH005 4 30 22.011 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1428 0.2193 24.3483 \n", "1429 0.1273 24.3710 \n", "1430 1.0114 23.6798 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1226 2018-03-07 PTH005 8 0 0.605 1 \n", "1227 2018-03-07 PTH005 6 10 9.959 1 \n", "1228 2018-03-07 PTH005 4 30 22.752 1 \n", "\n", " Chla_Lab \n", "1226 1.6503 \n", "1227 2.7539 \n", "1228 0.6581 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1509 2018-03-07 PTH005 8 0 0.605 1 \n", "1510 2018-03-07 PTH005 6 10 9.959 1 \n", "1511 2018-03-07 PTH005 4 30 22.752 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1509 0.4032 24.4072 \n", "1510 0.3268 24.3368 \n", "1511 0.8594 26.1480 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1249 2018-03-08 HCB007 12 0 1.123 1 \n", "1250 2018-03-08 HCB007 11 10 19.809 1 \n", "\n", " Chla_Lab \n", "1249 6.6730 \n", "1250 12.8164 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1537 2018-03-08 HCB007 12 0 1.123 1 \n", "1538 2018-03-08 HCB007 11 10 19.809 1 \n", "1539 2018-03-08 HCB007 3 NB 19.693 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1537 0.3574 5.0614 \n", "1538 1.8039 24.3578 \n", "1539 1.8137 24.2544 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1362 2018-04-19 PTH005 8 0 0.708 1 \n", "1363 2018-04-19 PTH005 6 10 10.013 1 \n", "1364 2018-04-19 PTH005 4 30 20.133 1 \n", "\n", " Chla_Lab \n", "1362 2.0857 \n", "1363 2.0553 \n", "1364 1.2048 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1671 2018-04-19 PTH005 8 0 0.708 1 \n", "1672 2018-04-19 PTH005 6 10 10.013 1 \n", "1673 2018-04-19 PTH005 4 30 20.133 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1671 1.2930 18.1204 \n", "1672 1.4154 17.9189 \n", "1673 1.2144 18.5835 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1532 2018-06-05 PTH005 8 0 0.732 1 \n", "1533 2018-06-05 PTH005 6 10 10.110 1 \n", "1534 2018-06-05 PTH005 4 30 21.792 1 \n", "\n", " Chla_Lab \n", "1532 12.6560 \n", "1533 8.6061 \n", "1534 2.0148 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1895 2018-06-05 PTH005 8 0 0.732 1 \n", "1896 2018-06-05 PTH005 6 10 10.110 1 \n", "1897 2018-06-05 PTH005 4 30 21.792 1 \n", "\n", " NH4_Lab NO3_Lab \n", "1895 0.4617 5.6939 \n", "1896 1.3238 14.3760 \n", "1897 2.6031 18.3663 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1701 2018-08-08 PTH005 8 0 0.805 1 \n", "1702 2018-08-08 PTH005 6 10 10.232 1 \n", "1703 2018-08-08 PTH005 4 30 21.665 1 \n", "\n", " Chla_Lab \n", "1701 13.9722 \n", "1702 10.7323 \n", "1703 5.7306 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "2146 2018-08-08 PTH005 8 0 0.805 1 \n", "2147 2018-08-08 PTH005 6 10 10.232 1 \n", "2148 2018-08-08 PTH005 4 30 21.665 1 \n", "\n", " NH4_Lab NO3_Lab \n", "2146 0.5354 6.2933 \n", "2147 1.4481 13.6707 \n", "2148 1.8542 17.3004 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "1823 2018-09-07 PTH005 8 0 0.587 1 \n", "1824 2018-09-07 PTH005 6 10 10.196 1 \n", "1825 2018-09-07 PTH005 4 30 21.428 1 \n", "\n", " Chla_Lab \n", "1823 8.7073 \n", "1824 4.6270 \n", "1825 2.5717 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "2288 2018-09-07 PTH005 8 0 0.587 1 \n", "2289 2018-09-07 PTH005 6 10 10.196 1 \n", "2290 2018-09-07 PTH005 4 30 21.428 1 \n", "\n", " NH4_Lab NO3_Lab \n", "2288 1.0510 13.3424 \n", "2289 2.3574 16.6559 \n", "2290 1.3044 14.1399 \n", "-------\n", "\n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "2086 2018-12-05 SJF001 4 140 129.765 1 \n", "2088 2018-12-05 SJF001 6 80 29.955 1 \n", "\n", " Chla_Lab \n", "2086 0.1691 \n", "2088 0.2076 \n", " Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \\\n", "2611 2018-12-05 SJF001 4 140 129.765 1 \n", "2613 2018-12-05 SJF001 6 80 29.955 1 \n", "\n", " NH4_Lab NO3_Lab \n", "2611 0.0162 27.6416 \n", "2613 0.0677 27.1655 \n", "-------\n", "\n" ] } ], "source": [ "for i, row in chl.loc[crit].iterrows():\n", " print(chl.loc[(chl.Date==row['Date'])&(chl.Station==row.Station),\n", " ['Date','Station','Niskin','Nomdepth','Sampling Depth',\n", " 'CTD Cast Rep','Chla_Lab']])\n", " print(bot.loc[(bot.Date==row['Date'])&(bot.Station==row.Station),\n", " ['Date','Station','Niskin','Nomdepth','Sampling Depth',\n", " 'CTD Cast Rep','NH4_Lab','NO3_Lab']])\n", " print('-------\\n')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 1)", "output_type": "error", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m nomd=[ii if np.isfloat]\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "nomd=[ii if np.isfloat]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot = pd.merge(left=bot,right=chl,how='outer',\n", " left_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NO3_SampleFieldReplicateNumber'],\n", " right_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','Chla_SampleFieldReplicateNumber'])\n", "print(f'len(allbot):{len(allbot)}, len(chl):{len(chl)}, len(bot):{len(bot)}')\n", "#,'Nomdepth'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bot.keys()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chl.keys()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.keys()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot['ones']=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.loc[(~pd.isnull(allbot.NH4_Lab))&(pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.loc[(pd.isnull(allbot.NH4_Lab))&(~pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bot[['Date','Station','Niskin','Sampling Depth']].head(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chl[['Date','Station','Niskin','Sampling Depth']].head(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.groupby(['Date','Station','Niskin','Sampling Depth'])['ones'].count()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "temp=allbot.groupby(['Date','Station','Niskin','Sampling Depth']).agg({'ones':['count']})\n", "temp.columns = ['icount']\n", "np.unique(temp.icount)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "temp.loc[temp.icount>1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bot.loc[(bot.Date==dt.datetime(2018,1,12))&(bot.Station=='OAK004')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chl.loc[(chl.Date==dt.datetime(2018,1,12))&(chl.Station=='OAK004')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "temp.loc[temp.icount==2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bot.loc[(bot.Date==dt.datetime(2019,2,26))&(bot.Station=='DNA001'),['Date','Station','Niskin','Nomdepth','Sampling Depth','NH4_Lab','NO3_Lab']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.loc[(allbot.Date==dt.datetime(2019,2,26))&(allbot.Station=='DNA001'),['Date','Station','Niskin','Sampling Depth','Chla_Lab']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#remove this entry in chl:\n", " #Date\tStation\tNiskin\tNomdepth\tSampling Depth\tCTD Cast Rep\tChla_Lab\tChla_QC\t\n", " #2019-02-26\tDNA001\t12.0\t30\t1.136\t1.0\t1.9216\t2.0\t" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "allbot.loc[(allbot.Nomdepth_x!=allbot.Nomdepth_y)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python (py39)", "language": "python", "name": "py39" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 4 }