{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "collapsed_sections": [
    "3H8vfniWnwZy",
    "yilB3s2xwmR5",
    "GQz5a8QmvWi-",
    "bxhSbhcbwvnS"
   ]
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Data processing tools for MATE Floats! lesson\n",
    "\n",
    "Created by Ethan Campbell for NCAT/MATE/GO-BGC Marine Technology Summer Program\n",
    "\n",
    "Tuesday, August 22, 2023"
   ],
   "metadata": {
    "id": "OxvLAQ1SWpeR"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np # NumPy is an array and math library\n",
    "import matplotlib.pyplot as plt # Matplotlib is a visualization (plotting) library\n",
    "import pandas as pd # Pandas lets us work with spreadsheet (.csv) data\n",
    "import xarray as xr\n",
    "from datetime import datetime, timedelta # Datetime helps us work with dates and times"
   ],
   "metadata": {
    "id": "34tFnmCvvP2Y"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Routine to process R/V Carson Carkeek `.cnv` cast files"
   ],
   "metadata": {
    "id": "3H8vfniWnwZy"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# May 2023 R/V Carson cruise data\n",
    "# filepaths = ['/content/2023051001001_Carkeek.cnv','/content/2023051101001_Carkeek.cnv']\n",
    "# export_filepaths = ['/content/2023051001001_Carkeek.csv','/content/2023051101001_Carkeek.csv']\n",
    "\n",
    "# August 2023 R/V Carson MATE cruise data\n",
    "filepaths = ['/content/20230825ctd01_Carkeek.cnv','/content/20230825ctd02_Carkeek.cnv']\n",
    "export_filepaths = ['/content/20230825ctd01_Carkeek.csv','/content/20230825ctd02_Carkeek.csv']\n",
    "assert len(filepaths) == len(export_filepaths), 'Each input file needs one export path'\n",
    "\n",
    "# Tunable thresholds for cast extraction and de-spiking\n",
    "SMOOTHING_WINDOW = 17   # samples in the centered rolling mean of depSM\n",
    "MIN_DEPTH_STEP = 0.03   # smoothed depSM increase per sample that counts as descending\n",
    "MAX_DEPTH_JUMP = 10     # absolute sample-to-sample depSM change treated as a spike\n",
    "\n",
    "casts = []\n",
    "for idx, filepath in enumerate(filepaths):\n",
    "    # Read the whole file once; the context manager closes it even if reading fails\n",
    "    with open(filepath,'r') as file_object:\n",
    "        all_lines = file_object.readlines()\n",
    "\n",
    "    # Scan the header: keep '*' and '#' lines as a '#'-commented preamble, collect\n",
    "    # column names from the ' name' lines, and stop at the '*END*' marker\n",
    "    header_names = []\n",
    "    preamble_to_save = []\n",
    "    for line_idx, line in enumerate(all_lines):\n",
    "        if line.startswith('*'):\n",
    "            preamble_to_save.append('# ' + line)\n",
    "        if line.startswith('#'):\n",
    "            preamble_to_save.append(line)\n",
    "        if '*END*' in line:\n",
    "            break\n",
    "        elif ' name' in line:\n",
    "            header_names.append(line.split('= ')[1].split(':')[0])\n",
    "    # line_idx now points at the '*END*' marker line; data rows start on the next line\n",
    "    print('First line of data after header:',line_idx, line)\n",
    "    print('Header column names:',header_names,'\\n')\n",
    "\n",
    "    # Load data (a regex whitespace separator replaces the deprecated delim_whitespace=True)\n",
    "    cnv_data = pd.read_csv(filepath,header=None,names=header_names,\n",
    "                           skiprows=line_idx+1,sep=r'\\s+')\n",
    "\n",
    "    # Extract approximate cast (note: this only works for a CNV file with a single cast):\n",
    "    # keep samples from the first to the last point where the smoothed depth is increasing\n",
    "    smoothed_depth = cnv_data['depSM'].rolling(window=SMOOTHING_WINDOW,center=True).mean()\n",
    "    descending_idx = np.flatnonzero(np.diff(smoothed_depth) > MIN_DEPTH_STEP)\n",
    "    if len(descending_idx) == 0:\n",
    "        raise ValueError('No descending section found in ' + filepath)\n",
    "    first_sample_idx = descending_idx[0]\n",
    "    last_sample_idx = descending_idx[-1]\n",
    "    # .loc slicing is inclusive; reset_index returns a new frame instead of mutating a slice\n",
    "    cast = cnv_data.loc[first_sample_idx:last_sample_idx].reset_index(drop=True)\n",
    "\n",
    "    # De-spike cast data: drop rows with a large depth jump or a negative oxygen reading\n",
    "    # (the index is intentionally not reset afterwards, so gaps mark the removed rows)\n",
    "    depth_step = cast['depSM'].diff()\n",
    "    is_spike = (depth_step.abs() > MAX_DEPTH_JUMP) | (cast['sbeox0ML/L'] < 0.0)\n",
    "    cast = cast.loc[~is_spike]\n",
    "\n",
    "    # Display and save cast data\n",
    "    display(cast)\n",
    "    casts.append(cast)\n",
    "\n",
    "    # Export data, pre-pended with the original header preamble, in a single write pass\n",
    "    # (newline='' leaves line endings to pandas, as recommended for to_csv file handles)\n",
    "    with open(export_filepaths[idx],'w',newline='') as f:\n",
    "        f.write(''.join(preamble_to_save))\n",
    "        cast.to_csv(f)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "E3bePjbinzrr",
    "outputId":
"13d77a39-14d4-4033-8e3a-2ce85dc7a029" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "First line of data after header: 250 *END*\n", "\n", "Header column names: ['altM', 'CStarTr0', 'c0mS/cm', 'density00', 'depSM', 'latitude', 'longitude', 'flECO-AFL', 'modError', 'sbeox0Mg/L', 'sbeox0ML/L', 'ph', 'potemp090C', 'prDM', 'sal00', 't090C', 'scan', 'flag'] \n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " altM CStarTr0 c0mS/cm density00 depSM latitude longitude \\\n", "0 98.51 85.0319 36.454765 1022.5799 3.094 47.69744 -122.45666 \n", "1 98.53 85.0319 36.454386 1022.5800 3.110 47.69744 -122.45666 \n", "2 98.53 85.0575 36.450979 1022.5803 3.151 47.69744 -122.45666 \n", "3 98.53 85.0319 36.447381 1022.5802 3.094 47.69744 -122.45666 \n", "4 98.53 85.0319 36.447445 1022.5819 3.231 47.69744 -122.45666 \n", "... ... ... ... ... ... ... ... \n", "9006 13.60 76.3392 36.207341 1024.1586 188.034 47.69732 -122.45778 \n", "9007 13.55 76.3392 36.207467 1024.1587 188.034 47.69732 -122.45778 \n", "9008 13.53 76.3392 36.207656 1024.1593 188.114 47.69732 -122.45778 \n", "9009 13.53 76.3392 36.207278 1024.1589 188.098 47.69732 -122.45778 \n", "9010 13.55 76.3392 36.206901 1024.1589 188.154 47.69732 -122.45778 \n", "\n", " flECO-AFL modError sbeox0Mg/L sbeox0ML/L ph potemp090C prDM \\\n", "0 1.6112 0 8.0144 5.6080 9.035 13.6443 3.121 \n", "1 1.6112 0 8.0145 5.6081 9.035 13.6439 3.137 \n", "2 1.6112 0 8.0096 5.6046 9.030 13.6407 3.177 \n", "3 1.6112 0 8.0143 5.6080 9.035 13.6374 3.121 \n", "4 1.6112 0 8.0139 5.6077 9.035 13.6360 3.258 \n", "... ... ... ... ... ... ... ... 
\n", "9006 0.2925 2 5.9266 4.1471 8.865 12.4265 189.707 \n", "9007 0.3218 2 5.9256 4.1464 8.865 12.4266 189.707 \n", "9008 0.3218 2 5.9247 4.1458 8.865 12.4263 189.788 \n", "9009 0.2925 2 5.9237 4.1451 8.865 12.4264 189.772 \n", "9010 0.3218 2 5.9234 4.1449 8.865 12.4264 189.829 \n", "\n", " sal00 t090C scan flag \n", "0 30.2181 13.6447 5364 0.0 \n", "1 30.2180 13.6443 5365 0.0 \n", "2 30.2174 13.6411 5366 0.0 \n", "3 30.2167 13.6378 5367 0.0 \n", "4 30.2178 13.6364 5368 0.0 \n", "... ... ... ... ... \n", "9006 30.8767 12.4508 14370 0.0 \n", "9007 30.8768 12.4508 14371 0.0 \n", "9008 30.8771 12.4506 14372 0.0 \n", "9009 30.8767 12.4507 14373 0.0 \n", "9010 30.8763 12.4507 14374 0.0 \n", "\n", "[9006 rows x 18 columns]" ], "text/html": [ "\n", "
\n", " | altM | \n", "CStarTr0 | \n", "c0mS/cm | \n", "density00 | \n", "depSM | \n", "latitude | \n", "longitude | \n", "flECO-AFL | \n", "modError | \n", "sbeox0Mg/L | \n", "sbeox0ML/L | \n", "ph | \n", "potemp090C | \n", "prDM | \n", "sal00 | \n", "t090C | \n", "scan | \n", "flag | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "98.51 | \n", "85.0319 | \n", "36.454765 | \n", "1022.5799 | \n", "3.094 | \n", "47.69744 | \n", "-122.45666 | \n", "1.6112 | \n", "0 | \n", "8.0144 | \n", "5.6080 | \n", "9.035 | \n", "13.6443 | \n", "3.121 | \n", "30.2181 | \n", "13.6447 | \n", "5364 | \n", "0.0 | \n", "
1 | \n", "98.53 | \n", "85.0319 | \n", "36.454386 | \n", "1022.5800 | \n", "3.110 | \n", "47.69744 | \n", "-122.45666 | \n", "1.6112 | \n", "0 | \n", "8.0145 | \n", "5.6081 | \n", "9.035 | \n", "13.6439 | \n", "3.137 | \n", "30.2180 | \n", "13.6443 | \n", "5365 | \n", "0.0 | \n", "
2 | \n", "98.53 | \n", "85.0575 | \n", "36.450979 | \n", "1022.5803 | \n", "3.151 | \n", "47.69744 | \n", "-122.45666 | \n", "1.6112 | \n", "0 | \n", "8.0096 | \n", "5.6046 | \n", "9.030 | \n", "13.6407 | \n", "3.177 | \n", "30.2174 | \n", "13.6411 | \n", "5366 | \n", "0.0 | \n", "
3 | \n", "98.53 | \n", "85.0319 | \n", "36.447381 | \n", "1022.5802 | \n", "3.094 | \n", "47.69744 | \n", "-122.45666 | \n", "1.6112 | \n", "0 | \n", "8.0143 | \n", "5.6080 | \n", "9.035 | \n", "13.6374 | \n", "3.121 | \n", "30.2167 | \n", "13.6378 | \n", "5367 | \n", "0.0 | \n", "
4 | \n", "98.53 | \n", "85.0319 | \n", "36.447445 | \n", "1022.5819 | \n", "3.231 | \n", "47.69744 | \n", "-122.45666 | \n", "1.6112 | \n", "0 | \n", "8.0139 | \n", "5.6077 | \n", "9.035 | \n", "13.6360 | \n", "3.258 | \n", "30.2178 | \n", "13.6364 | \n", "5368 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9006 | \n", "13.60 | \n", "76.3392 | \n", "36.207341 | \n", "1024.1586 | \n", "188.034 | \n", "47.69732 | \n", "-122.45778 | \n", "0.2925 | \n", "2 | \n", "5.9266 | \n", "4.1471 | \n", "8.865 | \n", "12.4265 | \n", "189.707 | \n", "30.8767 | \n", "12.4508 | \n", "14370 | \n", "0.0 | \n", "
9007 | \n", "13.55 | \n", "76.3392 | \n", "36.207467 | \n", "1024.1587 | \n", "188.034 | \n", "47.69732 | \n", "-122.45778 | \n", "0.3218 | \n", "2 | \n", "5.9256 | \n", "4.1464 | \n", "8.865 | \n", "12.4266 | \n", "189.707 | \n", "30.8768 | \n", "12.4508 | \n", "14371 | \n", "0.0 | \n", "
9008 | \n", "13.53 | \n", "76.3392 | \n", "36.207656 | \n", "1024.1593 | \n", "188.114 | \n", "47.69732 | \n", "-122.45778 | \n", "0.3218 | \n", "2 | \n", "5.9247 | \n", "4.1458 | \n", "8.865 | \n", "12.4263 | \n", "189.788 | \n", "30.8771 | \n", "12.4506 | \n", "14372 | \n", "0.0 | \n", "
9009 | \n", "13.53 | \n", "76.3392 | \n", "36.207278 | \n", "1024.1589 | \n", "188.098 | \n", "47.69732 | \n", "-122.45778 | \n", "0.2925 | \n", "2 | \n", "5.9237 | \n", "4.1451 | \n", "8.865 | \n", "12.4264 | \n", "189.772 | \n", "30.8767 | \n", "12.4507 | \n", "14373 | \n", "0.0 | \n", "
9010 | \n", "13.55 | \n", "76.3392 | \n", "36.206901 | \n", "1024.1589 | \n", "188.154 | \n", "47.69732 | \n", "-122.45778 | \n", "0.3218 | \n", "2 | \n", "5.9234 | \n", "4.1449 | \n", "8.865 | \n", "12.4264 | \n", "189.829 | \n", "30.8763 | \n", "12.4507 | \n", "14374 | \n", "0.0 | \n", "
9006 rows × 18 columns
\n", "\n", " | altM | \n", "CStarTr0 | \n", "c0mS/cm | \n", "density00 | \n", "depSM | \n", "latitude | \n", "longitude | \n", "flECO-AFL | \n", "modError | \n", "sbeox0Mg/L | \n", "sbeox0ML/L | \n", "ph | \n", "potemp090C | \n", "prDM | \n", "sal00 | \n", "t090C | \n", "scan | \n", "flag | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "98.53 | \n", "84.5703 | \n", "37.265011 | \n", "1022.3681 | \n", "2.294 | \n", "47.69982 | \n", "-122.45644 | \n", "0.3511 | \n", "0 | \n", "8.1838 | \n", "5.7265 | \n", "9.033 | \n", "14.6083 | \n", "2.313 | \n", "30.2016 | \n", "14.6087 | \n", "3917 | \n", "0.0 | \n", "
1 | \n", "98.51 | \n", "84.5703 | \n", "37.262911 | \n", "1022.3688 | \n", "2.438 | \n", "47.69982 | \n", "-122.45644 | \n", "0.3804 | \n", "0 | \n", "8.1890 | \n", "5.7302 | \n", "9.038 | \n", "14.6065 | \n", "2.459 | \n", "30.2011 | \n", "14.6068 | \n", "3918 | \n", "0.0 | \n", "
2 | \n", "98.53 | \n", "84.5703 | \n", "37.263164 | \n", "1022.3679 | \n", "2.118 | \n", "47.69982 | \n", "-122.45644 | \n", "0.3804 | \n", "0 | \n", "8.1888 | \n", "5.7300 | \n", "9.038 | \n", "14.6062 | \n", "2.136 | \n", "30.2017 | \n", "14.6065 | \n", "3919 | \n", "0.0 | \n", "
3 | \n", "98.51 | \n", "84.5703 | \n", "37.263674 | \n", "1022.3684 | \n", "2.254 | \n", "47.69982 | \n", "-122.45644 | \n", "0.3511 | \n", "0 | \n", "8.1893 | \n", "5.7304 | \n", "9.033 | \n", "14.6067 | \n", "2.273 | \n", "30.2017 | \n", "14.6070 | \n", "3920 | \n", "0.0 | \n", "
4 | \n", "98.53 | \n", "84.5703 | \n", "37.263546 | \n", "1022.3673 | \n", "2.062 | \n", "47.69982 | \n", "-122.45644 | \n", "0.3804 | \n", "0 | \n", "8.1947 | \n", "5.7341 | \n", "9.033 | \n", "14.6069 | \n", "2.079 | \n", "30.2015 | \n", "14.6072 | \n", "3921 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9253 | \n", "13.33 | \n", "69.1337 | \n", "36.215576 | \n", "1024.1453 | \n", "184.981 | \n", "47.69966 | \n", "-122.45648 | \n", "0.3511 | \n", "0 | \n", "5.9840 | \n", "4.1872 | \n", "8.846 | \n", "12.4347 | \n", "186.626 | \n", "30.8793 | \n", "12.4586 | \n", "13170 | \n", "0.0 | \n", "
9254 | \n", "13.31 | \n", "69.1337 | \n", "36.215449 | \n", "1024.1448 | \n", "184.941 | \n", "47.69966 | \n", "-122.45648 | \n", "0.3804 | \n", "0 | \n", "5.9894 | \n", "4.1910 | \n", "8.846 | \n", "12.4350 | \n", "186.585 | \n", "30.8790 | \n", "12.4589 | \n", "13171 | \n", "0.0 | \n", "
9255 | \n", "13.31 | \n", "69.1337 | \n", "36.215513 | \n", "1024.1451 | \n", "184.981 | \n", "47.69966 | \n", "-122.45648 | \n", "0.3804 | \n", "0 | \n", "5.9844 | \n", "4.1875 | \n", "8.846 | \n", "12.4349 | \n", "186.626 | \n", "30.8791 | \n", "12.4588 | \n", "13172 | \n", "0.0 | \n", "
9256 | \n", "13.31 | \n", "69.1337 | \n", "36.215261 | \n", "1024.1448 | \n", "185.038 | \n", "47.69966 | \n", "-122.45648 | \n", "0.3511 | \n", "0 | \n", "5.9842 | \n", "4.1874 | \n", "8.846 | \n", "12.4353 | \n", "186.683 | \n", "30.8785 | \n", "12.4592 | \n", "13173 | \n", "0.0 | \n", "
9257 | \n", "13.26 | \n", "69.1337 | \n", "36.215198 | \n", "1024.1449 | \n", "185.062 | \n", "47.69966 | \n", "-122.45648 | \n", "0.3804 | \n", "0 | \n", "5.9896 | \n", "4.1911 | \n", "8.846 | \n", "12.4352 | \n", "186.707 | \n", "30.8785 | \n", "12.4591 | \n", "13174 | \n", "0.0 | \n", "
9258 rows × 18 columns
\n", "