{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# imports\n", "import sqlalchemy\n", "from sqlalchemy import create_engine, Column, String, Integer, Numeric, MetaData, Table, type_coerce, ForeignKey, case\n", "from sqlalchemy.orm import mapper, create_session, relationship, aliased, Session\n", "from sqlalchemy.ext.declarative import declarative_base\n", "import csv\n", "from sqlalchemy import case\n", "import numpy as np\n", "from sqlalchemy.ext.automap import automap_base\n", "import matplotlib.pyplot as plt\n", "import sqlalchemy.types as types\n", "import numbers\n", "from sqlalchemy.sql import and_, or_, not_, func\n", "from sqlalchemy.sql import select\n", "import os\n", "import glob\n", "import re\n", "from os.path import isfile\n", "import gsw\n", "\n", "import createDBfromDFO_OPDB\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# definitions\n", "basepath='/ocean/eolson/MEOPAR/obs/'\n", "basedir=basepath + 'DFOOPDB/'\n", "dbname='DFO_OcProfDB'\n", "# if PRISM.sqlite does not exist, exit\n", "if not isfile(basedir + dbname + '.sqlite'):\n", " print('ERROR: ' + dbname + '.sqlite does not exist')\n", " # return()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "engine = create_engine('sqlite:///' + basedir + dbname + '.sqlite', echo = False)\n", "Base=declarative_base(engine)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#if JDFLocsTBL or CalcTBL already exists, drop (delete) it\n", "# placement here ensures they will not be reflected \n", "# full table deletion facilitates changes to schema as needed through this script\n", "connection=engine.connect()\n", "if engine.dialect.has_table(connection,'JDFLocsTBL'):\n", " # delete existing JDFLocsTBL\n", " connection.execute('DROP TABLE JDFLocsTBL')\n", "if engine.dialect.has_table(connection,'CalcsTBL'):\n", " # delete existing CalcTBL\n", " connection.execute('DROP TABLE CalcsTBL')\n", "connection.close()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# reflect existing tables, ObsTBL and StationTBL\n", "class Station(Base):\n", " __tablename__= 'StationTBL'\n", " __table_args__= {'autoload':True}\n", " \n", "class Obs(Base):\n", " __tablename__= 'ObsTBL'\n", " __table_args__= {'autoload':True}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# create JDFLocsTBL to identify rows in Strait of Juan de Fuca\n", "class JDFLocs(Base):\n", " __table__=Table('JDFLocsTBL', Base.metadata,\n", " Column('ObsID', Integer, primary_key=True),\n", " Column('StationID', Integer, ForeignKey('StationTBL.ID')))\n", " \n", "# Next, Construct Table CalcTBL to store data transformations not easily reproduced in queries \n", "# (ie unit conversions using complex functions such as gsw TEOS-10 library)\n", "class Calcs(Base):\n", " __table__=Table('CalcsTBL', Base.metadata,\n", " Column('ObsID', Integer, primary_key=True),\n", " Column('StationID', Integer, ForeignKey('StationTBL.ID')),\n", " Column('Oxygen_umolL', Numeric),\n", " Column('Oxygen_Dissolved_umolL', Numeric),\n", " Column('Oxygen_Dissolved_SBE_umolL', Numeric),\n", " Column('Salinity_Bottle_SA', Numeric),\n", " Column('Salinity_SA', Numeric),\n", " Column('Salinity_T0_C0_SA', Numeric),\n", " Column('Salinity_T1_C1_SA', Numeric),\n", " Column('Salinity__Unknown_SA', Numeric),\n", " Column('Salinity__Pre1978_SA', Numeric))\n", "Base.metadata.create_all(engine)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Insert data in JDFLocsTBL based on SQL query\n", "connection = engine.connect()\n", "connection.execute(\" INSERT INTO JDFLocsTBL (ObsID, StationID) \\\n", " SELECT ObsTBL.ID, ObsTBL.StationTBLID \\\n", " FROM ObsTBL JOIN StationTBL ON ObsTBL.StationTBLID=StationTBL.ID \\\n", " WHERE StationTBL.Lat BETWEEN 48.2 AND 48.8 \\\n", " AND StationTBL.Lon BETWEEN -125.2 AND -124.2;\")\n", "connection.close()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One to one relationship from ObsTBL to CalcTBL: create records and insert IDs\n", "connection = engine.connect()\n", "connection.execute(\" INSERT INTO CalcsTBL (ObsID, StationID) \\\n", " SELECT ObsTBL.ID, ObsTBL.StationTBLID \\\n", " FROM ObsTBL;\")\n", "connection.close()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session = create_session(bind = engine, autocommit = False, autoflush = True)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1, 1)\n", "(2, 1)\n", "(3, 1)\n", "---\n", "(1, 1, None)\n", "(2, 1, None)\n", "(3, 1, None)\n", "---\n", "85433\n", "---\n", "85433\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n", " 'storage.' % (dialect.name, dialect.driver))\n" ] } ], "source": [ "qry1=session.query(Obs.ID, Obs.StationTBLID).limit(3).all()\n", "for row in qry1:\n", " print(row)\n", "print('---')\n", "qry1=session.query(Calcs.ObsID, Calcs.StationID, Calcs.Oxygen_umolL).limit(3).all()\n", "for row in qry1:\n", " print(row)\n", "print('---')\n", "qry1=session.query(Obs.ID).count()\n", "print(qry1)\n", "print('---')\n", "qry1=session.query(Calcs.ObsID).count()\n", "print(qry1)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def Ox_mLL_to_umolO2L(Ox_mLL):\n", " #1 ml = 1 ml * (1 l / 10^3 ml) * (1 mol O2 / 22.391 l) * (10^6 umol / mol) = (10^3/22.391) umol\n", " return 10**3/22.391*Ox_mLL\n", "\n", "def Ox_mgL_to_umolO2L(Ox_mgL):\n", " # 1 mg O2 = 1 mg O2 * (1 g / 10^3 mg) * (1 mol O2 / 31.998 g) * (10^6 umol / mol) = (10^3/31.998) umol\n", " return 10**3/31.998*Ox_mgL\n", "\n", "def Ox_umolkg_to_umolO2L(Ox_umolkg, S, T, press, lon, lat):\n", " # if density rho is in kg/L, 1 umol/kg = 1 umol/kg * (rho kg/L) = rho umol/L\n", " # treat ppt as practical salinity; virtually the same\n", " # gsw.SA_from_SP(SP,p,long,lat), gsw.rho(SA,t,p)\n", " SA=gsw.SA_from_SP(S,press,lon,lat)\n", " rho=gsw.rho(SA,T,press)\n", " return rho*Ox_umolkg" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def Sal_PSU_to_SA(S,press,lon,lat):\n", " SA=gsw.SA_from_SP(S,press,lon,lat)\n", " return(SA)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "Press=case([(Obs.Pressure!=None, Obs.Pressure)], else_=Obs.Pressure_Reversing)\n", "\n", "Sal=case([(Obs.Salinity_Bottle!=None, Obs.Salinity_Bottle)], else_=\n", " case([(Obs.Salinity_T0_C0!=None, Obs.Salinity_T0_C0)], else_=\n", " case([(Obs.Salinity_T1_C1!=None, Obs.Salinity_T1_C1)], else_=\n", " case([(Obs.Salinity!=None, Obs.Salinity)], else_=\n", " case([(Obs.Salinity__Unknown!=None, Obs.Salinity__Unknown)], else_=Obs.Salinity__Pre1978)\n", " ))))\n", "\n", "Tem=case([(Obs.Temperature!=None, Obs.Temperature)], else_=\n", " case([(Obs.Temperature_Primary!=None, Obs.Temperature_Primary)], else_=\n", " case([(Obs.Temperature_Secondary!=None, Obs.Temperature_Secondary)], else_=Obs.Temperature_Reversing)))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "13277\n", "13277\n" ] } ], "source": [ "test=session.query(Obs.Salinity_Bottle).\\\n", " filter(Obs.Salinity_Bottle!=None).filter(Press!=None).count()\n", "print(test)\n", "Q=session.query(Obs.Salinity_Bottle, Press, Station.Lon, Station.Lat).\\\n", " select_from(Obs).\\\n", " join(Station, Station.ID==Obs.StationTBLID).\\\n", " filter(and_(\n", " Obs.Salinity_Bottle!=None,\n", " Press!=None,\n", " Station.Lon!=None,\n", " Station.Lat!=None)).count()\n", "print(Q)" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "# 1) Convert all Salinity to SA, assuming it is in PSU\n", "# Salinity_Bottle_SA\n", "for ID, S, press, lon, lat in session.query(Obs.ID, Obs.Salinity_Bottle, Press, Station.Lon, Station.Lat).\\\n", " select_from(Obs).\\\n", " join(Station, Station.ID==Obs.StationTBLID).\\\n", " filter(and_(\n", " Obs.Salinity_Bottle!=None,\n", " Press!=None,\n", " Station.Lon!=None,\n", " Station.Lat!=None)).all():\n", " calcRec=session.query(Calcs).filter(Calcs.ObsID==ID).one()\n", " calcRec.Salinity_Bottle_SA=Sal_PSU_to_SA(S,press,lon,lat)\n", "session.commit()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def convertSal(ObsColIn,CalcColOut):\n", " for ID, S, press, lon, lat in session.query(Obs.ID, ObsColIn, Press, Station.Lon, Station.Lat).\\\n", " select_from(Obs).\\\n", " join(Station, Station.ID==Obs.StationTBLID).\\\n", " filter(and_(\n", " ObsColIn!=None,\n", " Press!=None,\n", " Station.Lon!=None,\n", " Station.Lat!=None)).all():\n", " calcRec=session.query(Calcs).filter(Calcs.ObsID==ID).one()\n", " setattr(calcRec,CalcColOut,Sal_PSU_to_SA(S,press,lon,lat))\n", " session.commit()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n", " 'storage.' % (dialect.name, dialect.driver))\n" ] } ], "source": [ "convertSal(Obs.Salinity_Bottle,'Salinity_Bottle_SA')\n", "convertSal(Obs.Salinity,'Salinity_SA')\n", "convertSal(Obs.Salinity_T0_C0,'Salinity_T0_C0_SA')\n", "convertSal(Obs.Salinity_T1_C1,'Salinity_T1_C1_SA')\n", "convertSal(Obs.Salinity__Unknown,'Salinity__Unknown_SA')\n", "convertSal(Obs.Salinity__Pre1978,'Salinity__Pre1978_SA')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(Decimal('408344.1719517931'),)]\n" ] } ], "source": [ "qtest=session.query(func.sum(Calcs.Salinity_Bottle_SA)).all()\n", "print(qtest)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(Decimal('408344.1719517931'),)]\n" ] } ], "source": [ "qtest=session.query(func.sum(Calcs.Salinity_Bottle_SA)).all()\n", "print(qtest)" ] }, { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [ " # Obs.Quality_Flag_Oxyg refers to Obs.Oxygen\n", " # Obs.Flag_Oxygen_Dissolved refers to Obs.Oxygen_Dissolved or Obs.Oxygen_Dissolved_SBE\n", " # Obs.Oxygen_units: mg/l or ml/l; 1 mg/l = 22.391 ml/31.998 = 0.700 ml/l\n", " # Obs.Oxygen_Dissolved_units: mL/L or umol/kg; 1 μmol O2 = .022391 ml\n", " # Obs.Oxygen_Dissolved_SBE_units: mL/L or umol/kg; 1kg = approx 1 L water\n", " # 1 ml/l = 10^3/22.391 = 44.661 μmol/l\n", " # Molar volume at STP = 22.391 l\n", " # Molar weight of oxygen = 31.998 g [this is for an O2 molecule]\n", " # Atomic Mass of oxygen = 15.994 g/mol" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# define function to convert O2 from arbitrary units to umol/L:\n", "def convertOx(ObsColIn, ObsColInUnits, CalcColOutStr):\n", " for ID, Ox, OxU, press, S, T, lon, lat in \\\n", " session.query(Obs.ID, ObsColIn, ObsColInUnits, Press, Sal, Tem, Station.Lon, Station.Lat).\\\n", " select_from(Obs).\\\n", " join(Station, Station.ID==Obs.StationTBLID).\\\n", " filter(or_(\n", " and_(\n", " ObsColIn!=None,\n", " ObsColInUnits=='umol/kg',\n", " Press!=None,\n", " Sal!=None,\n", " Tem!=None,\n", " Station.Lon!=None,\n", " Station.Lat!=None),\n", " and_(\n", " ObsColIn!=None,\n", " ObsColInUnits!=None,\n", " ObsColInUnits!='umol/kg'))).all():\n", " calcRec=session.query(Calcs).filter(Calcs.ObsID==ID).one() \n", " if (OxU=='ml/l' or OxU=='mL/L'):\n", " outval=Ox_mLL_to_umolO2L(float(Ox))\n", " elif OxU=='mg/l':\n", " outval=Ox_mgL_to_umolO2L(float(Ox))\n", " elif OxU=='umol/kg':\n", " outval=Ox_umolkg_to_umolO2L(float(Ox), float(S), float(T), float(press), float(lon), float(lat))\n", " elif OxU=='mmol/m**3':\n", " outval=float(Ox)\n", " else:\n", " print('ERROR: OxU=',OxU)\n", " setattr(calcRec,CalcColOutStr,outval)\n", " session.commit()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [], "source": [ "convertOx(Obs.Oxygen, Obs.Oxygen_units, 'Oxygen_umolL')\n", "convertOx(Obs.Oxygen_Dissolved, Obs.Oxygen_Dissolved_units, 'Oxygen_Dissolved_umolL')\n", "convertOx(Obs.Oxygen_Dissolved_SBE, Obs.Oxygen_Dissolved_SBE_units, 'Oxygen_Dissolved_SBE_umolL')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "qry=session.query(Obs.Pressure_Reversing).filter(Obs.Pressure_Reversing!=None).limit(10).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(Decimal('1991.0000000000'), 59)\n", "(Decimal('1992.0000000000'), 86)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n", " 'storage.' % (dialect.name, dialect.driver))\n" ] } ], "source": [ "qry=session.query(Station.StartYear, func.count()).select_from(Station).join(Obs, Obs.StationTBLID==Station.ID).filter(Obs.Pressure_Reversing!=None).group_by(Station.StartYear).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# mmol/m3 is equivalent to umol/L" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# all N units are equivalent:\n", "# gram-atomic weight = mol of an element \n", "# units used are: 'microg-at/l','mmol/m**3','umol/L'\n", "\n", "# all S units are roughly equivalent (ppt) to psu\n", "\n", "# all T units are deg C" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(None, 76154)\n", "('microg-at/l', 3180)\n", "('mmol/m**3', 2896)\n", "('umol/L', 3203)\n", "----\n", "(None, 30535)\n", "('umol/L', 54898)\n", "----\n", "(None, 84972)\n", "('umol/L', 461)\n" ] } ], "source": [ "qry=session.query(Obs.Nitrate_units, func.count()).group_by(Obs.Nitrate_units).all()\n", "for row in qry:\n", " print(row)\n", "print('----')\n", "qry=session.query(Obs.Nitrate_plus_Nitrite_units, func.count()).group_by(Obs.Nitrate_plus_Nitrite_units).all()\n", "for row in qry:\n", " print(row)\n", "print('----')\n", "qry=session.query(Obs.Nitrate_plus_nitrite_ISUS_units, func.count()).group_by(Obs.Nitrate_plus_nitrite_ISUS_units).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "qry=session.query(NOUnits, func.count()).group_by(NOUnits).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "Oxy0=case([(Obs.Oxygen!=None, Obs.Oxygen)], else_=Obs.Oxygen_Dissolved)\n", "Oxy0Units=case([(Obs.Oxygen!=None, Obs.Oxygen_units)], else_=Obs.Oxygen_Dissolved_units)\n", "Oxy0Flag=case([(Obs.Oxygen!=None, Obs.Quality_Flag_Oxyg)], else_=Obs.Flag_Oxygen_Dissolved)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "qry=session.query(Oxy0Units, func.count()).filter(Oxy0!=None).group_by(Oxy0Units).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(None, 16657)\n", "('microg-at/l', 3180)\n", "('mmol/m**3', 6020)\n", "('umol/L', 59576)\n" ] } ], "source": [ "qry=session.query(Obs.Silicate_units, func.count()).group_by(Obs.Silicate_units).all()\n", "for row in qry:\n", " print(row) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "qry=session.query(Obs.Nitrate_plus_nitrite_ISUS_Voltage_units, \n", " func.count()).group_by(Obs.Nitrate_plus_nitrite_ISUS_Voltage_units).all()\n", "\n", "print('----') \n", "for row in qry:\n", " print(row) \n", "qry=session.query(Obs.Nitrite_units, func.count()).group_by(Obs.Nitrite_units).all()\n", "print('----') \n", "for row in qry:\n", " print(row) \n", "qry=session.query(Obs.Ammonia_units, func.count()).group_by(Obs.Ammonia_units).all()\n", "print('----') \n", "for row in qry:\n", " print(row) \n", "qry=session.query(Obs.Ammonium_units, func.count()).group_by(Obs.Ammonium_units).all()\n", "print('----') \n", "for row in qry:\n", " print(row) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "qry=session.query(SalUnits, func.count()).group_by(SalUnits).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "qry=session.query(TemUnits, func.count()).group_by(TemUnits).all()\n", "for row in qry:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "qry=session.query(Obs.Pressure_Reversing_units, func.count()).group_by(Obs.Pressure_Reversing_units).all()\n", "for row in qry:\n", " print(row)\n", "qry=session.query(Obs.Pressure_units, func.count()).group_by(Obs.Pressure_units).all()\n", "for row in qry:\n", " print(row) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sqlalchemy import inspect\n", "inst = inspect(Obs)\n", "attr_names = [c_attr.key for c_attr in inst.mapper.column_attrs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "session.close()\n", "engine.dispose()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.4" } }, "nbformat": 4, "nbformat_minor": 0 }