{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combined DFO_OPDB and PRISM databases\n",
    "- add N, Si, and ancillary data from DFO and from PRISM\n",
    "- check for duplicate entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# imports\n",
    "import sqlalchemy\n",
    "from sqlalchemy import create_engine, Column, String, Integer, Numeric, MetaData, Table, type_coerce, ForeignKey, case\n",
    "from sqlalchemy.orm import mapper, create_session, relationship, aliased, Session\n",
    "from sqlalchemy.ext.declarative import declarative_base\n",
    "import csv\n",
    "from sqlalchemy import case\n",
    "import numpy as np\n",
    "from sqlalchemy.ext.automap import automap_base\n",
    "import matplotlib.pyplot as plt\n",
    "import sqlalchemy.types as types\n",
    "import numbers\n",
    "from sqlalchemy.sql import and_, or_, not_, func\n",
    "from sqlalchemy.sql import select\n",
    "import os\n",
    "import glob\n",
    "import re\n",
    "from os.path import isfile\n",
    "from mpl_toolkits.basemap import Basemap, shiftgrid, cm\n",
    "import gsw\n",
    "import matplotlib.cm as cmm\n",
    "import matplotlib.colors as col\n",
    "import matplotlib.colors as col\n",
    "import createDBfromDFO_OPDB\n",
    "from netCDF4 import Dataset\n",
    "import datetime as dt\n",
    "import dateutil as dutil\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# definitions\n",
    "basepath='/ocean/eolson/MEOPAR/obs/'\n",
    "dbnameOPDB='DFOOPDB/DFO_OcProfDB'\n",
    "dbnamePRISM='NANOOS_PRISMCRUISES/PRISM'\n",
    "dbnameComb='combinedForICs'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## First, create database to hold combined data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "engine = create_engine('sqlite:///' + basepath + dbnameComb + '.sqlite')\n",
    "Base=declarative_base()\n",
    "\n",
    "class forceNumeric(types.TypeDecorator):\n",
    "    impl = types.Numeric\n",
    "    def process_bind_param(self, value, dialect):\n",
    "        try:\n",
    "            int(float(value))\n",
    "        except:\n",
    "            value = None\n",
    "        return value\n",
    "\n",
    "class forceInt(types.TypeDecorator):\n",
    "    impl = types.Integer\n",
    "    def process_bind_param(self, value, dialect):\n",
    "        try:\n",
    "            int(value)\n",
    "        except:\n",
    "            value = None\n",
    "        return value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# define Table Classes:\n",
    "class StationTBL(Base):\n",
    "    __table__=Table('StationTBL', Base.metadata,\n",
    "                Column('ID', Integer, primary_key=True),\n",
    "                Column('Lat', forceNumeric),\n",
    "                Column('Lon', forceNumeric),\n",
    "                Column('Year', forceInt),\n",
    "                Column('Month', forceInt),\n",
    "                Column('Day', forceInt),\n",
    "                Column('sourceStaID', forceNumeric),\n",
    "                Column('sourceDB', String),\n",
    "                Column('sourceStaName', String))\n",
    "\n",
    "class ObsTBL(Base):\n",
    "    __table__=Table('ObsTBL', Base.metadata,\n",
    "                    Column('ID', Integer, primary_key=True),\n",
    "                    Column('StationTBLID', forceInt, ForeignKey('StationTBL.ID')),\n",
    "                    Column('sourceObsID', Integer),\n",
    "                    Column('sourceStaID', Integer),\n",
    "                    Column('sourceDB', String),\n",
    "                    Column('Z', forceNumeric),\n",
    "                    Column('Press', forceNumeric),\n",
    "                    Column('T', forceNumeric),\n",
    "                    Column('gswTC', forceNumeric),\n",
    "                    Column('S', forceNumeric),\n",
    "                    Column('gswSA', forceNumeric),\n",
    "                    Column('gswRho', forceNumeric),\n",
    "                    Column('NO3', forceNumeric),\n",
    "                    Column('NH4', forceNumeric),\n",
    "                    Column('Si', forceNumeric),\n",
    "                    Column('PON', forceNumeric),\n",
    "                    Column('DON', forceNumeric),\n",
    "                    Column('bSi', forceNumeric))\n",
    "    station=relationship(StationTBL, primaryjoin=__table__.c.StationTBLID == StationTBL.ID)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Base.metadata.create_all(engine)\n",
    "session = create_session(bind = engine, autocommit = False, autoflush = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next, open DFO_OPDB database and extract data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Base1 = automap_base()\n",
    "engine1 = create_engine('sqlite:///' + basepath + dbnameOPDB + '.sqlite', echo = False)\n",
    "# reflect the tables:\n",
    "Base1.prepare(engine1, reflect=True)\n",
    "Sta1=Base1.classes.StationTBL\n",
    "Obs1=Base1.classes.ObsTBL\n",
    "session1 = create_session(bind = engine1, autocommit = False, autoflush = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### definitions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Press1=case([(Obs1.Pressure!=None, Obs1.Pressure)], else_=Obs1.Pressure_Reversing)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Tem1=case([(Obs1.Temperature!=None, Obs1.Temperature)], else_=\n",
    "         case([(Obs1.Temperature_Primary!=None, Obs1.Temperature_Primary)], else_=\n",
    "         case([(Obs1.Temperature_Secondary!=None, Obs1.Temperature_Secondary)], else_=Obs1.Temperature_Reversing)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Sal1=case([(Obs1.Salinity_Bottle!=None, Obs1.Salinity_Bottle)], else_=\n",
    "         case([(Obs1.Salinity_T0_C0!=None, Obs1.Salinity_T0_C0)], else_=\n",
    "         case([(Obs1.Salinity_T1_C1!=None, Obs1.Salinity_T1_C1)], else_=\n",
    "         case([(Obs1.Salinity!=None, Obs1.Salinity)], else_=\n",
    "         case([(Obs1.Salinity__Unknown!=None, Obs1.Salinity__Unknown)], else_=Obs1.Salinity__Pre1978)\n",
    "        ))))\n",
    "Sal1Flag=case([(Obs1.Salinity_Bottle!=None, Obs1.Flag_Salinity_Bottle)], else_=\n",
    "         case([(Obs1.Salinity_T0_C0!=None, Obs1.Flag_Salinity)], else_=\n",
    "         case([(Obs1.Salinity_T1_C1!=None, Obs1.Flag_Salinity)], else_=\n",
    "         case([(Obs1.Salinity!=None, Obs1.Flag_Salinity)], else_=\n",
    "         case([(Obs1.Salinity__Unknown!=None, Obs1.Flag_Salinity)], else_=Obs1.Quality_Flag_Sali)\n",
    "        ))))\n",
    "Sal1_noflag=case([(Sal1Flag>2, None)], else_=Sal1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "NO1=case([(Obs1.Nitrate_plus_Nitrite!=None, Obs1.Nitrate_plus_Nitrite)], else_=Obs1.Nitrate)\n",
    "NO1Flag=case([(Obs1.Nitrate_plus_Nitrite!=None, Obs1.Flag_Nitrate_plus_Nitrite)], else_=Obs1.Flag_Nitrate)\n",
    "# Obs.Quality_Flag_Nitr does not match any nitrate obs\n",
    "# ISUS not included in this NO\n",
    "# units are micromolar (muM)\n",
    "NO1_noflag=case([(NO1Flag>2, None)], else_=NO1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "NH1_noflag=case([(Obs1.Flag_Ammonium>2, None)], else_=Obs1.Ammonium)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Si1_noflag=case([(Obs1.Flag_Silicate>2, None)], else_=Obs1.Silicate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "DON_noflag=case([(Obs1.Flag_Nitrogen_Dissolved_Organic>2, None)], else_=Obs1.Nitrogen_Dissolved_Organic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "PON_noflag=Obs1.Nitrogen_Particulate_Organic"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "do not include data with quality flag 3 or greater. \n",
    "from 2011-27-0020.che:\n",
    "        Flag channels initialized with zeros. Non-zero values have the following significance:\n",
    "        --------------------------------------------------------------------------------------\n",
    "        1 = Sample for this measurement was drawn from water bottle but not analyzed\n",
    "            (not normally used).\n",
    "        2 = Acceptable measurement (not normally used).\n",
    "        3 = Questionable measurement (no problem observed in sampling or analysis,\n",
    "            but value is not trusted, nonetheless; includes outlyers).\n",
    "        4 = Bad measurement (known problem with sampling or analysis, but not\n",
    "            serious enough to completely discard the value).\n",
    "        5 = Not reported (lost sample; unredeemably bad measurement).\n",
    "        6 = Mean of replicate measurements.\n",
    "        7 = Manual chromatographic peak measurement.\n",
    "        8 = Irregular digital chromatographic peak integration.\n",
    "        9 = Sample not drawn for this measurement from this bottle (not normally used)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### start with OBS table:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "qry1_Obs=session1.query(Obs1.ID, Obs1.StationTBLID, Obs1.Depth, Press1, Tem1, Sal1_noflag, NO1_noflag, NH1_noflag,\\\n",
    "                       Si1_noflag, DON_noflag, PON_noflag, Sta1.Lon, Sta1.Lat).select_from(Obs1).\\\n",
    "                join(Sta1,Sta1.ID==Obs1.StationTBLID).filter(or_(\n",
    "                    NO1_noflag!=None,\n",
    "                    NH1_noflag!=None,\n",
    "                    Si1_noflag!=None,\n",
    "                    DON_noflag!=None,\n",
    "                    PON_noflag!=None))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'sourceStaID': 1, 'sourceObsID': 1, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 14.3, 'NO3': 6.6, 'Si': 13.6, 'PON': None, 'Z': 12.0, 'S': 25.8, 'Press': None}\n",
      "{'sourceStaID': 597, 'sourceObsID': 6213, 'gswTC': 9.9774455341009798, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 9.886, 'NO3': None, 'gswRho': 1022.5666440387499, 'gswSA': 29.289740448372932, 'Si': 41.4, 'PON': None, 'Z': None, 'S': 29.1496, 'Press': 31.1}\n",
      "{'sourceStaID': 1153, 'sourceObsID': 12689, 'gswTC': 7.1981975413649293, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 7.1133, 'NO3': None, 'gswRho': 1022.1896305004838, 'gswSA': 28.46321542073542, 'Si': 56.9, 'PON': None, 'Z': None, 'S': 28.3262, 'Press': 5.0}\n",
      "{'sourceStaID': 1769, 'sourceObsID': 19106, 'gswTC': 9.8709211683004732, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 9.7996, 'NO3': None, 'gswRho': 1023.4505149892334, 'gswSA': 30.536476111886568, 'Si': 44.29, 'PON': None, 'Z': None, 'S': 30.3899, 'Press': 10.4}\n",
      "{'sourceStaID': 2277, 'sourceObsID': 25023, 'gswTC': 6.7618013474984435, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 6.7621, 'NO3': 23.1, 'gswRho': 1026.9513812707041, 'gswSA': 33.637181076694901, 'Si': 29.8, 'PON': None, 'Z': None, 'S': 33.4743, 'Press': 151.7}\n",
      "{'sourceStaID': 2686, 'sourceObsID': 30194, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 5.4678, 'NO3': None, 'Si': 72.1, 'PON': None, 'Z': None, 'S': None, 'Press': 210.3}\n",
      "{'sourceStaID': 3136, 'sourceObsID': 35650, 'gswTC': 8.7800011399187969, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 8.7346, 'NO3': None, 'gswRho': 1024.5671596935504, 'gswSA': 31.707012106529575, 'Si': 8.8, 'PON': None, 'Z': None, 'S': 31.5554, 'Press': 20.5}\n",
      "{'sourceStaID': 3632, 'sourceObsID': 41276, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 7.8282, 'NO3': None, 'Si': 41.8, 'PON': None, 'Z': None, 'S': None, 'Press': 76.0}\n",
      "{'sourceStaID': 4202, 'sourceObsID': 46940, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 6.7, 'NO3': None, 'Si': 563.0, 'PON': None, 'Z': 100.0, 'S': 33.84, 'Press': None}\n",
      "{'sourceStaID': 5066, 'sourceObsID': 53282, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 13.36, 'NO3': 0.0, 'Si': 5.9, 'PON': None, 'Z': 9.8, 'S': 31.55, 'Press': None}\n",
      "{'sourceStaID': 5632, 'sourceObsID': 58457, 'gswTC': 9.2553406792164381, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 9.2229, 'NO3': 28.5, 'gswRho': 1025.0951124643843, 'gswSA': 30.932667688775656, 'Si': 67.9, 'PON': None, 'Z': None, 'S': 30.7798, 'Press': 285.4}\n",
      "{'sourceStaID': 6981, 'sourceObsID': 72307, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 11.05, 'NO3': None, 'Si': 30.0, 'PON': None, 'Z': 50.0, 'S': None, 'Press': None}\n",
      "{'sourceStaID': 8138, 'sourceObsID': 84121, 'DON': None, 'sourceDB': 'DFO_OPDB', 'NH4': None, 'T': 9.55, 'NO3': 17.1, 'Si': 36.5, 'PON': None, 'Z': 20.0, 'S': 29.253, 'Press': None}\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iSourceID, iSourceStaID, iZ, iP, iT, iS, iNO, iNH, iSi, iDON, iPON, iLon, iLat in qry1_Obs.all():\n",
    "    jj+=1\n",
    "    idict={}\n",
    "    idict['sourceObsID']=iSourceID\n",
    "    idict['sourceStaID']=iSourceStaID\n",
    "    idict['sourceDB']='DFO_OPDB'\n",
    "    idict['Z']=iZ\n",
    "    idict['Press']=iP\n",
    "    idict['T']=iT\n",
    "    idict['S']=iS\n",
    "    idict['NO3']=iNO\n",
    "    idict['NH4']=iNH\n",
    "    idict['Si']=iSi\n",
    "    idict['PON']=iPON\n",
    "    idict['DON']=iDON\n",
    "    if iS!=None and iP!=None and iLon!=None and iLat!=None:\n",
    "        iSA=gsw.SA_from_SP(iS,iP,iLon,iLat)\n",
    "        idict['gswSA']=iSA\n",
    "        if iT!=None:\n",
    "            idict['gswRho']=gsw.rho(iSA,iT,iP)\n",
    "            idict['gswTC']=gsw.CT_from_t(iSA,iT,iP)\n",
    "    # enter in new db Obs table:\n",
    "    session.execute(ObsTBL.__table__.insert().values(**idict))\n",
    "    if jj%5000==1:\n",
    "        print(idict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### add data to StationTBL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "qry1_Sta=session1.query(Sta1.ID, Sta1.Lat, Sta1.Lon, Sta1.StartYear, Sta1.StartMonth, Sta1.StartDay,\\\n",
    "                        Sta1.STATION).select_from(Obs1).\\\n",
    "                join(Sta1,Sta1.ID==Obs1.StationTBLID).filter(or_(\n",
    "                    NO1_noflag!=None,\n",
    "                    NH1_noflag!=None,\n",
    "                    Si1_noflag!=None,\n",
    "                    DON_noflag!=None,\n",
    "                    PON_noflag!=None)).group_by(Sta1.ID)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'sourceStaName': '26', 'sourceStaID': 1, 'Year': 1982.0, 'Day': 16.0, 'sourceDB': 'DFO_OPDB', 'Lon': -123.943, 'Lat': 49.25, 'Month': 7.0}\n",
      "{'sourceStaName': '65', 'sourceStaID': 1047, 'Year': 2012.0, 'Day': 7.0, 'sourceDB': 'DFO_OPDB', 'Lon': -123.16483333333333, 'Lat': 48.26433333333333, 'Month': 4.0}\n",
      "{'sourceStaName': 'P04', 'sourceStaID': 2077, 'Year': 1998.0, 'Day': 14.0, 'sourceDB': 'DFO_OPDB', 'Lon': -126.6695, 'Lat': 48.64516666666667, 'Month': 5.0}\n",
      "{'sourceStaName': 'LB14', 'sourceStaID': 3079, 'Year': 2004.0, 'Day': 10.0, 'sourceDB': 'DFO_OPDB', 'Lon': -125.9995, 'Lat': 48.141333333333336, 'Month': 9.0}\n",
      "{'sourceStaName': 'G', 'sourceStaID': 4099, 'Year': 1932.0, 'Day': 22.0, 'sourceDB': 'DFO_OPDB', 'Lon': -122.7, 'Lat': 48.666666666666664, 'Month': 7.0}\n",
      "{'sourceStaName': '043', 'sourceStaID': 5132, 'Year': 1986.0, 'Day': 22.0, 'sourceDB': 'DFO_OPDB', 'Lon': -127.387, 'Lat': 49.272, 'Month': 8.0}\n",
      "{'sourceStaName': None, 'sourceStaID': 7368, 'Year': 1953.0, 'Day': 20.0, 'sourceDB': 'DFO_OPDB', 'Lon': -122.86666666666666, 'Lat': 47.65, 'Month': 7.0}\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iID, iLat, iLon, iYr, iMo, iDy, iSN in qry1_Sta.all():\n",
    "    jj+=1\n",
    "    idict={}\n",
    "    idict['Lat']=iLat\n",
    "    idict['Lon']=iLon\n",
    "    idict['Year']=iYr\n",
    "    idict['Month']=iMo\n",
    "    idict['Day']=iDy\n",
    "    idict['sourceStaID']=iID\n",
    "    idict['sourceDB']='DFO_OPDB'\n",
    "    idict['sourceStaName']=iSN\n",
    "    session.execute(StationTBL.__table__.insert().values(**idict))\n",
    "    if jj%1000==1:\n",
    "        print(idict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### update ObsTBL to have correct foreign keys based on DFO_OPDB relationships"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 None 1\n",
      "1 1 1\n",
      "5001 None 597\n",
      "5001 578 597\n",
      "10001 None 1153\n",
      "10001 1107 1153\n",
      "15001 None 1769\n",
      "15001 1714 1769\n",
      "20001 None 2277\n",
      "20001 2201 2277\n",
      "25001 None 2686\n",
      "25001 2609 2686\n",
      "30001 None 3136\n",
      "30001 3058 3136\n",
      "35001 None 3632\n",
      "35001 3549 3632\n",
      "40001 None 4202\n",
      "40001 4103 4202\n",
      "45001 None 5066\n",
      "45001 4935 5066\n",
      "50001 None 5632\n",
      "50001 5501 5632\n",
      "55001 None 6981\n",
      "55001 5964 6981\n",
      "60001 None 8138\n",
      "60001 6397 8138\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
      "  'storage.' % (dialect.name, dialect.driver))\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iObs in session.query(ObsTBL).all():\n",
    "    jj+=1\n",
    "    if jj%5000==1:\n",
    "        print(iObs.ID,iObs.StationTBLID, iObs.sourceStaID)\n",
    "    iObs.StationTBLID=session.query(StationTBL.ID).filter(StationTBL.sourceStaID==iObs.sourceStaID).one()[0]\n",
    "    if jj%5000==1:\n",
    "        print(iObs.ID,iObs.StationTBLID, iObs.sourceStaID)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "session.commit()\n",
    "session1.close()\n",
    "engine1.dispose()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next, load PRISM data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Base2 = automap_base()\n",
    "engine2 = create_engine('sqlite:///' + basepath + dbnamePRISM + '.sqlite', echo = False)\n",
    "# reflect the tables:\n",
    "Base2.prepare(engine2, reflect=True)\n",
    "Obs2=Base2.classes.ObsTBL\n",
    "Cast2=Base2.classes.CastTBL\n",
    "session2 = create_session(bind = engine2, autocommit = False, autoflush = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "qry2_0=session2.query(Obs2.id, Obs2.cast_dbid, Obs2.depth,Obs2.cast_lat.label('lat'),Obs2.cast_lon.label('lon')).filter(or_(\n",
    "                            Obs2.variable=='ammonium_concentration',\n",
    "                            Obs2.variable=='nitrate_concentration',\n",
    "                            Obs2.variable=='silicate_concentration')).subquery()\n",
    "qry2_ObsBase=session2.query(qry2_0.c.id, qry2_0.c.cast_dbid, qry2_0.c.depth, qry2_0.c.lat, qry2_0.c.lon).group_by(qry2_0.c.cast_dbid, qry2_0.c.depth).subquery()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(920742, 1160, Decimal('2.8000000000'), Decimal('48.0163333333'), Decimal('-122.3041666670'))\n",
      "(920757, 1160, Decimal('7.9000000000'), Decimal('48.0163333333'), Decimal('-122.3041666670'))\n",
      "(920772, 1160, Decimal('13.0000000000'), Decimal('48.0163333333'), Decimal('-122.3041666670'))\n",
      "(920787, 1160, Decimal('23.0000000000'), Decimal('48.0163333333'), Decimal('-122.3041666670'))\n",
      "(920802, 1160, Decimal('33.0000000000'), Decimal('48.0163333333'), Decimal('-122.3041666670'))\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
      "  'storage.' % (dialect.name, dialect.driver))\n"
     ]
    }
   ],
   "source": [
    "test=session2.query(qry2_ObsBase.c.id,qry2_ObsBase.c.cast_dbid,qry2_ObsBase.c.depth, qry2_ObsBase.c.lat, qry2_ObsBase.c.lon).limit(5).all()\n",
    "for row in test:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### some variables have multiple entires at a depth and station; take the average:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "qNO=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='nitrate_concentration').group_by(Obs2.cast_dbid,Obs2.depth).subquery()\n",
    "qNH=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='ammonium_concentration').group_by(Obs2.cast_dbid,Obs2.depth).subquery()\n",
    "qSi=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='silicate_concentration').group_by(Obs2.cast_dbid,Obs2.depth).subquery()\n",
    "qP=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='water_pressure').group_by(Obs2.cast_dbid,Obs2.depth).subquery()\n",
    "qT=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='water_temperature').group_by(Obs2.cast_dbid,Obs2.depth).subquery()\n",
    "qS=session2.query(Obs2.cast_dbid,Obs2.depth,func.avg(Obs2.value).label('value')).filter(Obs2.variable=='water_salinity').group_by(Obs2.cast_dbid,Obs2.depth).subquery()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "qry2_all=session2.query(qry2_ObsBase.c.id, qry2_ObsBase.c.lat, qry2_ObsBase.c.lon, qry2_ObsBase.c.cast_dbid, qry2_ObsBase.c.depth,\n",
    "                       qNO.c.value, qNH.c.value, qSi.c.value, qP.c.value, qT.c.value, qS.c.value).\\\n",
    "    outerjoin(qNO, and_(qNO.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qNO.c.depth==qry2_ObsBase.c.depth)).\\\n",
    "    outerjoin(qNH, and_(qNH.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qNH.c.depth==qry2_ObsBase.c.depth)).\\\n",
    "    outerjoin(qSi, and_(qSi.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qSi.c.depth==qry2_ObsBase.c.depth)).\\\n",
    "    outerjoin(qP, and_(qP.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qP.c.depth==qry2_ObsBase.c.depth)).\\\n",
    "    outerjoin(qT, and_(qT.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qT.c.depth==qry2_ObsBase.c.depth)).\\\n",
    "    outerjoin(qS, and_(qS.c.cast_dbid==qry2_ObsBase.c.cast_dbid, qS.c.depth==qry2_ObsBase.c.depth))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7880\n"
     ]
    }
   ],
   "source": [
    "test=qry2_all.count()\n",
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7880\n"
     ]
    }
   ],
   "source": [
    "test2=session2.query(qry2_ObsBase).count()\n",
    "print(test2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'sourceStaID': 1160, 'sourceObsID': 920742, 'gswTC': 14.659235764855625, 'sourceDB': 'PRISM', 'NH4': 0.159997469161, 'T': 14.416, 'gswRho': 1017.8875549349336, 'gswSA': 24.374606843954123, 'Si': 23.499628283, 'NO3': 1.49997627338, 'Z': Decimal('2.8000000000'), 'S': 24.257600000000004, 'Press': 2.8}\n",
      "{'sourceStaID': 1742, 'sourceObsID': 2271510, 'gswTC': 8.992970261288594, 'sourceDB': 'PRISM', 'NH4': 0.127033485043, 'T': 8.9436, 'gswRho': 1024.3648794085707, 'gswSA': 30.990698588268391, 'Si': 53.6765927411, 'NO3': 29.1039631806, 'Z': Decimal('104.1490000000'), 'S': 30.8415, 'Press': 105.056}\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iID, iLat, iLon, iCastID, iZ, iNO, iNH, iSi, iP, iT, iS in qry2_all.all():\n",
    "    jj+=1\n",
    "    idict={}\n",
    "    idict['sourceObsID']=iID\n",
    "    idict['sourceStaID']=iCastID\n",
    "    idict['sourceDB']='PRISM'\n",
    "    idict['Z']=iZ\n",
    "    idict['T']=iT\n",
    "    idict['S']=iS\n",
    "    idict['NO3']=iNO\n",
    "    idict['NH4']=iNH\n",
    "    idict['Si']=iSi\n",
    "    idict['Press']=iP\n",
    "    if iS!=None and iP!=None and iLon!=None and iLat!=None:\n",
    "        iSA=gsw.SA_from_SP(iS,iP,iLon,iLat)\n",
    "        idict['gswSA']=iSA\n",
    "        if iT!=None:\n",
    "            idict['gswRho']=gsw.rho(iSA,iT,iP)\n",
    "            idict['gswTC']=gsw.CT_from_t(iSA,iT,iP)\n",
    "    # enter in new db Obs table:\n",
    "    session.execute(ObsTBL.__table__.insert().values(**idict))\n",
    "    if jj%5000==1:\n",
    "        print(idict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "qry2_Cast=session2.query(Cast2.cast_dbid, Cast2.cast_datetime, Cast2.cast_lat, Cast2.cast_lon,\n",
    "                        Cast2.station_name).join(qry2_ObsBase, qry2_ObsBase.c.cast_dbid==Cast2.cast_dbid).\\\n",
    "                group_by(Cast2.cast_dbid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "898\n"
     ]
    }
   ],
   "source": [
    "print(qry2_Cast.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2071\n"
     ]
    }
   ],
   "source": [
    "qtest=session2.query(Cast2).count()\n",
    "print(qtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'sourceStaName': 'P1, Gedney Island', 'sourceStaID': 1160, 'Year': 1998, 'Day': 15, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.3041666670'), 'Lat': Decimal('48.0163333333'), 'Month': 6}\n",
      "{'sourceStaName': 'P30, Three Tree Point', 'sourceStaID': 1260, 'Year': 1999, 'Day': 15, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.4091666670'), 'Lat': Decimal('47.4563333333'), 'Month': 6}\n",
      "{'sourceStaName': 'P16, Hazel Point', 'sourceStaID': 1363, 'Year': 2000, 'Day': 14, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.7608333330'), 'Lat': Decimal('47.6913333333'), 'Month': 6}\n",
      "{'sourceStaName': 'P12, Hoodsport', 'sourceStaID': 1464, 'Year': 2001, 'Day': 11, 'sourceDB': 'PRISM', 'Lon': Decimal('-123.1080000000'), 'Lat': Decimal('47.4250000000'), 'Month': 6}\n",
      "{'sourceStaName': 'P9, South Point', 'sourceStaID': 1564, 'Year': 2003, 'Day': 14, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.6665000000'), 'Lat': Decimal('47.8321666667'), 'Month': 6}\n",
      "{'sourceStaName': 'P35, South of McNeil Island', 'sourceStaID': 1675, 'Year': 2004, 'Day': 5, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.6316666670'), 'Lat': Decimal('47.1833333333'), 'Month': 12}\n",
      "{'sourceStaName': 'P18, Bush Point', 'sourceStaID': 1775, 'Year': 2006, 'Day': 28, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.6156666670'), 'Lat': Decimal('48.0330000000'), 'Month': 6}\n",
      "{'sourceStaName': 'P19, Lagoon Point', 'sourceStaID': 1878, 'Year': 2008, 'Day': 5, 'sourceDB': 'PRISM', 'Lon': Decimal('-122.6298333330'), 'Lat': Decimal('48.0935000000'), 'Month': 2}\n",
      "{'sourceStaName': 'P13, Eldon', 'sourceStaID': 9185, 'Year': 2009, 'Day': 1, 'sourceDB': 'PRISM', 'Lon': Decimal('-123.0073333330'), 'Lat': Decimal('47.5473333333'), 'Month': 10}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
      "  'storage.' % (dialect.name, dialect.driver))\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iID, iDT, iLat, iLon, iSN in qry2_Cast.all():\n",
    "    jj+=1\n",
    "    idict={}\n",
    "    idate=dutil.parser.parse(iDT)\n",
    "    idict['Lat']=iLat\n",
    "    idict['Lon']=iLon\n",
    "    idict['Year']=idate.year\n",
    "    idict['Month']=idate.month\n",
    "    idict['Day']=idate.day\n",
    "    idict['sourceStaID']=iID\n",
    "    idict['sourceDB']='PRISM'\n",
    "    idict['sourceStaName']=iSN\n",
    "    session.execute(StationTBL.__table__.insert().values(**idict))\n",
    "    if jj%100==1:\n",
    "        print(idict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "61294 None 1160\n",
      "61294 6474 1160\n",
      "62294 None 1273\n",
      "62294 6586 1273\n",
      "63294 None 1388\n",
      "63294 6698 1388\n",
      "64294 None 1504\n",
      "64294 6814 1504\n",
      "65294 None 1624\n",
      "65294 6925 1624\n",
      "66294 None 1742\n",
      "66294 7041 1742\n",
      "67294 None 1858\n",
      "67294 7155 1858\n",
      "68294 None 9177\n",
      "68294 7266 9177\n"
     ]
    }
   ],
   "source": [
    "jj=0\n",
    "for iObs in session.query(ObsTBL).filter(ObsTBL.sourceDB=='PRISM').all():\n",
    "    jj+=1\n",
    "    if jj%1000==1:\n",
    "        print(iObs.ID,iObs.StationTBLID, iObs.sourceStaID)\n",
    "    iObs.StationTBLID=session.query(StationTBL.ID).filter(StationTBL.sourceStaID==iObs.sourceStaID,\n",
    "                                                         StationTBL.sourceDB=='PRISM').one()[0]\n",
    "    if jj%1000==1:\n",
    "        print(iObs.ID,iObs.StationTBLID, iObs.sourceStaID)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "session.commit()\n",
    "session2.close()\n",
    "engine2.dispose()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Check for duplicates (inspect entries with same lat, lon, date, depth)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- first, check that longitudes have same sign for both databases:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(Decimal('-122.2666666667'),)\n",
      "(Decimal('-122.2998333330'),)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/eolson/anaconda3/envs/teos10/lib/python3.4/site-packages/sqlalchemy/sql/sqltypes.py:562: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
      "  'storage.' % (dialect.name, dialect.driver))\n"
     ]
    }
   ],
   "source": [
    "qtest=session.query(func.max(StationTBL.Lon)).group_by(StationTBL.sourceDB).all()\n",
    "for row in qtest:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- next, check that no stations are duplicated between the different datasets that were combined by comparing Year, Month, Day, Lat, Lon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a1=aliased(StationTBL)\n",
    "a2=aliased(StationTBL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "qDupSta=session.query(a1.ID.label('ID1'),a2.ID.label('ID2'),a1.Lat,a1.Lon,a1.Year,a1.Month,\n",
    "                     a1.Day,a1.sourceDB.label('DB1'),a2.sourceDB.label('DB2')).select_from(a1).join(a2,and_(\n",
    "    a1.Year==a2.Year,\n",
    "    a1.Month==a2.Month,\n",
    "    a1.Day==a2.Day,\n",
    "    a1.Lat-a2.Lat<0.001,\n",
    "    a1.Lat-a2.Lat>-0.001,\n",
    "    a1.Lon-a2.Lon<0.001,\n",
    "    a1.Lon-a2.Lon>-0.001,\n",
    "    a1.ID!=a2.ID,\n",
    "    a1.sourceDB!=a2.sourceDB)) # assume there were no duplicates in the original databases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for row in qDupSta.all():\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "session.close()\n",
    "engine.dispose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "indthw = np.loadtxt('/ocean/eolson/MEOPAR/tools/bathymetry/thalweg_working.txt', delimiter=\" \", unpack=False)\n",
    "indthw = indthw.astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}