{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import sqlalchemy\n", "from sqlalchemy import create_engine, Column, String, Integer, Numeric, MetaData, Table, ForeignKey\n", "from sqlalchemy.orm import create_session, relationship\n", "from sqlalchemy.ext.automap import automap_base\n", "from sqlalchemy.ext.declarative import declarative_base\n", "from sqlalchemy.sql import and_, or_, not_, func\n", "\n", "import csv\n", "import matplotlib.pyplot as plt\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "basepath = '/Users/eolson/WorkUBC/SWC/SQLAlchemy/'\n", "dbname = 'fakeData'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create Database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "engine = create_engine('sqlite:///' + basepath + dbname + '.sqlite')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create Tables in Database\n", " tables are associated with Classes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "Base=declarative_base() # useful for creating your tables, defining their structure" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# We will use this first table as an intermediate step to import the flat spreadsheet\n", "class LoadDataTBL(Base):\n", " __table__ = Table('LoadDataTBL', Base.metadata, \n", " Column('ID', Integer, primary_key=True),\n", " Column('Station', Integer),\n", " Column('Lat', Numeric),\n", " Column('Lon', Numeric),\n", " Column('Date', String),\n", " Column('Depth', Numeric),\n", " Column('Temperature', Numeric),\n", " Column('Salinity', Numeric))\n", "\n", "# Station Table\n", "class StationTBL(Base):\n", " __table__=Table('StationTBL', Base.metadata,\n", " Column('ID', Integer, primary_key=True),\n", " Column('Lat', Numeric),\n", " Column('Lon', Numeric),\n", " Column('Date', String))\n", "\n", "# Profile Table\n", "class ProfileTBL(Base):\n", " __table__=Table('ProfileTBL', Base.metadata,\n", " Column('ID', Integer, primary_key=True),\n", " Column('StationTBLID', Integer, ForeignKey('StationTBL.ID')),\n", " Column('Depth', Numeric),\n", " Column('Temperature', Numeric),\n", " Column('Salinity', Numeric))\n", "\n", "Base.metadata.create_all(engine)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### create session: this is how you interact with the database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session = create_session(bind = engine, autocommit = False, autoflush = True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### load data from csv file into 'LoadDataTBL':" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f=open(basepath+'MadeUpData.csv','r')\n", "cf = csv.DictReader(f, delimiter=',')\n", "i=0\n", "for row in cf:\n", " i+=1\n", " if i<3: print(row) # print first two rows\n", " session.execute(LoadDataTBL.__table__.insert().values(**row))\n", "f.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Commit changes to database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.commit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Query database to return ID, Station, Depth, Temperature, and Salinity Columns from LoadDataTBL. We will insert these in ProfileTBL" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# query LoadDataTBL to return columns that belong in ProfileTBL\n", "# ID, Station, Depth, Temperature Salinity\n", "q0=session.query(LoadDataTBL.ID,LoadDataTBL.Station,LoadDataTBL.Depth,LoadDataTBL.Temperature,\n", " LoadDataTBL.Salinity)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q0.all():\n", " print(*row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q0.all():\n", " idict={}\n", " idict['ID']=row[0]\n", " idict['StationTBLID']=row[1]\n", " idict['Depth']=row[2]\n", " idict['Temperature']=row[3]\n", " idict['Salinity']=row[4]\n", " # enter in new record in Profile table:\n", " session.execute(ProfileTBL.__table__.insert().values(**idict))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.commit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Query database to return unique Station, Lat, Lon, and Depth Columns from LoadDataTBL. We will insert these in ProfileTBL." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q1=session.query(LoadDataTBL.Station,LoadDataTBL.Lat,LoadDataTBL.Lon,\n", " LoadDataTBL.Date).group_by(LoadDataTBL.Station)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q1.all():\n", " print(*row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for Station, Lat, Lon, Date in q1.all():\n", " idict={}\n", " idict['ID']=Station\n", " idict['Lat']=Lat\n", " idict['Lon']=Lon\n", " idict['Date']=Date\n", " # enter in new record in Profile table:\n", " session.execute(StationTBL.__table__.insert().values(**idict))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.commit()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete (drop) LoadDataTBL from database" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "LoadDataTBL.__table__.drop(engine)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### close database connection" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "engine.dispose()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reopen database and map tables to classes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# automatically reflect database structure:\n", "Base = automap_base()\n", "engine = create_engine('sqlite:///' + basepath + dbname + '.sqlite', echo = False)\n", "# reflect the tables:\n", "Base.prepare(engine, reflect=True)\n", "# mapped classes have been created" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# assign table classes to short variables for convenience:\n", "StationTBL=Base.classes.StationTBL\n", "ProfileTBL=Base.classes.ProfileTBL" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session = create_session(bind = engine, autocommit = False, autoflush = True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## QUERIES\n", "### Query to return all Stations with Latitude > 48.4" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q2=session.query(StationTBL.ID,StationTBL.Lat).filter(StationTBL.Lat>48.4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q2.all():\n", " print(row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Query to return all Temperature, Salinity where Salinity < 33" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "q3=session.query(ProfileTBL.Temperature,ProfileTBL.Salinity).filter(ProfileTBL.Salinity<33)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q3.all():\n", " print(row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### --> Write a query to return all data from ProfileTBL where depth < 30" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How do we write a query that combines information from both tables?\n", "### What happens if we jsut ask for information from both tables?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q4=session.query(StationTBL.Lat,StationTBL.Lon,ProfileTBL.Temperature).\\\n", " filter(ProfileTBL.Salinity<33)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q4:\n", " print(row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We need to join the tables using keys" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q4=session.query(StationTBL.Lat,StationTBL.Lon,ProfileTBL.Salinity).\\\n", " select_from(ProfileTBL).join(StationTBL,StationTBL.ID==ProfileTBL.StationTBLID).\\\n", " filter(ProfileTBL.Salinity<33)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q4:\n", " print(row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### --> Write a query to return Depth, Temperature where Lat > 48.4" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Some built in functions are available to us. For instance, return the maximum salinity in the profile table:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q5=session.query(func.max(ProfileTBL.Salinity))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(q5.one())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### These functions can also be applied over grouped sections of data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "q6=session.query(StationTBL.ID,func.avg(ProfileTBL.Temperature),func.count()).select_from(\n", " ProfileTBL).join(StationTBL,StationTBL.ID==ProfileTBL.StationTBLID).\\\n", " group_by(StationTBL.ID)\n", "for row in q6:\n", " print(row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### --> Write a query that returns each Salinity value present in ProfileTBL along with the number of times it appears" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## What if you want to use a column more than once in your query? For instance:\n", "### Return ID, Temperature and Salinity pairs from records where both the depth and temperature are equal" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "qP1=session.query(ProfileTBL).subquery()\n", "qP2=session.query(ProfileTBL).subquery()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "qMatch=session.query(qP1.c.ID,qP2.c.ID,qP1.c.Temperature,qP2.c.Temperature,\n", " qP1.c.Salinity,qP2.c.Salinity).filter(\n", " qP1.c.ID!=qP2.c.ID,qP1.c.Temperature==qP2.c.Temperature,qP1.c.Depth==qP2.c.Depth)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in qMatch.all():\n", " print(*row)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Put everything together:\n", "### --> Write a query to return Latitudes and Longitudes where salinities at more than one depth were less than 29" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "q0=session.query(ProfileTBL.StationTBLID,func.count().label('N')).filter(ProfileTBL.Salinity<29).\\\n", " group_by(ProfileTBL.StationTBLID)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q0.all():\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "qsub=q0.subquery()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "q=session.query(StationTBL.ID,StationTBL.Lat,StationTBL.Lon).\\\n", " select_from(StationTBL).join(qsub,qsub.c.StationTBLID==StationTBL.ID).\\\n", " filter(qsub.c.N>1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for row in q.all():\n", " print(row)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.close()\n", "engine.dispose()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }