{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# imports\n",
    "from sqlalchemy import create_engine, Column, String, Integer, Numeric, MetaData, Table, type_coerce, case\n",
    "from sqlalchemy.orm import mapper, create_session\n",
    "from sqlalchemy.sql import and_, or_, not_, select\n",
    "import sqlalchemy.types as types\n",
    "from sqlalchemy.ext.automap import automap_base\n",
    "import csv\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import numbers\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', '', '', 'abc', '', 'def', 'g\\n']\n"
     ]
    }
   ],
   "source": [
    "test=re.split('\\s*,\\s*','VARIABLES , ,, abc,,def ,g\\n')\n",
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "match\n"
     ]
    }
   ],
   "source": [
    "if re.search('^VARIABLES', 'VARIABLES , ,, abc,,def ,g\\n'):\n",
    "    print('match')\n",
    "if re.search('^VARIABLES', ' VARIABLES , ,, abc,,def ,g\\n'):\n",
    "    print('spacematch')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "match\n"
     ]
    }
   ],
   "source": [
    "if re.search('^END\\sOF\\sVARIABLES','END OF VARIABLES SECTION,'):\n",
    "    print('match')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Silicate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O', '', '']\n"
     ]
    }
   ],
   "source": [
    "splitline=re.split('\\s*,\\s*','VARIABLES ,Depth     ,F,O,Temperatur,F,O,Salinity  ,F,O,Oxygen    ,F,O,Phosphate ,F,O,Silicate  ,F,O,Nitrate   ,F,O,pH        ,F,O,,')\n",
    "print (splitline)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Silicate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O']\n",
      "Depth\n",
      "F\n",
      "O\n",
      "Temperatur\n",
      "F\n",
      "O\n",
      "Salinity\n",
      "F\n",
      "O\n",
      "Oxygen\n",
      "F\n",
      "O\n",
      "Phosphate\n",
      "F\n",
      "O\n",
      "Silicate\n",
      "F\n",
      "O\n",
      "Nitrate\n",
      "F\n",
      "O\n",
      "pH\n",
      "F\n",
      "O\n"
     ]
    }
   ],
   "source": [
    "sear=re.compile('^.+$').search\n",
    "test=[m.group(0) for m in map(sear, splitline) if m]\n",
    "print(test)\n",
    "n=int((len(test)-1)/3)\n",
    "for i in range(0,n):\n",
    "    print(test[i*3+1])\n",
    "    print(test[i*3+2])\n",
    "    print(test[i*3+3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['#--------------------------------------------------------------------------------', '']\n",
      "['CAST', '', '6510729', 'WOD Unique Cast Number', 'WOD code', '']\n",
      "['NODC Cruise ID', '', 'CA-10936', '', '', '']\n",
      "['Originators Station ID', '', '3', '', '', 'integer', '']\n",
      "['Originators Cruise ID', '', '30600', '', '', '']\n",
      "['Latitude', '', '49.2583', 'decimal degrees', '', '']\n",
      "['Longitude', '', '-123.7833', 'decimal degrees', '', '']\n",
      "['Year', '', '1931', '', '', '']\n",
      "['Month', '', '1', '', '', '']\n",
      "['Day', '', '28', '', '', '']\n",
      "['Time', '', '22.70', 'decimal hours (UT)', '', '']\n",
      "['METADATA', '']\n",
      "['Country', '', 'CA', 'NODC code', 'CANADA\\n']\n",
      "['Accession Number', '', '9600170.', 'NODC code', '', '']\n",
      "['Platform', '', '789.', 'OCL code', 'A. P. KNIGHT (F/V;comm-d 1927;decomm-d 1974)', '']\n",
      "['Institute', '', '141.', 'NODC code', 'FISH RES BOARD OF CANADA PACIFIC OCEANOGRAPHIC GROUP (NANAIMO)', '']\n",
      "['Wind Force', '', '0.', 'Beaufort Scale', 'CALM MEAN VELOCITY IN KNOTS <1 IN METERS/SEC 0-0.2 IN KM/H <1 IN M.P.H. <1 /WAV', '']\n",
      "['Wind Direction', '', '0.', 'WMO code 0877', 'CALM (NO WAVES-NO MOTION)', '']\n",
      "['Wind speed', '', '0.0', 'knots', '', '']\n",
      "['Barometric pressure', '', '1020.90', 'millibars', '', '']\n",
      "['Air temperature (dry bulb)', '', '9.90', 'degrees Celsius', '', '']\n",
      "['probe_type', '', '7.', 'OCL_code', 'bottle/rossette/net', '']\n",
      "['Original units', 'Oxygen', '7.', 'NODC code', 'ug-at/l            mmol/m3  umol/l  uM  umol/dm3', '']\n",
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O', '', '']\n",
      "['UNITS', 'm', '', '', 'degrees C', '', '', 'PSS', '', '', 'ml/l', '', '', 'umol/l', '', '', 'umol/l', '', '', '(n/a)', '', '', '', '']\n"
     ]
    }
   ],
   "source": [
    "# definitions\n",
    "file1='/ocean/eolson/MEOPAR/obs/NODCJDFSearch/ocldb1449111998.1790.OSD.csv'\n",
    "f=open(file1,'r')\n",
    "linno=0\n",
    "for line in f:\n",
    "    linno+=1\n",
    "    if linno>25:\n",
    "        break\n",
    "    test=re.split('\\s*,\\s*',line)\n",
    "    print(test)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'var1', 'var2'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "varlist={x for x in ['var1', 'F','O','var2','F','O'] if x not in {'F','O'}}\n",
    "varlist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'var1', 'var2', 'var3'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "varlist=varlist | {x for x in ['var3', 'F','O','var2','','F','O'] if x not in {'F','O',''}}\n",
    "varlist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}