{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regex experiments for splitting comma-separated header lines\n",
    "\n",
    "Scratch cells that try out `re.split` / `re.search` on sample `VARIABLES` lines and on the first 25 lines of one `.OSD.csv` file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# imports\n",
    "from sqlalchemy import create_engine, Column, String, Integer, Numeric, MetaData, Table, type_coerce, case\n",
    "from sqlalchemy.orm import mapper, create_session\n",
    "from sqlalchemy.sql import and_, or_, not_, select\n",
    "import sqlalchemy.types as types\n",
    "from sqlalchemy.ext.automap import automap_base\n",
    "import csv\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import numbers\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', '', '', 'abc', '', 'def', 'g\\n']\n"
     ]
    }
   ],
   "source": [
    "# Split on commas, swallowing any whitespace on either side of each comma.\n",
    "# The trailing newline is not adjacent to a comma, so it stays on the last field.\n",
    "sample_fields=re.split(r'\\s*,\\s*','VARIABLES , ,, abc,,def ,g\\n')\n",
    "print(sample_fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "match\n"
     ]
    }
   ],
   "source": [
    "# '^' anchors at the very start of the string, so the line with a leading\n",
    "# space does not match and only 'match' is printed.\n",
    "if re.search('^VARIABLES', 'VARIABLES , ,, abc,,def ,g\\n'):\n",
    "    print('match')\n",
    "if re.search('^VARIABLES', ' VARIABLES , ,, abc,,def ,g\\n'):\n",
    "    print('spacematch')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "match\n"
     ]
    }
   ],
   "source": [
    "# Raw string so that \\s reaches the regex engine instead of being an invalid str escape.\n",
    "if re.search(r'^END\\sOF\\sVARIABLES','END OF VARIABLES SECTION,'):\n",
    "    print('match')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Silicate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O', '', '']\n"
     ]
    }
   ],
   "source": [
    "# The two trailing commas produce two empty fields at the end of the list.\n",
    "splitline=re.split(r'\\s*,\\s*','VARIABLES ,Depth ,F,O,Temperatur,F,O,Salinity ,F,O,Oxygen ,F,O,Phosphate ,F,O,Silicate ,F,O,Nitrate ,F,O,pH ,F,O,,')\n",
    "print(splitline)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Silicate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O']\n",
      "Depth\n",
      "F\n",
      "O\n",
      "Temperatur\n",
      "F\n",
      "O\n",
      "Salinity\n",
      "F\n",
      "O\n",
      "Oxygen\n",
      "F\n",
      "O\n",
      "Phosphate\n",
      "F\n",
      "O\n",
      "Silicate\n",
      "F\n",
      "O\n",
      "Nitrate\n",
      "F\n",
      "O\n",
      "pH\n",
      "F\n",
      "O\n"
     ]
    }
   ],
   "source": [
    "# Keep only the non-empty fields (drops the blanks left by the trailing commas).\n",
    "nonempty=[field for field in splitline if field]\n",
    "print(nonempty)\n",
    "# After the leading 'VARIABLES' tag the fields come in groups of three\n",
    "# (a name followed by 'F' and 'O'); print every field of each complete group.\n",
    "columns=nonempty[1:]\n",
    "n=len(columns)//3\n",
    "for field in columns[:3*n]:\n",
    "    print(field)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['#--------------------------------------------------------------------------------', '']\n",
      "['CAST', '', '6510729', 'WOD Unique Cast Number', 'WOD code', '']\n",
      "['NODC Cruise ID', '', 'CA-10936', '', '', '']\n",
      "['Originators Station ID', '', '3', '', '', 'integer', '']\n",
      "['Originators Cruise ID', '', '30600', '', '', '']\n",
      "['Latitude', '', '49.2583', 'decimal degrees', '', '']\n",
      "['Longitude', '', '-123.7833', 'decimal degrees', '', '']\n",
      "['Year', '', '1931', '', '', '']\n",
      "['Month', '', '1', '', '', '']\n",
      "['Day', '', '28', '', '', '']\n",
      "['Time', '', '22.70', 'decimal hours (UT)', '', '']\n",
      "['METADATA', '']\n",
      "['Country', '', 'CA', 'NODC code', 'CANADA\\n']\n",
      "['Accession Number', '', '9600170.', 'NODC code', '', '']\n",
      "['Platform', '', '789.', 'OCL code', 'A. P. KNIGHT (F/V;comm-d 1927;decomm-d 1974)', '']\n",
      "['Institute', '', '141.', 'NODC code', 'FISH RES BOARD OF CANADA PACIFIC OCEANOGRAPHIC GROUP (NANAIMO)', '']\n",
      "['Wind Force', '', '0.', 'Beaufort Scale', 'CALM MEAN VELOCITY IN KNOTS <1 IN METERS/SEC 0-0.2 IN KM/H <1 IN M.P.H. <1 /WAV', '']\n",
      "['Wind Direction', '', '0.', 'WMO code 0877', 'CALM (NO WAVES-NO MOTION)', '']\n",
      "['Wind speed', '', '0.0', 'knots', '', '']\n",
      "['Barometric pressure', '', '1020.90', 'millibars', '', '']\n",
      "['Air temperature (dry bulb)', '', '9.90', 'degrees Celsius', '', '']\n",
      "['probe_type', '', '7.', 'OCL_code', 'bottle/rossette/net', '']\n",
      "['Original units', 'Oxygen', '7.', 'NODC code', 'ug-at/l mmol/m3 umol/l uM umol/dm3', '']\n",
      "['VARIABLES', 'Depth', 'F', 'O', 'Temperatur', 'F', 'O', 'Salinity', 'F', 'O', 'Oxygen', 'F', 'O', 'Phosphate', 'F', 'O', 'Nitrate', 'F', 'O', 'pH', 'F', 'O', '', '']\n",
      "['UNITS', 'm', '', '', 'degrees C', '', '', 'PSS', '', '', 'ml/l', '', '', 'umol/l', '', '', 'umol/l', '', '', '(n/a)', '', '', '', '']\n"
     ]
    }
   ],
   "source": [
    "# definitions\n",
    "# NOTE: absolute, machine-specific path; adjust before re-running elsewhere.\n",
    "file1='/ocean/eolson/MEOPAR/obs/NODCJDFSearch/ocldb1449111998.1790.OSD.csv'\n",
    "# Print the comma-split fields of the first 25 lines only.\n",
    "# The with-block guarantees the file handle is closed when the loop ends.\n",
    "with open(file1,'r') as f:\n",
    "    for linno, line in enumerate(f, start=1):\n",
    "        if linno>25:\n",
    "            break\n",
    "        print(re.split(r'\\s*,\\s*',line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'var1', 'var2'}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the distinct names, skipping the 'F' and 'O' entries.\n",
    "varlist={x for x in ['var1', 'F','O','var2','F','O'] if x not in {'F','O'}}\n",
    "varlist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'var1', 'var2', 'var3'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Union in the names from a second list, this time also skipping empty strings;\n",
    "# 'var2' is already present, so only 'var3' is new.\n",
    "varlist=varlist | {x for x in ['var3', 'F','O','var2','','F','O'] if x not in {'F','O',''}}\n",
    "varlist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}