{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2020-11-03 - lesson #9 notebook.ipynb","provenance":[],"collapsed_sections":["Jy_ZjZTTYTqY","PsLWyh4SNJBC"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"n-tRl-ouYOAe"},"source":["# Video lesson \\#9 notebook"]},{"cell_type":"markdown","metadata":{"id":"Jy_ZjZTTYTqY"},"source":["## Part 1: Pandas `Series` and `DataFrame` objects"]},{"cell_type":"code","metadata":{"id":"57m7NRAMYSyB"},"source":["# Import Pandas (and NumPy, because Pandas is built on NumPy)\n","import numpy as np\n","import pandas as pd"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"U5tMy1iIBhXl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344345789,"user_tz":480,"elapsed":950,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"d793df50-a15b-415c-b210-c80414210ba6"},"source":["# Create two new Pandas Series objects\n","s1 = pd.Series(index=[2016,2017,2018,2019,2020],\n"," data=[4.1,5.2,6.3,7.4,8.5],\n"," name='Temperature')\n","s2 = pd.Series(index=[2016,2017,2018,2019,2020],\n"," data=[35.5,35.0,34.5,34.0,33.5],\n"," name='Salinity')\n","\n","# Series still have a length, as with lists and NumPy arrays\n","print(len(s1))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["5\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"4u-PfDtSBjKz","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344345790,"user_tz":480,"elapsed":932,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"b78567c5-fbe6-4715-9da1-8722d06424cb"},"source":["# Extract parts of the Series 
object\n","print(s1.index) # get index as Index object (not very useful)\n","print(s1.index.values) # get index converted into NumPy array\n","print(s1.values) # get data converted into NumPy array"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Int64Index([2016, 2017, 2018, 2019, 2020], dtype='int64')\n","[2016 2017 2018 2019 2020]\n","[4.1 5.2 6.3 7.4 8.5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"iQ6_4IOOESJJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344345790,"user_tz":480,"elapsed":913,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"101d87ac-1aa5-4cb7-96e5-15c5e097fb90"},"source":["# Select data from Series object using .iloc (Python/NumPy-style selection by position)\n","print('\\nOption 1:\\n', s1.iloc[3]) # use a single integer index (returns the value)\n","print('\\nOption 2:\\n', s1.iloc[[2,3,4]]) # use a list or array of integer indices (returns a Series)\n","print('\\nOption 3:\\n', s1.iloc[2:5]) # use a slice of integer indices (returns a Series)\n","print('\\nOption 4:\\n', s1.iloc[[False,False,True,True,True]]) # use a Boolean array (returns a Series)\n","\n","# Select data from Series object using .loc (selection by label)\n","print('\\nOption 5:\\n', s1.loc[2019]) # use a single label of the index (NOT an integer position along the index)\n","print('\\nOption 6:\\n', s1.loc[[2018,2019,2020]]) # use a list or array of labels\n","print('\\nOption 7:\\n', s1.loc[2018:2020]) # use a slice of labels (UNLIKE standard Python/NumPy slices, the end value is inclusive)\n","\n","# Remember that you have to use .values to convert a Series to a NumPy array:\n","print('\\nReminder:\\n', s1.loc[2018:2020].values)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n","Option 1:\n"," 7.4\n","\n","Option 2:\n"," 2018 
6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","\n","Option 3:\n"," 2018 6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","\n","Option 4:\n"," 2018 6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","\n","Option 5:\n"," 7.4\n","\n","Option 6:\n"," 2018 6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","\n","Option 7:\n"," 2018 6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","\n","Reminder:\n"," [6.3 7.4 8.5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"vaVz5ACPFbLX","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344345791,"user_tz":480,"elapsed":895,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"3ba0d7ed-4cfa-4904-c00e-8f4813bd4a30"},"source":["# Changing values of a Series using the indexing options above\n","s1.loc[2018] = 5.3\n","print(s1)\n","s1.iloc[3:5] = [6.4,7.5]\n","print(s1)\n","s1.loc[2018:2020] += 1\n","print(s1)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["2016 4.1\n","2017 5.2\n","2018 5.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n","2016 4.1\n","2017 5.2\n","2018 5.3\n","2019 6.4\n","2020 7.5\n","Name: Temperature, dtype: float64\n","2016 4.1\n","2017 5.2\n","2018 6.3\n","2019 7.4\n","2020 8.5\n","Name: Temperature, dtype: float64\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"qCFOAtW1NvJO","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344345791,"user_tz":480,"elapsed":879,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"f0999ff5-e085-468c-9de2-759fbd48bf9c"},"source":["# Add a new value to a Series 
using a new index label\n","s1.loc[2021] = 9.6\n","print(s1)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["2016 4.1\n","2017 5.2\n","2018 6.3\n","2019 7.4\n","2020 8.5\n","2021 9.6\n","Name: Temperature, dtype: float64\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"WlIVNZ5FQyGB"},"source":["# Two ways of creating a Pandas DataFrame object\n","\n","# Option 1: join two or more Series objects\n","df = pd.concat([s1,s2],axis=1)\n","\n","# Option 2: provide a dictionary with the data lists or NumPy arrays\n","df = pd.DataFrame(index=[2016,2017,2018,2019,2020],\n"," data={'Temperature':[4.1,5.2,6.3,7.4,8.5],\n"," 'Salinity':[35.5,35.0,34.5,34.0,33.5]})"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ejAFmOmzRGuW","colab":{"base_uri":"https://localhost:8080/","height":648},"executionInfo":{"status":"ok","timestamp":1604344346133,"user_tz":480,"elapsed":1199,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"a20051c6-69c8-4850-9116-ddf7915103fb"},"source":["# Get information about the DataFrame object\n","print(df.shape) # get dimensions\n","print(df.size) # get number of data values\n","print(df) # print() still works, but is not as nice looking as display()\n","display(df) # display() opens the display interface, a more nicely formatted view of the object\n","df.describe() # get useful summary statistics"],"execution_count":null,"outputs":[{"output_type":"stream","text":["(5, 2)\n","10\n"," Temperature Salinity\n","2016 4.1 35.5\n","2017 5.2 35.0\n","2018 6.3 34.5\n","2019 7.4 34.0\n","2020 8.5 33.5\n"],"name":"stdout"},{"output_type":"display_data","data":{"text/html":["
"],"text/plain":[" Temperature Salinity\n","count 5.000000 5.000000\n","mean 6.300000 34.500000\n","std 1.739253 0.790569\n","min 4.100000 33.500000\n","25% 5.200000 34.000000\n","50% 6.300000 34.500000\n","75% 7.400000 35.000000\n","max 8.500000 35.500000"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"id":"4dRFCmGNUhLW","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344346133,"user_tz":480,"elapsed":1180,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"d03071d5-ab41-44ee-bfc8-f665b5030530"},"source":["# Extract parts of the DataFrame object\n","print(df.index.values) # get index as a NumPy array\n","print(df.columns.values) # get column names as a NumPy array\n","print(df.values) # get data as a NumPy array\n","print(df['Salinity'].values) # get one column as a NumPy array\n"," # (similar to dictionary indexing)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[2016 2017 2018 2019 2020]\n","['Temperature' 'Salinity']\n","[[ 4.1 35.5]\n"," [ 5.2 35. ]\n"," [ 6.3 34.5]\n"," [ 7.4 34. ]\n"," [ 8.5 33.5]]\n","[35.5 35. 34.5 34. 
33.5]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"cS0qFEqydUnc","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344346134,"user_tz":480,"elapsed":1163,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"b1a4b00e-e10f-4a2a-91d2-e248bfcbc1a5"},"source":["# Select data from DataFrame object using .iloc or .loc\n","print('\\nExample 1:\\n', df.iloc[3]) # use a single index (returns a Series)\n","print('\\nExample 2:\\n', df.loc[2019]) # use a single label (returns a Series)\n","print('\\nExample 3:\\n', df.iloc[2:5]) # use a slice of integer indices (returns a DataFrame)\n","print('\\nExample 4:\\n', df.loc[2018:2020]) # use a slice of labels (returns a DataFrame)\n","print('\\nExample 5:\\n', df['Temperature'].loc[2019]) # select a column AND choose a single row (returns the value)\n","print('\\nExample 6:\\n', df[['Temperature','Salinity']].loc[2019]) # select multiple columns AND choose a single row (returns a Series)\n","print('\\nExample 7:\\n', df[df['Temperature'] > 6.0]) # use a Boolean condition applied to one column (returns a DataFrame)\n","\n","# NOTE: changing values using .iloc and .loc selection works similarly to what was shown above with Series"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n","Example 1:\n"," Temperature 7.4\n","Salinity 34.0\n","Name: 2019, dtype: float64\n","\n","Example 2:\n"," Temperature 7.4\n","Salinity 34.0\n","Name: 2019, dtype: float64\n","\n","Example 3:\n"," Temperature Salinity\n","2018 6.3 34.5\n","2019 7.4 34.0\n","2020 8.5 33.5\n","\n","Example 4:\n"," Temperature Salinity\n","2018 6.3 34.5\n","2019 7.4 34.0\n","2020 8.5 33.5\n","\n","Example 5:\n"," 7.4\n","\n","Example 6:\n"," Temperature 7.4\n","Salinity 34.0\n","Name: 2019, dtype: float64\n","\n","Example 7:\n"," Temperature Salinity\n","2018 6.3 34.5\n","2019 
7.4 34.0\n","2020 8.5 33.5\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"NOZf4kXThUQ7","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344346134,"user_tz":480,"elapsed":1145,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"81991544-e5ed-4f3e-fae7-9c965e9ee977"},"source":["# Apply NumPy functions to Series and DataFrame objects\n","print('\\nExample 1:\\n', df.mean()) # take the mean along the index (axis 0)\n","print('\\nExample 2:\\n', df.mean(axis=0)) # same as above\n","print('\\nExample 3:\\n', df.mean(axis=1)) # take the mean along the columns (axis 1)\n","print('\\nExample 4:\\n', df.mean(skipna=True)) # ignore NaN values (if present) when taking the mean\n","\n","# Combine column extraction, selection by label, and applying a NumPy function\n","print('\\nExample 5:\\n', df['Salinity'].loc[2017:].mean()) # returns a single value"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\n","Example 1:\n"," Temperature 6.3\n","Salinity 34.5\n","dtype: float64\n","\n","Example 2:\n"," Temperature 6.3\n","Salinity 34.5\n","dtype: float64\n","\n","Example 3:\n"," 2016 19.8\n","2017 20.1\n","2018 20.4\n","2019 20.7\n","2020 21.0\n","dtype: float64\n","\n","Example 4:\n"," Temperature 6.3\n","Salinity 34.5\n","dtype: float64\n","\n","Example 5:\n"," 34.25\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eNzNLSyGkaHc"},"source":["# Save a Pandas DataFrame as a CSV file\n","# df.to_csv('filepath/including/filename.csv')\n","\n","# Read a CSV file as a Pandas DataFrame (more powerful than np.genfromtxt()!)\n","# See available arguments: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html\n","# df = pd.read_csv('filepath/including/filename.csv',delimiter=',',delim_whitespace=False,header=0)\n","\n","# Read an Excel 
spreadsheet as a Pandas DataFrame\n","# See available arguments: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html\n","# df = pd.read_excel('filepath/including/filename.xlsx',sheet_name='Sheet1')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"PsLWyh4SNJBC"},"source":["## Part 2: xarray `DataArray` and `Dataset` objects"]},{"cell_type":"code","metadata":{"id":"nKguR6pRYCBN"},"source":["# Import xarray (and other libraries, because they are helpful when working with xarray files)\n","import numpy as np\n","import pandas as pd\n","import xarray as xr\n","from datetime import datetime, timedelta\n","import matplotlib.pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"GqgHqCLUYaTz"},"source":["# You'll need to install the netCDF4 library to work with netCDF files\n","# You should only need to run this line of code once per Colab notebook,\n","# so comment it out or delete it afterwards\n","# !pip install netcdf4"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"GpTSfp17Ybq3","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344346136,"user_tz":480,"elapsed":1103,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"2706a539-4a70-4716-8dbd-e45b55425bf3"},"source":["# Give Colab access to Google Drive\n","from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"aeJNHWpzYe5B"},"source":["# NOTE: you'll need to change this variable to match your own filepath\n","filepath = 'drive/My Drive/OCEAN 215 - Autumn 
\'20/OCEAN 215 - Autumn \'20 - Course documents/' \\\n"," + 'Video lesson slides and notebooks/2020-11-03 - lesson #9 data/bsose_monthly_velocities.nc'\n","\n","# This is how we load a netCDF file\n","# (This method is safe on Colab for files\n","# up to about 0.5 GB [500 MB] in size)\n","data = xr.open_dataset(filepath)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"P03-CHg_ZlZ4","colab":{"base_uri":"https://localhost:8080/","height":383},"executionInfo":{"status":"ok","timestamp":1604344346137,"user_tz":480,"elapsed":1076,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"58ea80a2-46c2-42f5-f8f0-0022dc43e671"},"source":["# Examine structure of xarray Dataset using the interactive display() interface\n","display(data) # Note there are 2 variables (eastward velocities, northward velocities)\n"," # 4 dimensions, and 4 coordinates (time, lat, lon, depth), so each variable is a 4D array\n","\n","# Note that you can click the buttons to view attributes (page icon) and actual data values (cylinder icon)"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["
"],"text/plain":["\n","[49533120 values with dtype=float32]\n","Coordinates:\n"," * time (time) datetime64[ns] 2012-01-30T20:00:00 ... 2012-12-30T12:00:00\n"," * lat (lat) float32 -77.96525 -77.89555 ... -30.089203 -29.789328\n"," * lon (lon) float32 -179.66667 -179.33333 -179.0 ... 179.66667 180.0\n"," * depth (depth) float32 2.1 26.25 65.0 105.0 ... 1800.0 3000.0 4600.0\n","Attributes:\n"," units: m/s\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"6ToFSWx56PvG","colab":{"base_uri":"https://localhost:8080/","height":237},"executionInfo":{"status":"ok","timestamp":1604344347103,"user_tz":480,"elapsed":2000,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"eb982b77-dc11-416f-e217-fd9d6e6a103d"},"source":["# You can do mathematical calculations between xarray DataArrays, as long as their dimensions match\n","\n","# Example: calculate current speed using Pythagorean theorem: \n","# speed = sqrt(U^2 + V^2)\n","speed = (data['U']**2 + data['V']**2)**0.5\n","display(speed)\n","\n","# Note that the coordinates and dimensions remained the same:"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["
"],"text/plain":["\n","array(0.12589, dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -52.70605\n"," lon float32 -13.0\n"," depth float32 2.1\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]},"execution_count":57}]},{"cell_type":"code","metadata":{"id":"uNihw6GC_Tay","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344347105,"user_tz":480,"elapsed":1912,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"23e3a63a-0e55-43ec-c052-16b0d6ed7fcb"},"source":["# You can convert a single-value DataArray result to a number using float() or .item():\n","print(data['U'].isel(time=0,lat=200,lon=500,depth=0).item())\n","print(float(data['U'].isel(time=0,lat=200,lon=500,depth=0)))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["0.1258898824453354\n","0.1258898824453354\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"qBfXHrsHEwtp","colab":{"base_uri":"https://localhost:8080/","height":306},"executionInfo":{"status":"ok","timestamp":1604344347106,"user_tz":480,"elapsed":1889,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"c6cf5f4c-31d6-4325-bf01-1d16579bad77"},"source":["# You can select multiple indices using .isel()\n","data['U'].isel(time=0,lat=200,lon=500,depth=[0,1,2,3,4]) # analogous to NumPy: u[0,200,500,[0,1,2,3,4]]\n","data['U'].isel(time=0,lat=200,lon=500,depth=slice(0,5)) # analogous to NumPy: u[0,200,500,0:5]\n","\n","# Notice below that the result has a dimension of 5 depths, and we see the depths range from 2.1 m to 146.5 
m:"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
"],"text/plain":["\n","array([0.12589 , 0.050398, 0.057173, 0.061555, 0.057382], dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -52.70605\n"," lon float32 -13.0\n"," * depth (depth) float32 2.1 26.25 65.0 105.0 146.5\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"code","metadata":{"id":"So5WL2P5Gmck","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604344347106,"user_tz":480,"elapsed":1866,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"72a6667a-eae3-4949-92ae-84dae6a0a13b"},"source":["# Multiple results can be converted from a DataArray to the underlying NumPy array using .values:\n","data['U'].isel(time=0,lat=200,lon=500,depth=slice(0,5)).values"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0.12588988, 0.05039841, 0.05717332, 0.06155456, 0.057382 ],\n"," dtype=float32)"]},"metadata":{"tags":[]},"execution_count":60}]},{"cell_type":"code","metadata":{"id":"0Whazqb_JA-y","colab":{"base_uri":"https://localhost:8080/","height":376},"executionInfo":{"status":"ok","timestamp":1604344369036,"user_tz":480,"elapsed":295,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"d66a1831-107e-4f53-d711-b8a6072578f0"},"source":["# This also works when the underlying NumPy array has more than one dimension (e.g. 
is 2-D, 3-D, etc.):\n","display(data['U'].isel(time=0,lat=slice(200,204),lon=slice(500,504),depth=0))\n","data['U'].isel(time=0,lat=slice(200,204),lon=slice(500,504),depth=0).values\n","\n","# Calling .values on the result gave a 4x4 NumPy array:"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["
"],"text/plain":["\n","array(0.12589, dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -52.70605\n"," lon float32 -13.0\n"," depth float32 2.1\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]},"execution_count":62}]},{"cell_type":"code","metadata":{"id":"_hfDPUWFYaBC","colab":{"base_uri":"https://localhost:8080/","height":306},"executionInfo":{"status":"ok","timestamp":1604344913343,"user_tz":480,"elapsed":357,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"2dd7cdf7-58cf-4a1c-9e46-e454737be6e7"},"source":["# Slicing works similarly between .isel() (slice by index) and .sel() (slice by value):\n","data['U'].sel(time=datetime(2012,1,30,20,0,0),lat=-52.70605,lon=-13.0,depth=slice(2,147)) # slicing values don't have to be exact"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
"],"text/plain":["\n","array([0.12589 , 0.050398, 0.057173, 0.061555, 0.057382], dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -52.70605\n"," lon float32 -13.0\n"," * depth (depth) float32 2.1 26.25 65.0 105.0 146.5\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]},"execution_count":65}]},{"cell_type":"code","metadata":{"id":"uXM4BWzwY504","colab":{"base_uri":"https://localhost:8080/","height":306},"executionInfo":{"status":"ok","timestamp":1604345129628,"user_tz":480,"elapsed":324,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"701900d4-da49-4edf-d64c-7d4159730aed"},"source":["# Sometimes you don't know the exact coordinate values, so you can ask xarray to find the 'nearest' values:\n","data['U'].sel(time=datetime(2012,1,30),lat=-53,lon=-13,depth=2,method='nearest') # with method='nearest', the requested coordinate values don't have to be exact"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
"],"text/plain":["\n","array(0.128653, dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -52.90755\n"," lon float32 -13.0\n"," depth float32 2.1\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]},"execution_count":67}]},{"cell_type":"code","metadata":{"id":"2wniUv3OaLez","colab":{"base_uri":"https://localhost:8080/","height":575},"executionInfo":{"status":"ok","timestamp":1604345992348,"user_tz":480,"elapsed":380,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"db26a95e-fd8f-4286-aba0-e0431605dcf6"},"source":["# Other examples of slicing to get a 2D NumPy array\n","\n","# Here, the remaining dimensions are latitude and longitude\n","# (because we've selected a single time and single depth)\n","display(data['U'].sel(time=datetime(2012,1,30,20),depth=2.1,\n"," lat=slice(-50,-40),lon=slice(0,120)))\n","\n","# Here, the remaining dimensions are depth and longitude\n","# (because we've selected a single time and single latitude)\n","display(data['U'].sel(time=datetime(2012,1,30,20),depth=slice(200,1000),\n"," lon=slice(-120,0)).sel(lat=-57,method='nearest'))"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["
"],"text/plain":["\n","array([[0.140002, 0.129511, 0.119145, ..., 0.032177, 0.033488, 0.035995],\n"," [0.134399, 0.124342, 0.114391, ..., 0.031455, 0.032763, 0.035228],\n"," [0.123725, 0.114466, 0.105262, ..., 0.029983, 0.031231, 0.033597],\n"," [0.106912, 0.098912, 0.090874, ..., 0.027764, 0.028964, 0.031245]],\n"," dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," lat float32 -56.926678\n"," * lon (lon) float32 -120.0 -119.66667 -119.33333 ... -0.3333435 0.0\n"," * depth (depth) float32 220.0 301.0 450.0 700.0\n","Attributes:\n"," units: meters/second\n"," long_name: Zonal Component of Velocity (m/s)\n"," standard_name: UVEL\n"," mate: VVEL"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"dGDa7r38d1Fc","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1604346606097,"user_tz":480,"elapsed":316,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"b8efd785-f8e6-4008-b3bc-26ec23b65fc1"},"source":["# You can reduce data from an xarray DataArray by applying a NumPy function:\n","\n","# .mean() calculates the average over both of the remaining axes (latitude and longitude)\n","print(data['U'].sel(time=datetime(2012,1,30,20),depth=2.1,\n"," lat=slice(-50,-40),lon=slice(0,120)).mean().item())"],"execution_count":null,"outputs":[{"output_type":"stream","text":["0.16497819125652313\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Q9ZT45eDgOdr","colab":{"base_uri":"https://localhost:8080/","height":216},"executionInfo":{"status":"ok","timestamp":1604346750003,"user_tz":480,"elapsed":287,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"98bad938-731e-4697-e66f-3991536c3765"},"source":["# .mean(dim='lon') calculates 
the average across the longitude dimension,\n","# leaving only latitude as the remaining dimension\n","display(data['U'].sel(time=datetime(2012,1,30,20),depth=2.1,\n"," lat=slice(-50,-40),lon=slice(0,120)).mean(dim='lon'))"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":["
"],"text/plain":["\n","array([0.19636832, 0.19726074, 0.19570175, 0.19753072, 0.1995998 ,\n"," 0.20088746, 0.20197098, 0.20239758, 0.20189096, 0.20043223,\n"," 0.19856165, 0.19670185, 0.19461559, 0.19198534, 0.18877912,\n"," 0.18521579, 0.18209256, 0.18030445, 0.1799029 , 0.18019168,\n"," 0.17991017, 0.17799716, 0.17403169, 0.1685389 , 0.16236207,\n"," 0.15637264, 0.15085906, 0.14584213, 0.14138193, 0.13736448,\n"," 0.13341603, 0.12943429, 0.12593448, 0.12360686, 0.12302325,\n"," 0.12359596, 0.12365858, 0.1221991 , 0.11970461, 0.11638598,\n"," 0.11225116, 0.10882197], dtype=float32)\n","Coordinates:\n"," time datetime64[ns] 2012-01-30T20:00:00\n"," * lat (lat) float32 -49.78614 -49.570454 ... -40.405617 -40.151318\n"," depth float32 2.1"]},"metadata":{"tags":[]}}]},{"cell_type":"code","metadata":{"id":"Z57CSoKfibWn","colab":{"base_uri":"https://localhost:8080/","height":300},"executionInfo":{"status":"ok","timestamp":1604347759903,"user_tz":480,"elapsed":427,"user":{"displayName":"Ethan C Campbell","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjCBYTiuomqOsCakND1k_5wj0kYvFY53Jt7kunt=s64","userId":"11255944928409084259"}},"outputId":"f5ef3c7d-7f98-4867-e9a1-d92ec42b48f2"},"source":["# Save the result (keeping it in xarray format, not NumPy, to keep the latitude coordinate)\n","lat_velocities = data['U'].sel(time=datetime(2012,1,30,20),depth=2.1,\n"," lat=slice(-50,-40),lon=slice(0,120)).mean(dim='lon')\n","\n","# So this gave the eastward velocity averaged over all longitudes in the swath,\n","# so it's a 1-D array (a line) over latitude\n","plt.figure(figsize=(4,4))\n","plt.plot(lat_velocities['lat'],lat_velocities.values,c='k')\n","plt.xlabel('Latitude (°N)')\n","plt.ylabel('Eastward velocity (m/s)')\n","plt.grid()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"\n","text/plain":["