{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time-Averaged, Depth-Integrated Tracers: netCDF4, xarray, and dask\n",
    "\n",
    "This is a version of a notebook that I (Elise) made to try out some of the things I learned in Jinhui Qin's dask tutorial on calculations that I would normally do. I'm posting a version that was improved by Doug. I am guessing that this part might be the most helpful to people:\n",
    "\n",
    "```python\n",
    "client = dask.distributed.Client(\n",
    "    n_workers=2, threads_per_worker=2, processes=True)\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "!lscpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "!head -1 /proc/meminfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime, timedelta\n",
    "import functools\n",
    "import sys\n",
    "import time\n",
    "\n",
    "import dask\n",
    "import dask.distributed\n",
    "import matplotlib.pyplot as plt\n",
    "import netCDF4\n",
    "import numpy\n",
    "import xarray\n",
    "\n",
    "from salishsea_tools import evaltools\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "print(sys.version)\n",
    "print(xarray.__version__)\n",
    "print(dask.__version__)\n",
    "print(netCDF4.__version__)\n",
    "print(numpy.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index bounds of the sub-domain that every calculation below is restricted to:\n",
    "# j0:j1 slices the y direction and i0:i1 slices the x direction\n",
    "j0, j1, i0, i1 = 230, 470, 0, 200"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In-memory copies of the tmask and e3t_0 mesh mask variables on the sub-domain\n",
    "with netCDF4.Dataset(\"/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc\") as mesh:\n",
    "    tmask = numpy.copy(mesh.variables['tmask'][:, :, j0:j1, i0:i1])\n",
    "    e3t0 = numpy.copy(mesh.variables['e3t_0'][:, :, j0:j1, i0:i1])"
   ]
  },
  {
   "cell_type": "code",
"execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistmuZ = evaltools.index_model_files(\n",
    "    datetime(2015, 6, 1), datetime(2015, 6, 9),\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/\",\n",
    "    \"long\", 10, \"ptrc_T\", 1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# First start time and last end time covered by the indexed files\n",
    "flistmuZ['t_0'].iloc[0], flistmuZ['t_n'].iloc[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_depth_avgs(flist, tmask, e3t0, steps_per_file=24*10):\n",
    "    \"\"\"Calculate mean weighted sums of diatoms and microzooplankton from a collection of files.\n",
    "\n",
    "    For each file, the tracer values on the j0:j1, i0:i1 sub-domain are multiplied\n",
    "    by tmask * e3t0 and summed over axis 1. The sums from all of the files are\n",
    "    collected along axis 0 and then averaged over that axis.\n",
    "\n",
    "    :param flist: Data frame with a ``paths`` column of netCDF file paths.\n",
    "    :param tmask: Mask array that broadcasts against the tracer arrays.\n",
    "    :param e3t0: Weights array that broadcasts against the tracer arrays.\n",
    "    :param int steps_per_file: Number of axis 0 steps stored in each file.\n",
    "    :returns: 2-tuple of arrays: (diatoms means, microzooplankton means)\n",
    "    \"\"\"\n",
    "    shape = (len(flist) * steps_per_file, j1 - j0, i1 - i0)\n",
    "    sum1 = numpy.empty(shape)\n",
    "    sum1uZ = numpy.empty(shape)\n",
    "    # Position in the file list, not the data frame index label, locates each file's steps\n",
    "    for file_num, path in enumerate(flist['paths']):\n",
    "        steps = slice(steps_per_file * file_num, steps_per_file * (file_num + 1))\n",
    "        # The context manager closes each file as soon as its values have been read\n",
    "        with netCDF4.Dataset(path) as ds:\n",
    "            diatoms = ds.variables['diatoms'][:, :, j0:j1, i0:i1]\n",
    "            uZ = ds.variables['microzooplankton'][:, :, j0:j1, i0:i1]\n",
    "        sum1[steps] = numpy.sum(tmask * e3t0 * diatoms, 1)\n",
    "        sum1uZ[steps] = numpy.sum(tmask * e3t0 * uZ, 1)\n",
    "    diatom_int = numpy.mean(sum1, 0)\n",
    "    microzoo_int = numpy.mean(sum1uZ, 0)\n",
    "    return diatom_int, microzoo_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "diatoms_muZ_Int, uZ_muZ_Int = calc_depth_avgs(flistmuZ, tmask, e3t0)\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_depth_avgs_no_loop(flist, tmask, e3t0):\n",
    "    \"\"\"Calculate mean weighted sums of diatoms and microzooplankton from a single file.\n",
    "\n",
    "    The tracer values on the j0:j1, i0:i1 sub-domain are multiplied by tmask\n",
    "    and e3t0, summed over axis 1, and then averaged over axis 0.\n",
    "\n",
    "    :param flist: Path of the netCDF file to read; despite the name, one file, not a list.\n",
    "    :param tmask: Mask array that broadcasts against the tracer arrays.\n",
    "    :param e3t0: Weights array that broadcasts against the tracer arrays.\n",
    "    :returns: 2-tuple of arrays: (diatoms means, microzooplankton means)\n",
    "    \"\"\"\n",
    "    with netCDF4.Dataset(flist) as ds:\n",
    "        diatoms = ds.variables['diatoms'][:, :, j0:j1, i0:i1]\n",
    "        diatoms_int = (diatoms * tmask * e3t0).sum(axis=1).mean(axis=0)\n",
    "        uZ = ds.variables['microzooplankton'][:, :, j0:j1, i0:i1]\n",
    "        uZ_int = (uZ * tmask * e3t0).sum(axis=1).mean(axis=0)\n",
    "    return diatoms_int, uZ_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "diatoms_int, uZ_int = calc_depth_avgs_no_loop(\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/SalishSea_1h_20150531_20150709_ptrc_T_20150531-20150609.nc\",\n",
    "    tmask, e3t0)\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)\n",
    "# Verify against the loop results after the clock is stopped so that\n",
    "# the comparisons are not included in the reported time\n",
    "numpy.testing.assert_equal(diatoms_int, diatoms_muZ_Int)\n",
    "numpy.testing.assert_equal(uZ_int, uZ_muZ_Int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The same mesh mask variables as above, this time read via xarray\n",
    "with xarray.open_dataset(\"/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc\") as mesh:\n",
    "    x_tmask = mesh.tmask.isel(y=slice(j0, j1), x=slice(i0, i1)).data\n",
    "    x_e3t0 = mesh.e3t_0.isel(y=slice(j0, j1), x=slice(i0, i1)).data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_depth_avgs_xarray(flist, x_tmask, x_e3t0):\n",
    "    \"\"\"Calculate mean weighted sums of diatoms and microzooplankton from a single file via xarray.\n",
    "\n",
    "    The tracer values on the y=j0:j1, x=i0:i1 sub-domain are multiplied by x_tmask\n",
    "    and x_e3t0, summed over axis 1, and then averaged over axis 0.\n",
    "\n",
    "    :param flist: Path of the netCDF file to read; despite the name, one file, not a list.\n",
    "    :param x_tmask: Mask array that broadcasts against the tracer arrays.\n",
    "    :param x_e3t0: Weights array that broadcasts against the tracer arrays.\n",
    "    :returns: 2-tuple of data arrays: (diatoms means, microzooplankton means)\n",
    "    \"\"\"\n",
    "    with xarray.open_dataset(flist) as ds:\n",
    "        diatoms = ds.diatoms.isel(y=slice(j0, j1), x=slice(i0, i1))\n",
    "        diatoms_int = (diatoms * x_tmask * x_e3t0).sum(axis=1).mean(axis=0)\n",
    "        uZ = ds.microzooplankton.isel(y=slice(j0, j1), x=slice(i0, i1))\n",
    "        uZ_int = (uZ * x_tmask * x_e3t0).sum(axis=1).mean(axis=0)\n",
    "    return diatoms_int, uZ_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "# Use the mask and weights that were read via xarray in the cell above\n",
    "x_diatoms_int, x_uZ_int = calc_depth_avgs_xarray(\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/SalishSea_1h_20150531_20150709_ptrc_T_20150531-20150609.nc\",\n",
    "    x_tmask, x_e3t0)\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)\n",
    "# Verify against the loop results after the clock is stopped so that\n",
    "# the comparisons are not included in the reported time\n",
    "numpy.testing.assert_allclose(x_diatoms_int, diatoms_muZ_Int)\n",
    "numpy.testing.assert_allclose(x_uZ_int, uZ_muZ_Int)"
   ]
  },
  {
   "cell_type":
"code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "client = dask.distributed.Client(\n",
    "    n_workers=2,\n",
    "    threads_per_worker=2,\n",
    "    processes=True,\n",
    ")\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_depth_avgs_dask(flist, chunks, tmask, e3t0):\n",
    "    \"\"\"Set up mean weighted sums of diatoms and microzooplankton from a single chunked file.\n",
    "\n",
    "    The file is opened by xarray with the given chunks. The tracer values on the\n",
    "    y=j0:j1, x=i0:i1 sub-domain are multiplied by tmask and e3t0,\n",
    "    summed over axis 1, and then averaged over axis 0.\n",
    "\n",
    "    :param flist: Path of the netCDF file to read; despite the name, one file, not a list.\n",
    "    :param dict chunks: Chunk sizes that are passed to xarray.open_dataset().\n",
    "    :param tmask: Mask array that broadcasts against the tracer arrays.\n",
    "    :param e3t0: Weights array that broadcasts against the tracer arrays.\n",
    "    :returns: 2-tuple of data arrays: (diatoms means, microzooplankton means)\n",
    "    \"\"\"\n",
    "    sub_domain = dict(y=slice(j0, j1), x=slice(i0, i1))\n",
    "    with xarray.open_dataset(flist, chunks=chunks) as ds:\n",
    "        weighted_diatoms = ds.diatoms.isel(**sub_domain) * tmask * e3t0\n",
    "        diatoms_int = weighted_diatoms.sum(axis=1).mean(axis=0)\n",
    "        weighted_uZ = ds.microzooplankton.isel(**sub_domain) * tmask * e3t0\n",
    "        uZ_int = weighted_uZ.sum(axis=1).mean(axis=0)\n",
    "    return diatoms_int, uZ_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def check_results(diatoms_calc, diatoms_exp, uZ_calc, uZ_exp):\n",
    "    \"\"\"Confirm that the calculated fields match the expected ones.\n",
    "\n",
    "    Each pair is compared with numpy.testing.assert_allclose() using its\n",
    "    default tolerances; an AssertionError is raised for the first pair that differs.\n",
    "\n",
    "    :param diatoms_calc: Calculated diatoms field.\n",
    "    :param diatoms_exp: Expected diatoms field.\n",
    "    :param uZ_calc: Calculated microzooplankton field.\n",
    "    :param uZ_exp: Expected microzooplankton field.\n",
    "    \"\"\"\n",
    "    pairs = ((diatoms_calc, diatoms_exp), (uZ_calc, uZ_exp))\n",
    "    for calculated, expected in pairs:\n",
    "        numpy.testing.assert_allclose(calculated, expected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = {\"time_counter\": 1, \"deptht\": 40, \"y\": 898, \"x\": 398}\n",
    "d_diatoms_int, d_uZ_int = calc_depth_avgs_dask(\n",
    "    flist=\"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/SalishSea_1h_20150531_20150709_ptrc_T_20150531-20150609.nc\",\n",
    "    chunks=chunks,\n",
    "    tmask=tmask,\n",
    "    e3t0=e3t0,\n",
    ")\n",
    "check_results(d_diatoms_int, diatoms_muZ_Int, d_uZ_int, uZ_muZ_Int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = {\"time_counter\": 3, \"deptht\": 40*3, \"y\": 898*3, \"x\": 398*3}\n",
    "d_diatoms_int, d_uZ_int = calc_depth_avgs_dask(\n",
    "    flist=\"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/SalishSea_1h_20150531_20150709_ptrc_T_20150531-20150609.nc\",\n",
    "    chunks=chunks,\n",
    "    tmask=tmask,\n",
    "    e3t0=e3t0,\n",
    ")\n",
    "check_results(d_diatoms_int, diatoms_muZ_Int, d_uZ_int, uZ_muZ_Int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
"outputs": [],
   "source": [
    "chunks = {\"time_counter\": 4, \"deptht\": 40*4, \"y\": 898*4, \"x\": 398*4}\n",
    "d_diatoms_int, d_uZ_int = calc_depth_avgs_dask(\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/SalishSea_1h_20150531_20150709_ptrc_T_20150531-20150609.nc\",\n",
    "    chunks, tmask, e3t0)\n",
    "check_results(d_diatoms_int, diatoms_muZ_Int, d_uZ_int, uZ_muZ_Int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistmuZ = evaltools.index_model_files(\n",
    "    datetime(2015, 6, 1), datetime(2015, 6, 19),\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/\",\n",
    "    \"long\", 10, \"ptrc_T\", 1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistmuZ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "m_diatoms_muZ_Int, m_uZ_muZ_Int = calc_depth_avgs(flistmuZ, tmask, e3t0)\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_depth_avgs_mfdataset(flist, chunks, tmask, e3t0):\n",
    "    \"\"\"Set up mean weighted sums of diatoms and microzooplankton from a list of chunked files.\n",
    "\n",
    "    The files are opened as one dataset by xarray.open_mfdataset(), concatenated\n",
    "    along time_counter in the order given. The tracer values on the\n",
    "    y=j0:j1, x=i0:i1 sub-domain are multiplied by tmask and e3t0,\n",
    "    summed over axis 1, and then averaged over axis 0.\n",
    "\n",
    "    :param flist: List of netCDF file paths.\n",
    "    :param dict chunks: Chunk sizes that are passed to xarray.open_mfdataset().\n",
    "    :param tmask: Mask array that broadcasts against the tracer arrays.\n",
    "    :param e3t0: Weights array that broadcasts against the tracer arrays.\n",
    "    :returns: 2-tuple of data arrays: (diatoms means, microzooplankton means)\n",
    "    \"\"\"\n",
    "    kwargs = dict(combine=\"nested\", concat_dim=\"time_counter\", parallel=True)\n",
    "    with xarray.open_mfdataset(flist, chunks=chunks, **kwargs) as ds:\n",
    "        diatoms = ds.diatoms.isel(y=slice(j0, j1), x=slice(i0, i1))\n",
    "        diatoms_int = (diatoms * tmask * e3t0).sum(axis=1).mean(axis=0)\n",
    "        uZ = ds.microzooplankton.isel(y=slice(j0, j1), x=slice(i0, i1))\n",
    "        uZ_int = (uZ * tmask * e3t0).sum(axis=1).mean(axis=0)\n",
    "    return diatoms_int, uZ_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = {\"time_counter\": 4, \"deptht\": 40*4, \"y\": 898*4, \"x\": 398*4}\n",
    "# Read the same files that calc_depth_avgs() processed to produce the expected values\n",
    "mf_diatoms_int, mf_uZ_int = calc_depth_avgs_mfdataset(\n",
    "    flistmuZ[\"paths\"].tolist(), chunks, tmask, e3t0)\n",
    "check_results(mf_diatoms_int, m_diatoms_muZ_Int, mf_uZ_int, m_uZ_muZ_Int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Final\n",
    "\n",
    "Use the `open_mfdataset()` approach to compare the diatoms and microzooplankton fields from the `testmuZ` run with those from the `hindcast.201905` results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with xarray.open_dataset(\"/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702_noLPE.nc\") as mesh:\n",
    "    x_tmask = mesh.tmask.isel(y=slice(j0, j1), x=slice(i0, i1)).data\n",
    "    x_e3t0 = mesh.e3t_0.isel(y=slice(j0, j1), x=slice(i0, i1)).data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistmuZ = evaltools.index_model_files(\n",
    "    datetime(2015, 6, 1), datetime(2015, 9, 1),\n",
    "    \"/data/eolson/results/MEOPAR/SS36runs/CedarRuns/testmuZ/\",\n",
    "    \"long\", 10, \"ptrc_T\", 1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistmuZ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
"outputs": [],
   "source": [
    "# Index the hindcast files for the period that the muZ run files cover\n",
    "flistHC = evaltools.index_model_files(\n",
    "    flistmuZ['t_0'].iloc[0], flistmuZ['t_n'].iloc[-1] - timedelta(days=1),\n",
    "    '/results/SalishSea/hindcast.201905/', 'nowcast', 1, 'ptrc_T', 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "flistHC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "client = dask.distributed.Client(\n",
    "    n_workers=2, threads_per_worker=2, processes=True)\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = {\"time_counter\": 3, \"deptht\": 40*3, \"y\": 898*3, \"x\": 398*3}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# calc_depth_avgs_mfdataset() is defined earlier in the notebook\n",
    "x_diatoms_muZ_int, x_uZ_muZ_int = calc_depth_avgs_mfdataset(\n",
    "    flistmuZ[\"paths\"].tolist(), chunks, x_tmask, x_e3t0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_diatomsHC_int, x_uZHC_int = calc_depth_avgs_mfdataset(\n",
    "    flistHC[\"paths\"].tolist(), chunks, x_tmask, x_e3t0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
"collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "diatomsmuZInt = x_diatoms_muZ_int.compute()\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "diatomsHCInt = x_diatomsHC_int.compute()\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_comparison(muZ_field, hc_field, tracer_name):\n",
    "    \"\"\"Plot two 2-D fields and their difference as side-by-side pcolormesh panels.\n",
    "\n",
    "    The panels are titled 'muZ <tracer_name>', 'HC <tracer_name>', and 'Diff';\n",
    "    the difference is muZ_field - hc_field. Each panel gets its own colour bar.\n",
    "\n",
    "    :param muZ_field: 2-D field for the left panel.\n",
    "    :param hc_field: 2-D field for the centre panel.\n",
    "    :param str tracer_name: Label to use in the titles of the first two panels.\n",
    "    :returns: 2-tuple: (figure, array of the 3 axes)\n",
    "    \"\"\"\n",
    "    fig, axs = plt.subplots(1, 3, figsize=(15, 4))\n",
    "    panels = (\n",
    "        (muZ_field, f'muZ {tracer_name}'),\n",
    "        (hc_field, f'HC {tracer_name}'),\n",
    "        (muZ_field - hc_field, 'Diff'),\n",
    "    )\n",
    "    for ax, (field, title) in zip(axs, panels):\n",
    "        mesh = ax.pcolormesh(field)\n",
    "        plt.colorbar(mesh, ax=ax)\n",
    "        ax.set_title(title)\n",
    "    return fig, axs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "plot_comparison(diatomsmuZInt, diatomsHCInt, 'Diatoms');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "uZmuZInt = x_uZ_muZ_int.compute()\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "t0 = datetime.now()\n",
    "uZHCInt = x_uZHC_int.compute()\n",
    "t1 = datetime.now()\n",
    "print(t1 - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "plot_comparison(uZmuZInt, uZHCInt, 'microZ');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client.close()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python (py39)",
   "language": "python",
   "name": "py39"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}