{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import intake, intake_esm\n",
"#Ref https://intake-esm.readthedocs.io/_/downloads/en/latest/pdf/\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the catalog "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"col_url = \"https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json\" \n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from dask_gateway import Gateway\n",
"from dask.distributed import Client\n",
"def launchDask(options):\n",
"    \"\"\"Create a new Dask Gateway cluster configured with ``options``.\n",
"\n",
"    If the gateway already lists clusters, the first one listed is connected\n",
"    to and shut down before the new cluster is requested, so that ``options``\n",
"    is applied to a newly created cluster. Any further listed clusters are\n",
"    left untouched.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    options\n",
"        Cluster options object, passed unchanged to ``Gateway.new_cluster``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The cluster object returned by ``Gateway.new_cluster``.\n",
"    \"\"\"\n",
"    # TODO: wrap the gateway calls in try/except.\n",
"    gateway = Gateway()\n",
"    clusters = gateway.list_clusters()\n",
"    if clusters:\n",
"        # Shut the existing cluster down and start afresh; custom options are\n",
"        # only applied here when a cluster is created.\n",
"        existing = gateway.connect(clusters[0].name)\n",
"        print(\"lets close existing connection\")\n",
"        existing.shutdown()\n",
"    return gateway.new_cluster(options)\n",
"\n",
"# Ask the gateway for its cluster options and set the per-worker memory.\n",
"gateway = Gateway()\n",
"options = gateway.cluster_options()\n",
"options.worker_memory = 8  # NOTE(review): unit is defined by the gateway configuration -- confirm\n",
"\n",
"cluster = launchDask(options)\n",
"\n",
"# `Client` is already imported at the top of this cell.\n",
"client = Client(cluster)\n",
"\n",
"# Enable adaptive scaling between 0 and 10 workers.\n",
"cluster.adapt(minimum=0, maximum=10)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"Client\n",
"\n",
" | \n",
"\n",
"Cluster\n",
"\n",
" - Workers: 0
\n",
" - Cores: 0
\n",
" - Memory: 0 B
\n",
" \n",
" | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"col = intake.open_esm_datastore(col_url)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"esmcol_data = col.esmcol_data"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"# thetao from the Omon table, historical experiment, member r1i1p1f1,\n",
"# grid_label 'gn', for the models listed below.\n",
"# `version` is left unconstrained in this query.\n",
"# (Commented-out alternative model selection: ['GFDL-CM4', 'GFDL-ESM4'].)\n",
"cat_T = col.search(\n",
"    experiment_id=['historical'],\n",
"    mip_table='Omon',\n",
"    ensemble_member=['r1i1p1f1'],\n",
"    model=[\n",
"        'CESM2', 'CESM2-FV2', 'CESM2-WACCM-FV2', 'CIESM',\n",
"        'IPSL-CM6A-LR', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR', 'MIROC6',\n",
"        'CanESM5', 'MPI-ESM-1-2-HAM', 'MRI-ESM2-0', 'SAM0-UNICON',\n",
"    ],\n",
"    grid_label=['gn'],\n",
"    variable=['thetao'],\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" project | \n",
" institute | \n",
" experiment_id | \n",
" frequency | \n",
" modeling_realm | \n",
" mip_table | \n",
" ensemble_member | \n",
" grid_label | \n",
" variable | \n",
" temporal subset | \n",
" version | \n",
" path | \n",
"
\n",
" \n",
" model | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" CESM2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" CESM2-FV2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" CESM2-WACCM-FV2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" CIESM | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" CanESM5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
" 2 | \n",
" 34 | \n",
"
\n",
" \n",
" IPSL-CM6A-LR | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" MIROC6 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
" 1 | \n",
" 17 | \n",
"
\n",
" \n",
" MPI-ESM-1-2-HAM | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 9 | \n",
" 1 | \n",
" 9 | \n",
"
\n",
" \n",
" MPI-ESM1-2-HR | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 33 | \n",
" 1 | \n",
" 33 | \n",
"
\n",
" \n",
" MPI-ESM1-2-LR | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 9 | \n",
" 1 | \n",
" 9 | \n",
"
\n",
" \n",
" MRI-ESM2-0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 3 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" SAM0-UNICON | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
" 1 | \n",
" 17 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" project institute experiment_id frequency modeling_realm \\\n",
"model \n",
"CESM2 1 1 1 1 1 \n",
"CESM2-FV2 1 1 1 1 1 \n",
"CESM2-WACCM-FV2 1 1 1 1 1 \n",
"CIESM 1 1 1 1 1 \n",
"CanESM5 1 1 1 1 1 \n",
"IPSL-CM6A-LR 1 1 1 1 1 \n",
"MIROC6 1 1 1 1 1 \n",
"MPI-ESM-1-2-HAM 1 1 1 1 1 \n",
"MPI-ESM1-2-HR 1 1 1 1 1 \n",
"MPI-ESM1-2-LR 1 1 1 1 1 \n",
"MRI-ESM2-0 1 1 1 1 1 \n",
"SAM0-UNICON 1 1 1 1 1 \n",
"\n",
" mip_table ensemble_member grid_label variable \\\n",
"model \n",
"CESM2 1 1 1 1 \n",
"CESM2-FV2 1 1 1 1 \n",
"CESM2-WACCM-FV2 1 1 1 1 \n",
"CIESM 1 1 1 1 \n",
"CanESM5 1 1 1 1 \n",
"IPSL-CM6A-LR 1 1 1 1 \n",
"MIROC6 1 1 1 1 \n",
"MPI-ESM-1-2-HAM 1 1 1 1 \n",
"MPI-ESM1-2-HR 1 1 1 1 \n",
"MPI-ESM1-2-LR 1 1 1 1 \n",
"MRI-ESM2-0 1 1 1 1 \n",
"SAM0-UNICON 1 1 1 1 \n",
"\n",
" temporal subset version path \n",
"model \n",
"CESM2 1 1 1 \n",
"CESM2-FV2 4 1 4 \n",
"CESM2-WACCM-FV2 4 1 4 \n",
"CIESM 4 1 4 \n",
"CanESM5 17 2 34 \n",
"IPSL-CM6A-LR 2 1 2 \n",
"MIROC6 17 1 17 \n",
"MPI-ESM-1-2-HAM 9 1 9 \n",
"MPI-ESM1-2-HR 33 1 33 \n",
"MPI-ESM1-2-LR 9 1 9 \n",
"MRI-ESM2-0 3 1 3 \n",
"SAM0-UNICON 17 1 17 "
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T.df.groupby(['model']).nunique()#CanESM5 has two versions"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" project | \n",
" institute | \n",
" model | \n",
" experiment_id | \n",
" frequency | \n",
" modeling_realm | \n",
" mip_table | \n",
" ensemble_member | \n",
" grid_label | \n",
" variable | \n",
" temporal subset | \n",
" path | \n",
"
\n",
" \n",
" version | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" v20190306 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
" 17 | \n",
"
\n",
" \n",
" v20190429 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
" 17 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" project institute model experiment_id frequency \\\n",
"version \n",
"v20190306 1 1 1 1 1 \n",
"v20190429 1 1 1 1 1 \n",
"\n",
" modeling_realm mip_table ensemble_member grid_label variable \\\n",
"version \n",
"v20190306 1 1 1 1 1 \n",
"v20190429 1 1 1 1 1 \n",
"\n",
" temporal subset path \n",
"version \n",
"v20190306 17 17 \n",
"v20190429 17 17 "
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T.df[cat_T.df['model']=='CanESM5'].groupby(['version']).nunique() #before filtering: two versions are present (v20190306 and v20190429)\n"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"def latest_version(ds):\n",
"    \"\"\"Keep only the newest DRS version of every catalog entry.\n",
"\n",
"    Entries are identified by the key columns listed in ``keys``. For each\n",
"    distinct key combination the complete row with the greatest ``version``\n",
"    string is kept. Versions are compared as strings, which orders values of\n",
"    the form ``vYYYYMMDD`` chronologically.\n",
"\n",
"    Rows with a missing key value (for instance no ``temporal subset``) are\n",
"    kept. A ``groupby`` on the same keys would silently discard them.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ds\n",
"        Object exposing the catalog as a pandas DataFrame in ``ds.df``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per key combination, key columns first, sorted by the key\n",
"        columns, with a fresh ``RangeIndex``. ``ds.df`` is not modified.\n",
"    \"\"\"\n",
"    keys = ['temporal subset', 'model', 'mip_table', 'institute', 'variable',\n",
"            'ensemble_member', 'grid_label', 'experiment_id']\n",
"    df = ds.df\n",
"    other_columns = [name for name in df.columns if name not in keys]\n",
"    newest = (\n",
"        # Missing versions sort first so a real version always wins the tie-break.\n",
"        df.sort_values('version', na_position='first')\n",
"        # drop_duplicates keeps whole rows and treats missing keys as equal,\n",
"        # so no row is lost and no fields are mixed between rows.\n",
"        .drop_duplicates(subset=keys, keep='last')\n",
"        .sort_values(keys)\n",
"        .reset_index(drop=True)\n",
"    )\n",
"    # NOTE(review): the newest version is chosen per file (`temporal subset` is a key);\n",
"    # confirm this is intended if a newer version can change the time chunking.\n",
"    return newest[keys + other_columns]"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
"cat_T_new = latest_version(cat_T)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" temporal subset | \n",
" model | \n",
" mip_table | \n",
" institute | \n",
" variable | \n",
" ensemble_member | \n",
" grid_label | \n",
" experiment_id | \n",
" project | \n",
" frequency | \n",
" modeling_realm | \n",
" path | \n",
"
\n",
" \n",
" version | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" v20190429 | \n",
" 17 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" temporal subset model mip_table institute variable \\\n",
"version \n",
"v20190429 17 1 1 1 1 \n",
"\n",
" ensemble_member grid_label experiment_id project frequency \\\n",
"version \n",
"v20190429 1 1 1 1 1 \n",
"\n",
" modeling_realm path \n",
"version \n",
"v20190429 1 17 "
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_new[cat_T_new['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.\n"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" temporal subset | \n",
" model | \n",
" mip_table | \n",
" institute | \n",
" variable | \n",
" ensemble_member | \n",
" grid_label | \n",
" experiment_id | \n",
" project | \n",
" frequency | \n",
" modeling_realm | \n",
" version | \n",
" path | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 185001-186012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 16 | \n",
" 186101-187012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 23 | \n",
" 187101-188012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 28 | \n",
" 188101-189012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 35 | \n",
" 189101-190012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 44 | \n",
" 190101-191012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 51 | \n",
" 191101-192012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 56 | \n",
" 192101-193012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 63 | \n",
" 193101-194012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 68 | \n",
" 194101-195012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 80 | \n",
" 195101-196012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 85 | \n",
" 196101-197012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 92 | \n",
" 197101-198012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 97 | \n",
" 198101-199012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 104 | \n",
" 199101-200012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 112 | \n",
" 200101-201012 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
" 119 | \n",
" 201101-201412 | \n",
" CanESM5 | \n",
" Omon | \n",
" CCCma | \n",
" thetao | \n",
" r1i1p1f1 | \n",
" gn | \n",
" historical | \n",
" CMIP6 | \n",
" mon | \n",
" ocean | \n",
" v20190429 | \n",
" s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" temporal subset model mip_table institute variable ensemble_member \\\n",
"3 185001-186012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"16 186101-187012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"23 187101-188012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"28 188101-189012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"35 189101-190012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"44 190101-191012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"51 191101-192012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"56 192101-193012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"63 193101-194012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"68 194101-195012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"80 195101-196012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"85 196101-197012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"92 197101-198012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"97 198101-199012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"104 199101-200012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"112 200101-201012 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"119 201101-201412 CanESM5 Omon CCCma thetao r1i1p1f1 \n",
"\n",
" grid_label experiment_id project frequency modeling_realm version \\\n",
"3 gn historical CMIP6 mon ocean v20190429 \n",
"16 gn historical CMIP6 mon ocean v20190429 \n",
"23 gn historical CMIP6 mon ocean v20190429 \n",
"28 gn historical CMIP6 mon ocean v20190429 \n",
"35 gn historical CMIP6 mon ocean v20190429 \n",
"44 gn historical CMIP6 mon ocean v20190429 \n",
"51 gn historical CMIP6 mon ocean v20190429 \n",
"56 gn historical CMIP6 mon ocean v20190429 \n",
"63 gn historical CMIP6 mon ocean v20190429 \n",
"68 gn historical CMIP6 mon ocean v20190429 \n",
"80 gn historical CMIP6 mon ocean v20190429 \n",
"85 gn historical CMIP6 mon ocean v20190429 \n",
"92 gn historical CMIP6 mon ocean v20190429 \n",
"97 gn historical CMIP6 mon ocean v20190429 \n",
"104 gn historical CMIP6 mon ocean v20190429 \n",
"112 gn historical CMIP6 mon ocean v20190429 \n",
"119 gn historical CMIP6 mon ocean v20190429 \n",
"\n",
" path \n",
"3 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"16 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"23 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"28 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"35 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"44 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"51 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"56 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"63 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"68 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"80 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"85 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"92 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"97 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"104 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"112 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... \n",
"119 s3://esgf-world/CMIP6/CMIP/CCCma/CanESM5/histo... "
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_new[cat_T_new['model']=='CanESM5'] #one distinct version only, latest one."
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"cat_T = intake.open_esm_datastore(cat_T_new,esmcol_data=esmcol_data)"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" temporal subset | \n",
" model | \n",
" mip_table | \n",
" institute | \n",
" variable | \n",
" ensemble_member | \n",
" grid_label | \n",
" experiment_id | \n",
" project | \n",
" frequency | \n",
" modeling_realm | \n",
" path | \n",
"
\n",
" \n",
" version | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" v20190429 | \n",
" 17 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 17 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" temporal subset model mip_table institute variable \\\n",
"version \n",
"v20190429 17 1 1 1 1 \n",
"\n",
" ensemble_member grid_label experiment_id project frequency \\\n",
"version \n",
"v20190429 1 1 1 1 1 \n",
"\n",
" modeling_realm path \n",
"version \n",
"v20190429 1 17 "
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#cat_T['CMIP6.CCCma.CanESM5.historical.Omon']\n",
"cat_T.df[cat_T.df['model']=='CanESM5'].groupby(['version']).nunique() #one distinct version only, latest one.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## VOLCELLO test: latest-version filtering on NorESM2-LM `volcello`"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [],
"source": [
"# volcello for NorESM2-LM, searched in both the Ofx and Omon tables.\n",
"# grid_label and version are left unconstrained in this query.\n",
"# (Commented-out alternative model selection: ['GFDL-CM4', 'GFDL-ESM4'].)\n",
"cat_T_v = col.search(\n",
"    experiment_id=['historical'],\n",
"    mip_table=['Ofx', 'Omon'],\n",
"    ensemble_member=['r1i1p1f1'],\n",
"    model=['NorESM2-LM'],\n",
"    variable=['volcello'],\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" project | \n",
" institute | \n",
" experiment_id | \n",
" frequency | \n",
" modeling_realm | \n",
" mip_table | \n",
" ensemble_member | \n",
" grid_label | \n",
" variable | \n",
" temporal subset | \n",
" version | \n",
" path | \n",
"
\n",
" \n",
" model | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" NorESM2-LM | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 13 | \n",
" 2 | \n",
" 14 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" project institute experiment_id frequency modeling_realm \\\n",
"model \n",
"NorESM2-LM 1 1 1 1 1 \n",
"\n",
" mip_table ensemble_member grid_label variable temporal subset \\\n",
"model \n",
"NorESM2-LM 2 1 2 1 13 \n",
"\n",
" version path \n",
"model \n",
"NorESM2-LM 2 14 "
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_v.df.groupby(['model']).nunique()#NorESM2-LM has two versions"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['v20190815', 'v20191108'], dtype=object)"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_v.df.version.unique()"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [],
"source": [
"cat_T_vol = latest_version(cat_T_v)"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['v20191108'], dtype=object)"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_vol.version.unique()"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" temporal subset | \n",
" mip_table | \n",
" institute | \n",
" variable | \n",
" ensemble_member | \n",
" grid_label | \n",
" experiment_id | \n",
" project | \n",
" frequency | \n",
" modeling_realm | \n",
" version | \n",
" path | \n",
"
\n",
" \n",
" model | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" NorESM2-LM | \n",
" 13 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" temporal subset mip_table institute variable ensemble_member \\\n",
"model \n",
"NorESM2-LM 13 1 1 1 1 \n",
"\n",
" grid_label experiment_id project frequency modeling_realm \\\n",
"model \n",
"NorESM2-LM 1 1 1 1 1 \n",
"\n",
" version path \n",
"model \n",
"NorESM2-LM 1 13 "
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_vol.groupby(['model']).nunique() "
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"cat_T_volstore = intake.open_esm_datastore(cat_T_vol,esmcol_data=esmcol_data)"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['v20191108'], dtype=object)"
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat_T_volstore.df.version.unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}