In [None]:
from glob import glob
import xarray as xr
import cftime
import nc_time_axis
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.options.display.max_rows = 200
import intake, intake_esm
! pip install cmip6_preprocessing
from cmip6_preprocessing.preprocessing import (correct_units,rename_cmip6)

## Functions for preprocessing CMIP6 data 

In [12]:
def chunk_time(ds):
    """Rechunk ``ds`` with a chunk size of 1 along ``time``, if it has that dimension.

    Objects whose ``dims`` do not contain ``'time'`` are returned unchanged.
    """
    has_time_axis = 'time' in ds.dims
    return ds.chunk({'time': 1}) if has_time_axis else ds

In [13]:
# Necessary for creating a common time axis for all models
# We want to create a common time axis so there will be no gaps when plotting the results 

def fix_time(ds):
    """Tag the time axis with a ``noleap`` calendar, then CF-decode the dataset.

    A dataset without a ``time`` dimension is returned unchanged. Otherwise the
    ``calendar`` attribute of ``ds["time"]`` is set to ``"noleap"`` unless it is
    already ``"noleap"``, ``"NOLEAP"`` or ``"365_day"``, and the result of
    ``xr.decode_cf(ds)`` is returned.
    """
    import xarray as xr

    accepted_calendars = ["noleap", "NOLEAP", "365_day"]

    if "time" in ds.dims:
        time_attrs = ds["time"].attrs
        # A missing calendar and an unrecognised one are handled the same way.
        if time_attrs.get("calendar") not in accepted_calendars:
            time_attrs.update({"calendar": "noleap"})
        ds = xr.decode_cf(ds)

    return ds

In [14]:
# Pass this function for preprocessing thetao data 
def pp_thetao(ds):
    """Preprocess a thetao dataset.

    Runs ``rename_cmip6``, ``fix_time`` and ``correct_units`` on ``ds``, in that
    order, and returns the result.
    """
    for step in (rename_cmip6, fix_time, correct_units):
        ds = step(ds)
    return ds

In [15]:
# Pass this function for preprocessing volcello and areacello data
def pp_volcello(ds):
    """Preprocess a volcello or areacello dataset.

    Runs ``rename_cmip6``, ``chunk_time``, ``fix_time`` and ``correct_units`` on
    ``ds``, in that order, and returns the result.
    """
    for step in (rename_cmip6, chunk_time, fix_time, correct_units):
        ds = step(ds)
    return ds

In [16]:
# Use this function to reconstruct areacello (cell areas) for models on a regular grid
def compute_area_regular_grid(ds, Rearth=6378e3, dlon=1.0, dlat=1.0):
    """Compute cell areas on a regular latitude-longitude grid.

    Each cell is approximated as a rectangle whose zonal width shrinks with the
    cosine of latitude: ``area = (rfac * dlon * cos(lat)) * (rfac * dlat)``,
    where ``rfac`` is the arc length of one degree on a sphere of radius
    ``Rearth``.

    Parameters
    ----------
    ds : mapping-like
        Must provide ``ds["x"].values`` and ``ds["y"].values``. ``ds["y"]`` is
        used as latitude in degrees.
        NOTE(review): assumes the 1-D ``x``/``y`` coordinates hold
        longitude/latitude — confirm for each model before use.
    Rearth : float, optional
        Sphere radius (presumably metres, which would give areas in m^2).
    dlon, dlat : float, optional
        Grid spacing in degrees of longitude / latitude. The defaults (1.0)
        reproduce the previously hard-coded 1-degree grid.

    Returns
    -------
    xarray.DataArray
        Cell areas with dims ``('y', 'x')`` and no coordinates attached.
    """
    # Arc length of one degree on the sphere.
    rfac = 2 * np.pi * Rearth / 360

    dx_equator = rfac * dlon
    dy = rfac * dlat

    # Broadcast latitude over the (y, x) grid; the x values are only needed for shape.
    _, lat2d = np.meshgrid(ds["x"].values, ds["y"].values)

    # Zonal cell width shrinks with cos(latitude).
    dx = dx_equator * np.cos(2 * np.pi * lat2d / 360)
    area = dx * dy
    return xr.DataArray(area, dims=('y', 'x'))

## Load the catalog with Intake-ESM

In [17]:
# URL of the JSON collection spec (esgf-world.json) passed to intake.open_esm_datastore.
col_url = "https://raw.githubusercontent.com/aradhakrishnanGFDL/gfdl-aws-analysis/community/esm-collection-spec-examples/esgf-world.json"

In [18]:
# Open the catalog described by the collection spec at col_url.
col = intake.open_esm_datastore(col_url)
# Keep the collection metadata: it is passed back to open_esm_datastore further
# down, when version-filtered DataFrames are wrapped into catalogs again.
esmcol_data = col.esmcol_data
# Display the catalog summary.
col

Unnamed: 0,unique
project,1
institute,38
model,106
experiment_id,217
frequency,2
modeling_realm,3
mip_table,37
ensemble_member,2725
grid_label,13
variable,738


In [62]:
def latest_version(cat):
    """Keep only the newest DRS version of every file entry in a catalog.

    Rows of ``cat.df`` are sorted by ``version`` then ``path``. For each
    combination of the identity columns in ``dataset_keys``, only the last row
    (the highest ``version``) is kept.

    Parameters
    ----------
    cat : esmdatastore
        Catalog whose ``.df`` DataFrame has ``version`` and ``path`` columns
        plus the identity columns listed in ``dataset_keys``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, ordered by ``version`` then ``path``. This is a
        plain DataFrame, not an esmdatastore; it has to be wrapped again to be
        used as a catalog. ``cat.df`` itself is not modified.

    Notes
    -----
    Versions are compared as sorted by pandas; for string labels such as
    ``v20190308`` this is lexicographic order.
    NOTE(review): assumes all version labels share that fixed-width format — confirm.
    """
    # Columns that together identify one file of one dataset, ignoring its version.
    dataset_keys = ['temporal subset', 'model', 'mip_table',
                    'institute', 'variable', 'ensemble_member',
                    'grid_label', 'experiment_id']

    ordered = cat.df.sort_values(by=['version', 'path'])
    # After sorting, the last duplicate within each key group is the newest version.
    return ordered.drop_duplicates(subset=dataset_keys, keep='last')

In [52]:

# Alternative model selection, currently disabled:
# gn_models = ['ACCESS-ESM1-5','BCC-CSM2-MR','BCC-ESM1','CAMS-CSM1-0','EC-Earth3',
#              'EC-Earth3-Veg','FIO-ESM-2-0','GISS-E2-1-G','GISS-E2-1-G-CC','NESM3','ACCESS-CM2','CIESM','SAM0-UNICON','MPI-ESM1-2-HR',
#               'CanESM5','FGOALS-f3-L','IPSL-CM6A-LR','MIROC6','MPI-ESM-1-2-HAM','MPI-ESM1-2-LR']
gn_models = ['CESM2', 'CESM2-FV2', 'CESM2-WACCM-FV2','IPSL-CM6A-LR','MPI-ESM1-2-HR','MPI-ESM1-2-LR','MIROC6','CanESM5',
                        'MPI-ESM-1-2-HAM','MRI-ESM2-0','SAM0-UNICON']

# Facets shared by all four queries: historical experiment, first ensemble
# member, the selected models, grid label 'gn'.
gn_historical_facets = dict(
    experiment_id=['historical'],
    ensemble_member=["r1i1p1f1"],
    model=gn_models,
    grid_label=['gn'],
)

# thetao from the Omon table
cat_T_gn = col.search(mip_table=['Omon'], variable=["thetao"], **gn_historical_facets)

# volcello from the Omon table
cat_VOmon_gn = col.search(mip_table=['Omon'], variable=["volcello"], **gn_historical_facets)

# volcello from the Ofx table
cat_VOfx_gn = col.search(mip_table=['Ofx'], variable=["volcello"], **gn_historical_facets)

# areacello from either table
cat_A_gn = col.search(mip_table=['Omon', 'Ofx'], variable=['areacello'], **gn_historical_facets)

  warn(message)


In [53]:
# Show the DataFrame behind the Ofx volcello search result.
cat_VOfx_gn.df

Unnamed: 0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
0,CMIP6,MPI-M,MPI-ESM1-2-HR,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190710,s3://esgf-world/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR...
1,CMIP6,NCAR,CESM2-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-FV2/hist...
2,CMIP6,NCAR,CESM2-WACCM-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-WACCM-FV...
3,CMIP6,NCAR,CESM2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190308,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2/historic...
4,CMIP6,SNU,SAM0-UNICON,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190323,s3://esgf-world/CMIP6/CMIP/SNU/SAM0-UNICON/his...


In [63]:
# Reduce each search result to its newest version; latest_version returns a
# DataFrame for each catalog, in the same order as the names on the left.
cat_T_gn_latest, cat_VOmon_gn_latest, cat_VOfx_gn_latest, cat_A_gn_latest = (
    latest_version(catalog)
    for catalog in (cat_T_gn, cat_VOmon_gn, cat_VOfx_gn, cat_A_gn)
)

In [64]:
# First rows of the version-filtered Ofx volcello table (a DataFrame, hence .head()).
cat_VOfx_gn_latest.head()

Unnamed: 0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
3,CMIP6,NCAR,CESM2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190308,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2/historic...
4,CMIP6,SNU,SAM0-UNICON,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190323,s3://esgf-world/CMIP6/CMIP/SNU/SAM0-UNICON/his...
0,CMIP6,MPI-M,MPI-ESM1-2-HR,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190710,s3://esgf-world/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR...
1,CMIP6,NCAR,CESM2-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-FV2/hist...
2,CMIP6,NCAR,CESM2-WACCM-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-WACCM-FV...


In [59]:
# Unfiltered Ofx volcello table again, for comparison with the filtered one.
# NOTE(review): this repeats an earlier cell verbatim — candidate for removal.
cat_VOfx_gn.df

Unnamed: 0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
0,CMIP6,MPI-M,MPI-ESM1-2-HR,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190710,s3://esgf-world/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR...
1,CMIP6,NCAR,CESM2-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-FV2/hist...
2,CMIP6,NCAR,CESM2-WACCM-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-WACCM-FV...
3,CMIP6,NCAR,CESM2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190308,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2/historic...
4,CMIP6,SNU,SAM0-UNICON,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190323,s3://esgf-world/CMIP6/CMIP/SNU/SAM0-UNICON/his...


In [24]:
# Wrap the version-filtered DataFrames back into catalogs, reusing the
# collection metadata saved in esmcol_data.
# NOTE: this rebinds the cat_*_gn names from the search results to the filtered
# catalogs, so what those names hold depends on which cells have been run.
cat_T_gn = intake.open_esm_datastore(cat_T_gn_latest,esmcol_data=esmcol_data)
cat_VOmon_gn = intake.open_esm_datastore(cat_VOmon_gn_latest,esmcol_data=esmcol_data)
cat_VOfx_gn = intake.open_esm_datastore(cat_VOfx_gn_latest,esmcol_data=esmcol_data)
cat_A_gn = intake.open_esm_datastore(cat_A_gn_latest,esmcol_data=esmcol_data)

In [66]:
# Show the complete version-filtered Ofx volcello table.
cat_VOfx_gn_latest

Unnamed: 0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
3,CMIP6,NCAR,CESM2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190308,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2/historic...
4,CMIP6,SNU,SAM0-UNICON,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190323,s3://esgf-world/CMIP6/CMIP/SNU/SAM0-UNICON/his...
0,CMIP6,MPI-M,MPI-ESM1-2-HR,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20190710,s3://esgf-world/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR...
1,CMIP6,NCAR,CESM2-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-FV2/hist...
2,CMIP6,NCAR,CESM2-WACCM-FV2,historical,,,Ofx,r1i1p1f1,gn,volcello,,v20191120,s3://esgf-world/CMIP6/CMIP/NCAR/CESM2-WACCM-FV...


In [79]:

# Distinct values per column, grouped by version, for CanESM5 in the thetao catalog.
# NOTE(review): the saved output lists two versions (v20190306 and v20190429),
# so cat_T_gn was presumably still the unfiltered search result when this cell
# ran. Re-check after a top-to-bottom run.
cat_T_gn.df[cat_T_gn.df['model']=='CanESM5'].groupby(['version']).nunique()


Unnamed: 0_level_0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,path
version,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
v20190306,1,1,1,1,1,1,1,1,1,1,17,17
v20190429,1,1,1,1,1,1,1,1,1,1,17,17


In [82]:
# Same check on the version-filtered table: only one version (the latest) should remain.
cat_T_gn_latest[cat_T_gn_latest['model']=='CanESM5'].groupby(['version']).nunique()

Unnamed: 0_level_0,project,institute,model,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,path
version,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
v20190429,1,1,1,1,1,1,1,1,1,1,17,17


In [90]:
# Distinct values per column for each model in the thetao catalog
# (e.g. how many versions and file paths each model has).
cat_T_gn.df.groupby(['model']).nunique()

Unnamed: 0_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CESM2,1,1,1,1,1,1,1,1,1,1,1,1
CESM2-FV2,1,1,1,1,1,1,1,1,1,4,1,4
CESM2-WACCM-FV2,1,1,1,1,1,1,1,1,1,4,1,4
CanESM5,1,1,1,1,1,1,1,1,1,17,2,34
IPSL-CM6A-LR,1,1,1,1,1,1,1,1,1,2,1,2
MIROC6,1,1,1,1,1,1,1,1,1,17,1,17
MPI-ESM-1-2-HAM,1,1,1,1,1,1,1,1,1,9,1,9
MPI-ESM1-2-HR,1,1,1,1,1,1,1,1,1,33,1,33
MPI-ESM1-2-LR,1,1,1,1,1,1,1,1,1,9,1,9
MRI-ESM2-0,1,1,1,1,1,1,1,1,1,3,1,3


In [91]:
# Distinct values per column for each model in the areacello catalog.
cat_A_gn.df.groupby(['model']).nunique() 

Unnamed: 0_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CESM2,1,1,1,0,0,1,1,1,1,0,1,1
CESM2-FV2,1,1,1,0,0,1,1,1,1,0,1,1
CESM2-WACCM-FV2,1,1,1,0,0,1,1,1,1,0,1,1
CanESM5,1,1,1,0,0,1,1,1,1,0,1,1
IPSL-CM6A-LR,1,1,1,0,0,1,1,1,1,0,1,1
MIROC6,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM-1-2-HAM,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM1-2-HR,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM1-2-LR,1,1,1,0,0,1,1,1,1,0,1,1
MRI-ESM2-0,1,1,1,0,0,1,1,1,1,0,1,1


In [95]:
# Same per-model summary on the version-filtered areacello table (a DataFrame, so no .df).
cat_A_gn_latest.groupby(['model']).nunique()

Unnamed: 0_level_0,project,institute,experiment_id,frequency,modeling_realm,mip_table,ensemble_member,grid_label,variable,temporal subset,version,path
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CESM2,1,1,1,0,0,1,1,1,1,0,1,1
CESM2-FV2,1,1,1,0,0,1,1,1,1,0,1,1
CESM2-WACCM-FV2,1,1,1,0,0,1,1,1,1,0,1,1
CanESM5,1,1,1,0,0,1,1,1,1,0,1,1
IPSL-CM6A-LR,1,1,1,0,0,1,1,1,1,0,1,1
MIROC6,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM-1-2-HAM,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM1-2-HR,1,1,1,0,0,1,1,1,1,0,1,1
MPI-ESM1-2-LR,1,1,1,0,0,1,1,1,1,0,1,1
MRI-ESM2-0,1,1,1,0,0,1,1,1,1,0,1,1
