# Demo: using the C3S intake catalog

Intake Example:
https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb


In [None]:
import intake

## Open remote catalog

In [None]:
# Open the catalog description (YAML) straight from its raw GitHub URL.
# NOTE(review): the URL is pinned to the `update_intake_catalog` branch —
# confirm that this branch is still the intended location of the catalog.
cat_url = "https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/update_intake_catalog/intake/catalogs/c3s.yaml"
cat = intake.open_catalog(cat_url)


In [None]:
# Iterating the catalog yields the names of its entries.
list(cat)

In [None]:
# Print the description of the 'c3s-cmip6' entry of the catalog.
print(cat['c3s-cmip6'])

## Load catalog for c3s-cmip6
Catalogs will be cached locally in `~/.intake/cache`.

See: https://intake.readthedocs.io/en/latest/catalog.html?highlight=simplecache#caching-source-files-locally

In [None]:
# Read the complete 'c3s-cmip6' entry into memory; the cells below use the
# result as a pandas DataFrame (info/head/loc).
df = cat['c3s-cmip6'].read()

### Show memory usage

In [None]:
# memory_usage='deep' makes pandas inspect object (string) columns as well,
# so the reported size reflects the real memory consumption.
df.info(memory_usage='deep')

## Show first datasets

In [None]:
# Preview the first rows of the catalog table (head() defaults to 5 rows).
df.head()

## Show number of datasets

In [None]:
# Count the distinct dataset identifiers found in the ds_id column.
df.ds_id.nunique()

## Define a search function for dataset and time

In [None]:
def search(df, collection, time=None):
    """Return the sorted file paths of one dataset that overlap a time range.

    This is a common search done in rook.

    Args:
        df: Catalog table with the columns ``ds_id``, ``path``,
            ``start_time`` and ``end_time``. The time columns are compared
            as strings and are assumed to hold ISO-8601 timestamps shaped
            like the fill values used below (``"1000-01-01T12:00:00"``).
        collection: Dataset identifier, matched exactly against ``ds_id``.
        time: Optional time restriction, either ``"start/end"`` or a single
            ``"start"``. Either side may be empty. A missing start defaults
            to ``"1800-01-01"`` and a missing end to ``"2500-12-31"``.

    Returns:
        A list with the ``path`` values of all matching rows, sorted
        ascending. Rows without start/end time always match.

    Raises:
        ValueError: If ``time`` contains more than one ``"/"``.
    """
    start = end = None
    if time:
        parts = [part.strip() for part in time.split("/")]
        if len(parts) > 2:
            raise ValueError(
                f"time must be 'start' or 'start/end', got {time!r}"
            )
        start = parts[0]
        if len(parts) == 2:
            end = parts[1]

    start = start or "1800-01-01"
    end = end or "2500-12-31"

    # Select the dataset first: the fill below then copies only its rows
    # instead of the whole catalog.
    rows = df.loc[df.ds_id == collection]

    # Rows without time information get a range wide enough to match any
    # request.
    rows = rows.fillna(
        {"start_time": "1000-01-01T12:00:00", "end_time": "3000-12-31T12:00:00"}
    )

    # The bounds are compared as strings. Truncating start_time to the
    # precision of ``end`` keeps the upper bound inclusive: with a plain
    # comparison "2001-12-31T12:00:00" <= "2001-12-31" is False and a file
    # starting on the last requested day would be dropped.
    starts_before_end = rows.start_time.astype(str).str[: len(end)] <= end
    ends_after_start = rows.end_time >= start

    # tolist() keeps every row; a detour through to_dict() would collapse
    # rows sharing the same index label.
    return rows.loc[ends_after_start & starts_before_end, "path"].sort_values().tolist()
 

## Search for a dataset with time restrictions

In [None]:
# Look up the files of one dataset that overlap the years 2000-2001;
# `time` uses the "start/end" form understood by search().
result = search(
 df, 
 collection="c3s-cmip6.CMIP.SNU.SAM0-UNICON.historical.r1i1p1f1.day.pr.gn.v20190323",
 time="2000-01-01/2001-12-31")
result

## Search for a dataset with no time axis (fx, fixed fields)

In [None]:
# Show the dataset ids of all rows whose table_id is "fx".
df.loc[df.table_id=="fx"].ds_id

In [None]:
# Pick the first "fx" dataset by filtering on table_id rather than relying on
# a hard-coded row position, which would silently point at a different
# dataset whenever the catalog is regenerated.
collection = df.loc[df.table_id == "fx"].ds_id.iloc[0]
collection

In [None]:
# A time range is passed even though the dataset is a fixed field: search()
# fills missing start/end times with a very wide range, so such rows match
# any request. (Presumably fx rows carry no start/end time — verify against
# the catalog.)
result = search(df, collection=collection, time="2000-01-01/2010-12-31")
result

## Other searches ...

In [None]:
# Facet search: keep only the rows where every listed column equals the
# requested value, then preview the first matches.
result = df.query(
    'variable_id == "tas"'
    ' and experiment_id == "historical"'
    ' and table_id == "day"'
    ' and member_id == "r1i1p1f1"'
    ' and institution_id == "MIROC"'
)
result.head()

In [None]:
# List the distinct dataset ids among the rows selected above.
result.ds_id.unique()