{ "cells": [ { "cell_type": "markdown", "id": "center-vision", "metadata": {}, "source": [ "# Demo for using C3S intake catalog\n", "\n", "Intake Example:\n", "https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb\n" ] }, { "cell_type": "code", "execution_count": null, "id": "demographic-regulation", "metadata": {}, "outputs": [], "source": [ "import intake" ] }, { "cell_type": "markdown", "id": "fewer-robert", "metadata": {}, "source": [ "## Open remote catalog" ] }, { "cell_type": "code", "execution_count": null, "id": "becoming-theater", "metadata": {}, "outputs": [], "source": [ "cat_url = \"https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/update_intake_catalog/intake/catalogs/c3s.yaml\"\n", "cat = intake.open_catalog(cat_url)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "awful-clearance", "metadata": {}, "outputs": [], "source": [ "list(cat)" ] }, { "cell_type": "code", "execution_count": null, "id": "nearby-alert", "metadata": {}, "outputs": [], "source": [ "print(cat['c3s-cmip6'])" ] }, { "cell_type": "markdown", "id": "forced-article", "metadata": {}, "source": [ "## Load catalog for c3s-cmip6\n", "Catalogs will be cached locally in `~/.intake/cache`.\n", "\n", "See: https://intake.readthedocs.io/en/latest/catalog.html?highlight=simplecache#caching-source-files-locally" ] }, { "cell_type": "code", "execution_count": null, "id": "ceramic-today", "metadata": {}, "outputs": [], "source": [ "df = cat['c3s-cmip6'].read()" ] }, { "cell_type": "markdown", "id": "convertible-naples", "metadata": {}, "source": [ "### Show memory usage" ] }, { "cell_type": "code", "execution_count": null, "id": "weekly-argentina", "metadata": {}, "outputs": [], "source": [ "df.info(memory_usage='deep')" ] }, { "cell_type": "markdown", "id": "minus-briefing", "metadata": {}, "source": [ "## Show first datasets" ] }, { "cell_type": "code", "execution_count": null, "id": "assured-bobby", "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "markdown", "id": "induced-politics", "metadata": {}, "source": [ "## Show number of datasets" ] }, { "cell_type": "code", "execution_count": null, "id": "complimentary-recovery", "metadata": {}, "outputs": [], "source": [ "df.ds_id.nunique()" ] }, { "cell_type": "markdown", "id": "dominant-watch", "metadata": {}, "source": [ "## Define a search function for dataset and time" ] }, { "cell_type": "code", "execution_count": null, "id": "textile-fortune", "metadata": {}, "outputs": [], "source": [ "def search(df, collection, time=None):\n", " # a common search we do in rook\n", " start = end = None\n", " if time:\n", " if \"/\" in time:\n", " start, end = time.split(\"/\")\n", " start = start.strip()\n", " end = end.strip()\n", " else:\n", " start = time.strip()\n", " \n", " start = start or \"1800-01-01\"\n", " end = end or \"2500-12-31\"\n", " \n", " sdf = df.fillna({'start_time': '1000-01-01T12:00:00', 'end_time': '3000-12-31T12:00:00'})\n", "\n", " result = sdf.loc[(sdf.ds_id == collection) & (sdf.end_time >= start) & (sdf.start_time <= end)]\n", " return list(result.path.sort_values().to_dict().values())\n", " " ] }, { "cell_type": "markdown", "id": "naughty-timothy", "metadata": {}, "source": [ "## Search for a dataset with time restrictions" ] }, { "cell_type": "code", "execution_count": null, "id": "quality-strand", "metadata": {}, "outputs": [], "source": [ "result = search(\n", " df, \n", " collection=\"c3s-cmip6.CMIP.SNU.SAM0-UNICON.historical.r1i1p1f1.day.pr.gn.v20190323\",\n", " time=\"2000-01-01/2001-12-31\")\n", "result" ] }, { "cell_type": "markdown", "id": "peripheral-president", "metadata": {}, "source": [ "## Search for dataset with no time axis (fx, fixed fields)" ] }, { "cell_type": "code", "execution_count": null, "id": "hairy-potential", "metadata": {}, "outputs": [], "source": [ "df.loc[df.table_id==\"fx\"].ds_id" ] }, { "cell_type": "code", "execution_count": null, "id": "anonymous-winner", "metadata": {}, "outputs": [], "source": [ "collection = df.iloc[29].ds_id\n", "collection" ] }, { "cell_type": "code", "execution_count": null, "id": "complicated-transaction", "metadata": {}, "outputs": [], "source": [ "result = search(df, collection=collection, time=\"2000-01-01/2010-12-31\")\n", "result" ] }, { "cell_type": "markdown", "id": "architectural-suspension", "metadata": {}, "source": [ "## Other searches ..." ] }, { "cell_type": "code", "execution_count": null, "id": "important-algebra", "metadata": {}, "outputs": [], "source": [ "result = df.loc[\n", " (df.variable_id==\"tas\") \n", " & (df.experiment_id==\"historical\")\n", " & (df.table_id==\"day\")\n", " & (df.member_id==\"r1i1p1f1\")\n", " & (df.institution_id==\"MIROC\")\n", "]\n", "result.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "maritime-association", "metadata": {}, "outputs": [], "source": [ "result.ds_id.unique()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }