{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "19439be1-4be8-4d2e-b2ac-611280516544",
   "metadata": {},
   "source": [
    "# Compare intake catalog\n",
    "https://github.com/cp4cds/c3s_34g_manifests/tree/master/intake/catalogs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba414e54-dfb6-4b81-a52b-37286e7e26b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import intake"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "db4cf19e-c93c-4e08-8e67-e2a1978d6a68",
   "metadata": {},
   "source": [
    "## Load latest catalog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce3c6720-c2de-4417-ad46-35a54a926135",
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/master/intake/catalogs/c3s.yaml\"\n",
    "cat = intake.open_catalog(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6c65c3d-1d20-4afd-a2fb-94f7400e3d0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = cat['c3s-cmip6'].read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d5924ff-4d81-4984-8ea0-610eb6596912",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95f95260-7e75-4dd1-bcc4-0de1f9c67c2a",
   "metadata": {},
   "source": [
    "## Load previous catalog ... CSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4bc06178-918d-47e6-ac9d-76e8d6f5253d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_old = intake.open_csv(\"https://github.com/cp4cds/c3s_34g_manifests/raw/master/intake/catalogs/c3s-cmip6/c3s-cmip6_v20210313.csv.gz\").read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d298c8da-e66f-4c81-b4ad-405817e68760",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_old.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4e686ba-ea17-4480-896e-9279726e4af3",
   "metadata": {},
   "source": [
    "## Build sets for dataset ids ... current and old"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "034c7e13-e233-4866-bd71-82e5dd93216a",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_ids = set(df.ds_id.values)\n",
    "ds_ids_old = set(df_old.ds_id.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81015fc2-7124-405e-a99e-dcfdebe7842c",
   "metadata": {},
   "source": [
    "## Which datasets are missing in current set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1e39777-64c7-49c3-8fd3-c222a9db60e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ids present in the previous catalog but absent from the latest one.\n",
    "ds_ids_missing = ds_ids_old - ds_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6555b094-b161-45a3-8219-782ba09a317c",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(ds_ids_missing)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2de63048-b103-4d64-b35e-45bed5fbfacc",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_ids_missing"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d3fc4d1-82f7-41d8-8280-f4335bc31893",
   "metadata": {},
   "source": [
    "## Search for ds_id in current datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec3063bc-b0ba-43a6-8f26-15a9b464aeca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Literal prefix match: `str.match` would treat every '.' in the id as a regex wildcard.\n",
    "# `na=False` keeps the mask strictly boolean if any ds_id is missing.\n",
    "ds_id_prefix = \"c3s-cmip6.ScenarioMIP.NIMS-KMA.KACE-1-0-G.ssp126\"\n",
    "df.loc[df.ds_id.str.startswith(ds_id_prefix, na=False)]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}