{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ffc452d9-ab00-48e2-9e6a-038f0f57630a",
   "metadata": {},
   "source": [
    "# Create RO-Crate in ROHub for Wetland ML Demo\n",
    "\n",
    "This notebook registers the JupyterGIS wetland classification demo as an Executable Research Object on ROHub: it creates the Research Object, attaches the demo's external resources, links them with provenance triples, sets the author and exports the result as an RO-Crate.\n",
    "\n",
    "**Note:** every run calls `rohub.ros_create` and therefore creates a *new* Research Object. Run the notebook from the top (Restart Kernel → Run All) rather than re-executing single cells, which would submit the same resources and triples again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25b2ee1f-beee-475a-8e38-f36d40729bf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import rohub"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00d3e548-8cec-4ee7-86b8-a031e9511a40",
   "metadata": {},
   "source": [
    "## 1. AUTHENTICATION\n",
    "\n",
    "The login below uses your ROHub username and password. Store each one in a single-line file named `rohub-user` and `rohub-pwd` in your home directory (or in the directory given by the `ROHUB_CREDENTIALS_DIR` environment variable) so that no credentials end up in the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96aa7ebf-19d6-4e51-9cc6-7fc0e1ab7066",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory holding the one-line credential files `rohub-user` and `rohub-pwd`.\n",
    "# Defaults to the home directory; set ROHUB_CREDENTIALS_DIR to use another one.\n",
    "credentials_dir = Path(os.environ.get(\"ROHUB_CREDENTIALS_DIR\", Path.home()))\n",
    "\n",
    "# read_text() opens and closes each file; rstrip() drops the trailing newline.\n",
    "rohub_user = (credentials_dir / \"rohub-user\").read_text().rstrip()\n",
    "rohub_pwd = (credentials_dir / \"rohub-pwd\").read_text().rstrip()\n",
    "\n",
    "rohub.login(username=rohub_user, password=rohub_pwd)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca5668a0-10ea-46d8-a50d-cc646472b7e5",
   "metadata": {},
   "source": [
    "## 2. CREATE RESEARCH OBJECT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0090be45-c64c-4d03-9a01-b9e05c5d05ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Built from adjacent literals so that no source indentation or stray\n",
    "# newlines end up in the description stored on ROHub.\n",
    "RO_DESCRIPTION = (\n",
    "    \"Human-in-the-loop machine learning workflow for wetland classification \"\n",
    "    \"using Sentinel-2 data from ESA EOPF. Demonstrates collaborative annotation \"\n",
    "    \"using JupyterGIS, model retraining with expert corrections, and FAIR \"\n",
    "    \"research practices.\"\n",
    ")\n",
    "\n",
    "ro = rohub.ros_create(\n",
    "    title=\"JupyterGIS Wetland ML Classification Demo - ESA EOPF\",\n",
    "    description=RO_DESCRIPTION,\n",
    "    ros_type=\"Executable Research Object\",\n",
    "    use_template=True,\n",
    "    research_areas=[\"Earth Observation\", \"Machine Learning\", \"Wetlands\", \"Remote Sensing\"],\n",
    ")\n",
    "\n",
    "print(f\"Created RO: {ro.identifier}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d84ada9a-5e6d-4ae1-89fc-b7e2014394f2",
   "metadata": {},
   "source": [
    "## 3. ADD EXTERNAL RESOURCES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c69795f-00a1-485e-be5b-8beb577dd319",
   "metadata": {},
   "outputs": [],
   "source": [
    "GITHUB_REPO = \"https://github.com/annefou/jupytergis-showcases\"\n",
    "# Raw-file base URLs: the repository root and its `content/` folder on main.\n",
    "GITHUB_RAW_ROOT = \"https://raw.githubusercontent.com/annefou/jupytergis-showcases/refs/heads/main\"\n",
    "GITHUB_RAW = f\"{GITHUB_RAW_ROOT}/content\"\n",
    "GITHUB_HTML = \"https://annefou.github.io/jupytergis-showcases/lab/index.html\"\n",
    "\n",
    "# One entry per external resource; each dict is passed unchanged to\n",
    "# ro.add_external_resource(). Titles must stay unique because the provenance\n",
    "# triples in section 5 look resources up by title.\n",
    "EXTERNAL_RESOURCES = [\n",
    "    # Software / Tools\n",
    "    dict(\n",
    "        res_type=\"Software\",\n",
    "        input_url=\"https://github.com/geojupyter/jupytergis\",\n",
    "        folder=\"tool\",\n",
    "        title=\"JupyterGIS\",\n",
    "        description=\"Collaborative GIS environment for Jupyter - required to open .jGIS files\",\n",
    "    ),\n",
    "    # requirements.txt sits at the repository root, one level above content/;\n",
    "    # the canonical URL is used instead of a \"content/../\" path.\n",
    "    # NOTE(review): titled \"Conda Environment\" but points at a requirements.txt - confirm.\n",
    "    dict(\n",
    "        res_type=\"Software\",\n",
    "        input_url=f\"{GITHUB_RAW_ROOT}/requirements.txt\",\n",
    "        folder=\"tool\",\n",
    "        title=\"Conda Environment\",\n",
    "        description=\"Conda environment specification with all Python dependencies\",\n",
    "    ),\n",
    "    dict(\n",
    "        res_type=\"Other\",\n",
    "        input_url=\"https://github.com/EOPF-Sample-Service/eopf-sample-notebooks\",\n",
    "        folder=\"biblio\",\n",
    "        title=\"EOPF Sample Service\",\n",
    "        description=\"ESA Earth Observation Processing Framework for Sentinel-1, 2 and 3 data access\",\n",
    "    ),\n",
    "    # Workflow (main entry point)\n",
    "    dict(\n",
    "        res_type=\"Jupyter Notebook\",\n",
    "        input_url=f\"{GITHUB_RAW}/Wetland_ML_Demo_EOPF.ipynb\",\n",
    "        folder=\"tool\",\n",
    "        title=\"Wetland ML Demo Notebook\",\n",
    "        description=\"Main Jupyter notebook implementing the wetland classification workflow\",\n",
    "    ),\n",
    "    # JupyterGIS document\n",
    "    dict(\n",
    "        res_type=\"Dataset\",\n",
    "        input_url=f\"{GITHUB_HTML}?path=Wetland_Annotation.jGIS\",\n",
    "        folder=\"output\",\n",
    "        title=\"JupyterGIS Annotation Document\",\n",
    "        description=\"Interactive map with expert annotations for model corrections\",\n",
    "    ),\n",
    "    # Input data\n",
    "    # NOTE(review): labelled input data but filed under the \"output\" folder - confirm.\n",
    "    dict(\n",
    "        res_type=\"Dataset\",\n",
    "        input_url=f\"{GITHUB_RAW}/wetland_outputs/sentinel2_rgb.tif\",\n",
    "        folder=\"output\",\n",
    "        title=\"Sentinel-2 RGB Composite (COG)\",\n",
    "        description=\"Cloud Optimized GeoTIFF - RGB composite from Sentinel-2 L2A\",\n",
    "    ),\n",
    "    # ML outputs - Version 1\n",
    "    dict(\n",
    "        res_type=\"Result\",\n",
    "        input_url=f\"{GITHUB_RAW}/wetland_outputs/wetland_prediction_v1.tif\",\n",
    "        folder=\"output\",\n",
    "        title=\"Wetland Prediction v1\",\n",
    "        description=\"Initial Random Forest classification - before expert corrections\",\n",
    "    ),\n",
    "    # Corrections\n",
    "    dict(\n",
    "        res_type=\"Dataset\",\n",
    "        input_url=f\"{GITHUB_RAW}/wetland_outputs/corrections.geojson\",\n",
    "        folder=\"input\",\n",
    "        title=\"Expert Corrections (GeoJSON)\",\n",
    "        description=\"Expert corrections extracted from JupyterGIS annotations\",\n",
    "    ),\n",
    "    # ML outputs - Version 2\n",
    "    dict(\n",
    "        res_type=\"Result\",\n",
    "        input_url=f\"{GITHUB_RAW}/wetland_outputs/wetland_prediction_v2_corrected.tif\",\n",
    "        folder=\"output\",\n",
    "        title=\"Wetland Prediction v2 (Corrected)\",\n",
    "        description=\"Improved classification after retraining with expert corrections\",\n",
    "    ),\n",
    "    dict(\n",
    "        res_type=\"Result\",\n",
    "        input_url=f\"{GITHUB_RAW}/wetland_outputs/wetland_model_v2.joblib\",\n",
    "        folder=\"output\",\n",
    "        title=\"Trained Model v2 (joblib)\",\n",
    "        description=\"Serialized Random Forest model retrained with expert corrections\",\n",
    "    ),\n",
    "    # GitHub repository\n",
    "    dict(\n",
    "        res_type=\"Software\",\n",
    "        input_url=GITHUB_REPO,\n",
    "        folder=\"tool\",\n",
    "        title=\"GitHub Repository\",\n",
    "        description=\"Source repository for this demo\",\n",
    "    ),\n",
    "]\n",
    "\n",
    "# Keep the returned resource objects, keyed by title, for interactive inspection.\n",
    "added_resources = {\n",
    "    resource_spec[\"title\"]: ro.add_external_resource(**resource_spec)\n",
    "    for resource_spec in EXTERNAL_RESOURCES\n",
    "}\n",
    "\n",
    "print(\"All resources added!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d991ac54-8540-4413-adbf-1a098a922d4c",
   "metadata": {},
   "source": [
    "## 4. CREATE LOOKUP FOR PROVENANCE TRIPLES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a65cdbd-550b-4583-ae65-b1cc78cb3e2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get resources with their URLs\n",
    "resources = ro.list_resources()\n",
    "\n",
    "# Titles are the lookup keys for the triples below; a duplicate title would\n",
    "# silently overwrite an entry in the dict, so fail loudly instead.\n",
    "resource_titles = list(resources[\"title\"])\n",
    "assert len(resource_titles) == len(set(resource_titles)), \"Resource titles must be unique\"\n",
    "\n",
    "res_url_by_title = dict(zip(resources[\"title\"], resources[\"url\"]))\n",
    "\n",
    "print(f\"\\nTotal resources: {len(res_url_by_title)}\")\n",
    "for title in res_url_by_title:\n",
    "    print(f\"  - {title}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "724c3c38-de94-408a-a9e4-7798cf3928bf",
   "metadata": {},
   "source": [
    "## 5. ADD PROVENANCE RELATIONSHIPS (TRIPLES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "689ab4ac-61a5-4e4d-aa84-ed2a3c3c66c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the first annotation to attach triples\n",
    "annotations = ro.list_annotations()\n",
    "if len(annotations) == 0:\n",
    "    raise RuntimeError(\"The Research Object has no annotation to attach triples to\")\n",
    "annotation_id = annotations[0][\"identifier\"]\n",
    "\n",
    "SCHEMA = \"http://schema.org/\"\n",
    "PROV = \"http://www.w3.org/ns/prov#\"\n",
    "\n",
    "# (subject title, predicate IRI, object title, message printed once added)\n",
    "PROVENANCE_TRIPLES = [\n",
    "    # 1. Notebook requires Conda Environment\n",
    "    (\n",
    "        \"Wetland ML Demo Notebook\",\n",
    "        f\"{SCHEMA}softwareRequirements\",\n",
    "        \"Conda Environment\",\n",
    "        \"Notebook -> softwareRequirements -> Conda Environment\",\n",
    "    ),\n",
    "    # 2. Corrections derived from JupyterGIS annotations\n",
    "    (\n",
    "        \"Expert Corrections (GeoJSON)\",\n",
    "        f\"{PROV}wasDerivedFrom\",\n",
    "        \"JupyterGIS Annotation Document\",\n",
    "        \"Corrections -> wasDerivedFrom -> jGIS Document\",\n",
    "    ),\n",
    "    # 3. Prediction v2 derived from corrections\n",
    "    (\n",
    "        \"Wetland Prediction v2 (Corrected)\",\n",
    "        f\"{PROV}wasDerivedFrom\",\n",
    "        \"Expert Corrections (GeoJSON)\",\n",
    "        \"Prediction v2 -> wasDerivedFrom -> Corrections\",\n",
    "    ),\n",
    "    # 4. Prediction v2 is revision of v1\n",
    "    (\n",
    "        \"Wetland Prediction v2 (Corrected)\",\n",
    "        f\"{PROV}wasRevisionOf\",\n",
    "        \"Wetland Prediction v1\",\n",
    "        \"Prediction v2 -> wasRevisionOf -> Prediction v1\",\n",
    "    ),\n",
    "    # 5. Prediction v1 generated by notebook\n",
    "    # NOTE(review): PROV-O declares prov:Activity as the range of\n",
    "    # prov:wasGeneratedBy, while the object here is the notebook resource -\n",
    "    # confirm this modelling is intended.\n",
    "    (\n",
    "        \"Wetland Prediction v1\",\n",
    "        f\"{PROV}wasGeneratedBy\",\n",
    "        \"Wetland ML Demo Notebook\",\n",
    "        \"Prediction v1 -> wasGeneratedBy -> Notebook\",\n",
    "    ),\n",
    "]\n",
    "\n",
    "# Check every title up front so a typo cannot leave a half-written set of triples.\n",
    "referenced_titles = {\n",
    "    title\n",
    "    for subject_title, _, object_title, _ in PROVENANCE_TRIPLES\n",
    "    for title in (subject_title, object_title)\n",
    "}\n",
    "missing_titles = sorted(referenced_titles - set(res_url_by_title))\n",
    "if missing_titles:\n",
    "    raise KeyError(f\"Resources not found in the Research Object: {missing_titles}\")\n",
    "\n",
    "for subject_title, predicate, object_title, message in PROVENANCE_TRIPLES:\n",
    "    ro.add_triple(\n",
    "        the_subject=res_url_by_title[subject_title],\n",
    "        the_predicate=predicate,\n",
    "        the_object=res_url_by_title[object_title],\n",
    "        annotation_id=annotation_id,\n",
    "        object_class=\"URIRef\",\n",
    "    )\n",
    "    print(f\"Added: {message}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49ac70d0-db02-4278-b3af-04f544caca94",
   "metadata": {},
   "source": [
    "## 6. VERIFY TRIPLES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f0b956d-bc79-4025-963c-5c2076e457d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check triples in the annotation\n",
    "triples = ro.list_triples(annotation_id)\n",
    "\n",
    "# One triple per block (subject / predicate / object) instead of a single raw dump.\n",
    "print(f\"{len(triples)} triples in annotation {annotation_id}:\")\n",
    "for triple in triples:\n",
    "    print(f\"- {triple['subject']}\")\n",
    "    print(f\"    {triple['predicate']}\")\n",
    "    print(f\"    {triple['object']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a238e7d4-1683-4816-b0b6-d0e4e21aba33",
   "metadata": {},
   "source": [
    "## 7. ADD AUTHOR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0323e2b8-44a8-45b1-b7cb-88d3acf4b4a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "ro.set_authors(agents=[\"annef@simula.no\"])\n",
    "print(\"Author added!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91efd76d-ee00-4458-9213-8b05bc2f40af",
   "metadata": {},
   "source": [
    "## 8. EXPORT RO-CRATE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "250ffc95-f67b-444e-9b67-ee277ff94752",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the RO-Crate to verify the relationships are captured\n",
    "ro.export_to_rocrate(filename=\"my_rocrate\", use_format=\"jsonld\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "add9ecbf-8b7d-4aa6-8a28-8c1148517709",
   "metadata": {},
   "source": [
    "## 9. SUMMARY\n",
    "\n",
    "The diagram printed below is a conceptual overview of the workflow. Only the five relationships added in section 5 are stored as triples in the Research Object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f22f458-42a1-430b-a383-85667748d73c",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"\"\"\n",
    "{'='*65}\n",
    "RESEARCH OBJECT CREATED SUCCESSFULLY\n",
    "{'='*65}\n",
    "\n",
    "📊 Provenance graph:\n",
    "\n",
    "    EOPF Service ──────────────────┐\n",
    "         │                         │\n",
    "         ▼                         ▼\n",
    "    Sentinel-2 RGB        Notebook (Workflow)\n",
    "                                   │\n",
    "                                   ▼\n",
    "                             Prediction v1\n",
    "                                   │\n",
    "          ┌────────────────────────┤\n",
    "          ▼                        ▼\n",
    "     JupyterGIS ◀─────────── jGIS Document\n",
    "     (Software)                    │\n",
    "                                   ▼\n",
    "                         Corrections (GeoJSON)\n",
    "                                   │\n",
    "          ┌────────────────────────┤\n",
    "          ▼                        ▼\n",
    "    Model v2 ──────────────▶ Prediction v2\n",
    "\n",
    "🔗 View your RO at:\n",
    "   https://w3id.org/ro-id/{ro.identifier}\n",
    "   https://rohub.org/overview/{ro.identifier}\n",
    "\n",
    "{'='*65}\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d31b4e23-9d28-4c01-9c6a-fb6918380f61",
   "metadata": {},
   "source": [
    "## 10. CLEAN UP (OPTIONAL)\n",
    "\n",
    "The snippet below is kept for reference and is **not executed**. It loads a Research Object by identifier, prints its status and creation mode, and deletes it. Replace `ro_id` with the identifier of the Research Object you want to remove before running it in a code cell.\n",
    "\n",
    "```python\n",
    "ro_id = \"e62f59cd-fe68-4ca5-9656-9411d82f8b28\"\n",
    "#rohub.ros_delete(identifier = ro_id)\n",
    "\n",
    "ro = rohub.ros_load(identifier=ro_id)\n",
    "print(ro.status)\n",
    "print(ro.creation_mode)\n",
    "ro.delete()\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}