From f20ff6603428cf183ec71e3b1b1b8ef7d844dcf2 Mon Sep 17 00:00:00 2001 From: Ewelina Dobrowolska Date: Wed, 15 Apr 2026 09:44:57 +0200 Subject: [PATCH] updating-guide-notebooks --- examples/contribute_via_osc_editor.ipynb | 4 +- examples/contribute_via_pr_osc.ipynb | 4 +- examples/earthcode_data_discovery.ipynb | 2 +- ...0.Prerequisites-EarthCODE-Workspaces.ipynb | 220 ++++---- guide/0.Prerequisites-local.ipynb | 223 ++++---- guide/1.Project.ipynb | 379 +++++++------- guide/2.0.Product.ipynb | 475 +++++++++--------- guide/2.1.Product_files_PRR.ipynb | 289 +++++------ guide/2.1.Product_files_self_hosted.ipynb | 328 ++++++------ guide/3.Workflow.ipynb | 321 ++++++------ guide/4.Experiment.ipynb | 333 ++++++------ 11 files changed, 1343 insertions(+), 1235 deletions(-) diff --git a/examples/contribute_via_osc_editor.ipynb b/examples/contribute_via_osc_editor.ipynb index 2716b27..6f3a7bb 100644 --- a/examples/contribute_via_osc_editor.ipynb +++ b/examples/contribute_via_osc_editor.ipynb @@ -1982,7 +1982,7 @@ ], "metadata": { "kernelspec": { - "display_name": "default", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1996,7 +1996,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.13.11" } }, "nbformat": 4, diff --git a/examples/contribute_via_pr_osc.ipynb b/examples/contribute_via_pr_osc.ipynb index 60316fe..d24fa05 100644 --- a/examples/contribute_via_pr_osc.ipynb +++ b/examples/contribute_via_pr_osc.ipynb @@ -2257,7 +2257,7 @@ ], "metadata": { "kernelspec": { - "display_name": "default", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2271,7 +2271,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.2" + "version": "3.13.11" } }, "nbformat": 4, diff --git a/examples/earthcode_data_discovery.ipynb b/examples/earthcode_data_discovery.ipynb index 7425195..31524d5 
100644 --- a/examples/earthcode_data_discovery.ipynb +++ b/examples/earthcode_data_discovery.ipynb @@ -8056,7 +8056,7 @@ ], "metadata": { "kernelspec": { - "display_name": "default", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/guide/0.Prerequisites-EarthCODE-Workspaces.ipynb b/guide/0.Prerequisites-EarthCODE-Workspaces.ipynb index 7bdb48e..899178b 100644 --- a/guide/0.Prerequisites-EarthCODE-Workspaces.ipynb +++ b/guide/0.Prerequisites-EarthCODE-Workspaces.ipynb @@ -1,109 +1,117 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "fff64edd", - "metadata": {}, - "source": [ - "# 0.1 Prerequisites Using EarthCODE workspaces\n", - "\n", - "\n", - "\n", - "## Short introduction to Earthcode\n", - "At EarthCODE we aim to not just store data but make it easily accessible and [FAIR](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/). The Open Science Catalog is built on the [Spatio Temporal Asset Catalog (STAC)](https://stacspec.org/en), which is a standard for describing geospatial data. Therefore new entries must conform to its specification.\n", - "\n", - "To process your data and publish it on Open Science Data Catalogue, we need six things from you to get started:\n", - "\n", - "1. Information about your ESA (EO Programme) funded `Project`, \n", - "2. Information about your `Product / Dataset`\n", - "3. Information about the actual `files/data`\n", - "4. Whether you need to `store your results permanently on ESA cloud storage`.\n", - "5. Information about the `workflow/code` you used to generate the data.\n", - "6. Information about the `experiment` that generated the dataset.\n", - "\n", - "We've broken the process into five steps, contained within five notebooks. You can run the individual notebooks and commit the results via pull request to the open science catalog libray. 
Below you can find instructions how to do this via the EarthCODE workspace.\n", - "\n", - "## Intro to Earthcode workspaces:\n", - "\n", - "\n", - "The [EarthCODE workspace](https://workspace.earthcode.eox.at) allows you to access to an integrated Jupyter Lab environment with the EarthCODE library installed, in order for you to quickly get started with scientific research and experiments.\n", - "\n", - "The login, you must have a [GitHub account](https://docs.github.com/en/get-started/start-your-journey/creating-an-account-on-github).\n", - "\n", - "Once you are logged in, you can access all features directly through the portal or find links to them.\n", - "\n", - "When you log in, you will find the navigation bar on the left-hand side, containing links to all available resources and tasks, while the middle pane serves as the main interactive area, changing depending on the option you select.\n", - "\n", - "\n", - "## 1. Create a copy of Open Science Catalog metadata\n", - "\n", - "You can add new content to the OSC via GitHub [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). To do this, you need a to [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks) the OSC repository, embeded the new information into the existing catalog and merge. The steps below describe the process.\n", - "\n", - "1. [Fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the open science catalog repository on github - [https://github.com/ESA-EarthCODE/open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata)\n", - "\n", - "2. Then clone your forked repository and create a branch by using the Git GUI available in the EarthCODE Workspaces:\n", - "\n", - "![image.png](../assets/clone-and-branch.gif)\n", - "\n", - "\n", - "## 2. 
Sequence of actions\n", - "\n", - "First clone the earthcode library guide notebooks by running inside your accounts workspace:\n", - "`git clone https://github.com/ESA-EarthCODE/earthcode-library.git`\n", - "\n", - "Run notebooks in this order:\n", - "1. `guide/1.Project.ipynb`\n", - "2. `guide/2.Product.ipynb`\n", - "3. Either `guide/2.1.Product_files_PRR.ipynb` or `guide/2.2.Product.ipynb`, depending on the results from step 2.\n", - "4. `guide/3.Workflow.ipynb`\n", - "5. `guide/4.Experiment.ipynb`\n", - "6. Open a pull request against the main OSC repository\n", - "\n", - "\n", - "## 3. Opening a GitHub PR\n", - "\n", - "After the validation passes, you are ready to request changes into existing [open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata) repository to be able to publish your datasets and project by using the EarthCODE Workspaces Git GUI as shown below.\n", - "\n", - "1. First you need to create a valid token.\n", - " 1. go to https://github.com/settings/personal-access-tokens\n", - " 2. click on fine-grained tokens, and select generate new token\n", - " 3. Limit access to the forked repository and provide read-write Contents permissions\n", - " 4. Copy the token produced\n", - "\n", - "![createtoken](../assets/create-token.gif)\n", - "\n", - "1. Commit changes using the EarthCODE Workspaces Git GUI (or terminal commands)\n", - "\n", - "![commitclip.gif](../assets/commitclip.gif)\n", - "\n", - "3. Open a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) against the main open science catalog repository
\n", - " `gh pr create -f`\n", - "\n", - "Or alternatively directly via github.com:\n", - "![pullrequest.gif](../assets/pullrequest.gif)\n", - "\n", - "\n", - "### Check the status of your PR direclty in GitHub\n", - "\n", - "After creation of Pull Request you should see it on the list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/pulls\n", - "\n", - "Check the status of your PR under: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/actions\n", - "\n", - "> Changes to the OSC content will be reviewed by the EarthCODE Data Steward team. In case any changes are needed to your inputs, you will be contacted by the team." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "fff64edd", + "metadata": {}, + "source": [ + "# 0.1 Prerequisites Using EarthCODE workspaces\n", + "\n", + "\n", + "\n", + "## Short introduction to Earthcode\n", + "At EarthCODE we aim to not just store data but make it easily accessible and [FAIR](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/). The Open Science Catalog is built on the [Spatio Temporal Asset Catalog (STAC)](https://stacspec.org/en), which is a standard for describing geospatial data. Therefore new entries must conform to its specification.\n", + "\n", + "To process your data and publish it on Open Science Data Catalogue, we need six things from you to get started:\n", + "\n", + "1. Information about your ESA (EO Programme) funded `Project`, \n", + "2. Information about your `Product / Dataset`\n", + "3. Information about the actual `files/data`\n", + "4. Whether you need to `store your results permanently on ESA cloud storage`.\n", + "5. Information about the `workflow/code` you used to generate the data.\n", + "6. 
Information about the `experiment` that generated the dataset.\n", + "\n", + "We've broken the process into five steps, contained within five notebooks. You can run the individual notebooks and commit the results via pull request to the open science catalog metadata repository. Below you can find instructions on how to do this via the EarthCODE workspace.\n", + "\n", + "## Intro to Earthcode workspaces:\n", + "\n", + "\n", + "The [EarthCODE workspace](https://workspace.earthcode.eox.at) gives you access to an integrated Jupyter Lab environment with the EarthCODE library installed, so that you can quickly get started with scientific research and experiments.\n", + "\n", + "To log in, you must have a [GitHub account](https://docs.github.com/en/get-started/start-your-journey/creating-an-account-on-github).\n", + "\n", + "Once you are logged in, you can access all features directly through the portal or find links to them.\n", + "\n", + "When you log in, you will find the navigation bar on the left-hand side, containing links to all available resources and tasks, while the middle pane serves as the main interactive area, changing depending on the option you select.\n", + "\n", + "\n", + "## 1. Create a copy of Open Science Catalog metadata\n", + "\n", + "You can add new content to the OSC via GitHub [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). To do this, you need to [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks) the OSC repository, embed the new information into the existing catalog and merge. The steps below describe the process.\n", + "\n", + "1. 
[Fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the open science catalog repository on github - [https://github.com/ESA-EarthCODE/open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata)\n", + "\n", + "2. Then clone your forked repository and create a branch by using the Git GUI available in the EarthCODE Workspaces:\n", + "\n", + "![image.png](../assets/clone-and-branch.gif)\n", + "\n", + "\n", + "## 2. Sequence of actions\n", + "\n", + "First clone the earthcode library guide notebooks by running inside your account's workspace:\n", + "`git clone https://github.com/ESA-EarthCODE/earthcode-library.git`\n", + "\n", + "Run notebooks in this order:\n", + "1. `guide/1.Project.ipynb`\n", + "2. `guide/2.0.Product.ipynb`\n", + "3. Either `guide/2.1.Product_files_PRR.ipynb` or `guide/2.1.Product_files_self_hosted.ipynb`, depending on the results from step 2.\n", + "4. `guide/3.Workflow.ipynb`\n", + "5. `guide/4.Experiment.ipynb`\n", + "6. Open a pull request against the main OSC repository\n", + "\n", + "\n", + "## 3. Opening a GitHub PR\n", + "\n", + "After the validation passes, you are ready to request changes into existing [open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata) repository to be able to publish your datasets and project by using the EarthCODE Workspaces Git GUI as shown below.\n", + "\n", + "1. First you need to create a valid token.\n", + " 1. go to https://github.com/settings/personal-access-tokens\n", + " 2. click on fine-grained tokens, and select generate new token\n", + " 3. Limit access to the forked repository and provide read-write Contents permissions\n", + " 4. Copy the token produced\n", + "\n", + "![createtoken](../assets/create-token.gif)\n", + "\n", + "2. Commit changes using the EarthCODE Workspaces Git GUI (or terminal commands)\n", + "\n", + "![commitclip.gif](../assets/commitclip.gif)\n", + "\n", + "3. 
Open a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) against the main open science catalog repository
\n", + " `gh pr create -f`\n", + "\n", + "Or alternatively directly via github.com:\n", + "![pullrequest.gif](../assets/pullrequest.gif)\n", + "\n", + "\n", + "### Check the status of your PR direclty in GitHub\n", + "\n", + "After creation of Pull Request you should see it on the list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/pulls\n", + "\n", + "Check the status of your PR under: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/actions\n", + "\n", + "> Changes to the OSC content will be reviewed by the EarthCODE Data Steward team. In case any changes are needed to your inputs, you will be contacted by the team." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/0.Prerequisites-local.ipynb b/guide/0.Prerequisites-local.ipynb index 295ce26..da97524 100644 --- a/guide/0.Prerequisites-local.ipynb +++ b/guide/0.Prerequisites-local.ipynb @@ -1,105 +1,124 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 0.1 Prerequisites - local \n", - "\n", - "## Short introduction to Earthcode\n", - "At EarthCODE we aim to not just store data but make it easily accessible and [FAIR](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/). The Open Science Catalog is built on the [Spatio Temporal Asset Catalog (STAC)](https://stacspec.org/en), which is a standard for describing geospatial data. 
Therefore new entries must conform to its specification.\n", - "\n", - "To process your data and publish it on Open Science Data Catalogue, we need six things from you to get started:\n", - "\n", - "1. Information about your ESA (EO Programme) funded `Project`, \n", - "2. Information about your `Product / Dataset`\n", - "3. Information about the actual `files/data`\n", - "4. Whether you need to `store your results permanently on ESA cloud storage`.\n", - "5. Information about the `workflow/code` you used to generate the data.\n", - "6. Information about the `experiment` that generated the dataset.\n", - "\n", - "We've broken the process into five steps, contained within five notebooks. You can run the individual notebooks and commit the results via pull request to the open science catalog libray. Below you can find instructions about setting up the enviroment, running the notebooks and creating a pull request.\n", - "\n", - "## 0.1 Install the earthcode library\n", - "\n", - "### Option 1. - Existing enviroment\n", - "If you want to install the earthcode library in an existing enviroment simply run:\n", - "`pip install earthcode`\n", - "\n", - "\n", - "### Option 2. - New environment\n", - "If you want to create a new environment to run the library in you can run:\n", - "\n", - "1. `git clone git clone https://github.com/ESA-EarthCODE/earthcode-library.git`\n", - "2. Install pixi - https://pixi.sh/dev/installation/\n", - "3. `cd earthcode-library`\n", - "4. `pixi install`\n", - "5. `pixi run jupyter lab`\n", - "\n", - "\n", - "## 0.2 Create local copy of Open Science Catalog metadata\n", - "\n", - "You can add new content to the OSC via GitHub [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). 
To do this, you need a to [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks) the OSC repository, embeded the new information into the existing catalog and merge. The steps below describe the process.\n", - "\n", - "0. (if needed) [Install git](https://github.com/git-guides/install-git) & create a [GitHub account](https://docs.github.com/en/get-started/start-your-journey/creating-an-account-on-github) \n", - "1. [Fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the open science catalog repository on github - [https://github.com/ESA-EarthCODE/open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata)\n", - "2. [Clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) your *forked* repository
\n", - " `git clone https://github.com/your-gh-username/open-science-catalog-metadata.git`\n", - "3. Create a new branch in the local clone
\n", - " - Set the current workspace to your **local clone** of the open science catalog metadata repository: `cd open-science-catalog-metadata/`\n", - " - Create a new branch: `git checkout -b project_branch`\n", - "\n", - "## 0.3 Sequence of actions\n", - "\n", - "Run notebooks in this order and follow the instructions inside:\n", - "1. `guide/1.Project.ipynb`\n", - "2. `guide/2.Product.ipynb`\n", - "3. Either `guide/2.1.Product_files_PRR.ipynb` or `guide/2.2.Product.ipynb`, depending on the results from step 2.\n", - "4. `guide/3.Workflow.ipynb`\n", - "5. `guide/4.Experiment.ipynb`\n", - "6. Open a pull request against the main OSC repository\n", - "\n", - "\n", - "\n", - "## 0.4 Opening a GitHub PR\n", - "\n", - "After the validation passes, you are ready to request changes into existing [open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata) repository to be able to publish your datasets and project. \n", - "By using the terminal: \n", - "1. Commit the changes to the newly created branch on your local copy of repository:
\n", - " `git commit -m \"Adding new product\\_v2.0\"`
\n", - "3. [Push](https://docs.github.com/en/get-started/using-git/pushing-commits-to-a-remote-repository) the changes to your fork:
\n", - " `git push origin \\`\n", - "5. Open a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) against the main open science catalog repository
\n", - " - you can use `gh pr create -f` or the GitHub website to do this.\n", - "\n", - "\n", - "### Check the status of your PR direclty in GitHub\n", - "\n", - "After creation of Pull Request you should see it on the list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/pulls\n", - "\n", - "Check the status of your PR under: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/actions\n", - "\n", - "> Changes to the OSC content will be reviewed by the EarthCODE Data Steward team. In case any changes are needed to your inputs, you will be contacted by the team." - ] - }, - { - "cell_type": "markdown", - "id": "f02b8c28", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "704d1139", + "metadata": {}, + "source": [ + "# Prerequisites - local \n", + "\n", + "## Short introduction to EarthCODE\n", + "ESA has developed [EarthCODE](https://earthcode.esa.int/), a Collaborative Open Development Environment for Earth System Science. The scope of EarthCODE is to enable adoption of [FAIR](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/) and Open Science Principles throughout ESA-funded Earth Science activities and supporting the ESA Science Clusters, to deliver long-term persistence of data, code and documentation, aiding reproducibility, reuse and consumption of research outputs by a wider community. EarthCODE integrates 1) the [Open Science Catalogue](https://opensciencedata.esa.int/), making it effortless to discover novel research outcomes from ESA Science Clusters activities; 2) access to commercial platform services; and 3) a wealth of community resources and tools that help maintain data and research quality.
\n", + "\n", + "In this guide we provide steps on how to exploit resources provided in this first component related to data cataloguing and sharing.
\n", + "\n", + "The Open Science Catalogue is built on the [Spatio Temporal Asset Catalog (STAC)](https://stacspec.org/en), which is a standard for describing geospatial data. Therefore new entries must conform to its specification.\n", + "We've broken the process into six steps, contained within six notebooks. You can run the individual notebooks and commit the results via pull request to the [open science catalog metadata repository](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main).
\n", + "Below you can find instructions about setting up the environment, running the notebooks and creating a pull request.\n", + "\n", + "## 0.1 Install the earthcode library\n", + "\n", + "### Option 1. - Existing environment\n", + "If you want to install the earthcode library in an existing environment simply run:\n", + "`pip install earthcode`\n", + "\n", + "\n", + "### Option 2. - New environment\n", + "If you want to create a new environment to run the library in you can run:\n", + "\n", + "1. `git clone https://github.com/ESA-EarthCODE/earthcode-library.git`\n", + "2. Install pixi - https://pixi.sh/dev/installation/\n", + "3. `cd earthcode-library`\n", + "4. `pixi install`\n", + "5. `pixi run jupyter lab`\n", + "\n", + "For more details, refer to the documentation published at [https://esa-earthcode.github.io/earthcode-library/README.html](https://esa-earthcode.github.io/earthcode-library/README.html)\n", + "\n", + "## 0.2 Create local copy of Open Science Catalog metadata\n", + "\n", + "You can add new content to the OSC via GitHub [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). To do this, you need to [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks) the OSC repository, embed the new information into the existing catalog and merge. The steps below describe the process.\n", + "\n", + " **⚠️ Important** \n", + "> All steps below must be executed in a **terminal (command-line shell)**. \n", + "> **Do not run these commands inside a Jupyter notebook or any other notebook environment.**\n", + "\n", + "0. (if needed) [Install git](https://github.com/git-guides/install-git) & create a [GitHub account](https://docs.github.com/en/get-started/start-your-journey/creating-an-account-on-github) \n", + "1. 
[Fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the open science catalog repository on github - [https://github.com/ESA-EarthCODE/open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata)\n", + "2. [Clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) your *forked* repository
\n", + " `git clone https://github.com/your-gh-username/open-science-catalog-metadata.git`\n", + "3. Create a new branch in the local clone
\n", + " - Set the current workspace to your **local clone** of the open science catalog metadata repository: `cd open-science-catalog-metadata/`\n", + " - Create a new branch: `git checkout -b project_branch`\n", + " \n", + "✅ You are now on a new branch and ready to start adding or editing OSC metadata.\n", + "\n", + "## 0.3 Sequence of actions\n", + "To process your data and publish it on Open Science Data Catalogue, we need six things from you to get started.
\n", + "After completing Step 0.2 (above), run the notebooks in the suggested order and follow the instructions inside.\n", + "1. Information about your ESA (EO Programme) funded `Project` >>> `guide/1.Project.ipynb`, \n", + "2. Information about your `Product` (metadata and description) >>> `guide/2.0.Product.ipynb`\n", + "3. `Generate STAC Items for your dataset` >>> `guide/2.1.Product_files_self_hosted.ipynb`\n", + "4. Request permanent storage of data on `ESA cloud storage` >>> `guide/2.1.Product_files_PRR.ipynb`.\n", + "5. Information about the `Workflow` you used to generate the data >>> `guide/3.Workflow.ipynb`\n", + "6. Information about the `Experiment` that generated the dataset >>> `guide/4.Experiment.ipynb`\n", + "7. Open a pull request against the main OSC repository `(section 0.4 below)` \n", + "\n", + "## 0.4 Opening a GitHub PR\n", + "\n", + "Each step that refers to metadata generation and contribution to the Open Science Catalogue metadata repository (notebooks 1-2 and 4-5) ends with validation of the entries against the entire repository. This might take a few minutes, depending on the number of changes you make to the catalogue.\n", + "Once the validation is successful, you are ready to request changes to the existing [open-science-catalog-metadata](https://github.com/ESA-EarthCODE/open-science-catalog-metadata) repository, i.e. to contribute your project, products and workflows to the catalogue. \n", + "\n", + "\n", + " **⚠️ Important** \n", + "> All steps below must be executed in a **terminal (command-line shell)**. \n", + "> **Do not run these commands inside a Jupyter notebook or any other notebook environment.**\n", + "\n", + "**By using the terminal:**\n", + "1. Check if the new entry has been added to the repository by typing: `git status`\n", + "2. Add all changes to the commit: `git add .`\n", + "3. Commit the changes to the newly created branch on your local copy of the repository:
\n", + " `git commit -m \"Adding new product_v2.0\"`
\n", + "4. [Push](https://docs.github.com/en/get-started/using-git/pushing-commits-to-a-remote-repository) the changes to your fork:
\n", + " `git push origin project_branch`\n", + "5. Open a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) against the main open science catalog repository
\n", + " - Open your browser and go to the GitHub link that is printed in the terminal after the push.\n", + " - You will be redirected to the New Pull Request page. Compare the changes between your branch in the forked repository and the main branch of the `open-science-catalog-metadata` repository.\n", + " - Update the title of the Pull Request and give it a meaningful description.\n", + " - Click on \"Create Pull Request\".\n", + "✅ You have now opened a Pull Request from your branch against the main branch, and your inputs are ready for review.\n", + "\n", + "⚠️ Extra: you can use `gh pr create -f` to create a Pull Request using the GitHub CLI: https://cli.github.com/ \n", + "\n", + "\n", + "### Check the status of your PR directly in GitHub\n", + "\n", + "After creation of Pull Request you should see it on the list: [https://github.com/ESA-EarthCODE/open-science-catalog-metadata/pulls](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/pulls)\n", + "\n", + "Check the status of your PR under: [https://github.com/ESA-EarthCODE/open-science-catalog-metadata/actions](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/actions)\n", + "\n", + "> Changes to the OSC content will be reviewed by the EarthCODE Data Steward team. In case any changes are needed to your inputs, you will be contacted by the team." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/1.Project.ipynb b/guide/1.Project.ipynb index 326ff55..4a3d5d5 100644 --- a/guide/1.Project.ipynb +++ b/guide/1.Project.ipynb @@ -1,184 +1,201 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 1.Project\n", - "\n", - "\n", - "The project STAC Collection provides a general description of your ESA-funded project - including its official title, short descrption, time span, consortium members involved, related themes, etc.
Edit parameters below to specify all the required information. See helper description in the comments inside that code cell. \n", - "\n", - "> See **example project metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example project: WAPOSAL](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/projects/waposal/collection.json)\n", - "\n", - "\n", - "**LICENSE:** In this step you are required to select one of the available licenses for **all your products** generated by the project.
Please have a look at available list of license and pick the one that defines your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json).
\n", - "Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)\n", - "\n", - "> *If you have doubts which license you should define for your products/workflows, please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*\n", - "\n", - "\n", - "This notebook shows how to create an OSC project entry using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from pathlib import Path\n", - "\n", - "from earthcode.metadata_input_definitions import ProjectCollectionMetadata\n", - "from earthcode.static import create_project_collection\n", - "from earthcode.git_add import save_project_collection_to_osc\n", - "from earthcode.validator import validate_catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "\n", - "# Local OSC clone root path (assumed one folder above repository root)\n", - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", - "\n", - "\n", - "# A custom id of the project, it can be related to the title, i.e. - 4datlantic-ohc. Use dash \"-\" symbol to separare words in the id\"\n", - "project_id = \"\"\n", - "# Specify the Title of your project. I.e. - 4DAtlantic-OHC. 
This should correspond to the title of the project as in the ESA contract.\n", - "project_title = \"\"\n", - "# A short description of the project:\n", - "project_description = \"\"\n", - "# Project status: pick from - ongoing or completed\n", - "project_status = \"\"\n", - "\n", - "# Overall license for all related data that will be uploaded from the project., i.e. CC-BY-4.0. See the note in the markdown cell above to consult full list of available licenses.\n", - "# If you have multiple licenses, you can pick 'various'\n", - "project_license = \"\"\n", - "\n", - "# Define spatial extent of the project study area in epsg:4326\n", - "# if you have multiple disjoint study areas, specify the bounding box that covers all of them\n", - "# i.e project_s, project_w, project_n, project_e = -180.0, -90.0, 180.0, 90.0\n", - "project_s = -180.0\n", - "project_w = -90.0\n", - "project_n = 180.0\n", - "project_e = 90.0\n", - "\n", - "# The project start and end times\n", - "project_start_year = 2021\n", - "project_start_month = 1\n", - "project_start_day = 1\n", - "project_end_year = 2021\n", - "project_end_month = 12\n", - "project_end_day = 31\n", - "\n", - "# Define the links to the project website and dedicated Project website on EO4SocietyLink. Discover the list of published projects here: https://eo4society.esa.int/projects/\n", - "website_link = \"\"\n", - "eo4society_link = \"\"\n", - "\n", - "# Define project themes, according to OSC ontology. 
Pick one or more from:\n", - "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", - "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", - "project_themes = [\"\"]\n", - "\n", - "# provide the Name and e-mail address to ESA Technical Officer (TO) supporting your project:\n", - "to_name = \"\"\n", - "to_email = \"\"\n", - "\n", - "# List the consortium members in a tuple with format (name, contact_email), for example - ('University A', \"contact@universitya.fr\")\n", - "consortium_members = [(\"\", \"\")]" - ] - }, - { - "cell_type": "markdown", - "id": "ea910f5f", - "metadata": {}, - "source": [ - "All OSC entries are interlinked to enable efficient search and analysis. For example, projects have associated products, themes, missions and in turn products link back to their projects, etc. Most of these can be automatically generated using the existing information in an OSC Entry and the associated `earthcode` library function.\n", - "\n", - "To use these functions you need a **local copy of the OSC**, preferably a fork, so that later, you can easily open a PR. The functions will save your newly created OSC entries and make changes to existing OSC entries, in order to conform to the required structure." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "721b41ed", - "metadata": {}, - "outputs": [], - "source": [ - "project_bbox = [[project_w, project_s, project_e, project_n]]\n", - "\n", - "project_metadata = ProjectCollectionMetadata(\n", - " project_id=project_id,\n", - " project_title=project_title,\n", - " project_description=project_description,\n", - " project_status=project_status,\n", - " project_license=project_license,\n", - " project_bbox=project_bbox,\n", - " project_start_datetime=datetime(project_start_year, project_start_month, project_start_day),\n", - " project_end_datetime=datetime(project_end_year, project_end_month, project_end_day),\n", - " project_themes=project_themes,\n", - " to_name=to_name,\n", - " to_email=to_email,\n", - " consortium_members=consortium_members,\n", - " website_link=website_link,\n", - " eo4society_link=eo4society_link or None,\n", - ")\n", - "\n", - "project_collection = create_project_collection(project_metadata)" - ] - }, - { - "cell_type": "markdown", - "id": "e9e46d10", - "metadata": {}, - "source": [ - "There will be two types of checks before accepting your entry into the main OSC:\n", - "\n", - "1. Automatic verification\n", - "2. Semantic validation\n", - "\n", - "You can see the results of the automatic checks using the library." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = Path(catalog_root)\n", - "\n", - "save_project_collection_to_osc(project_collection, catalog_root)\n", - "\n", - "errors, error_files = validate_catalog(catalog_root)\n", - "if errors or error_files:\n", - " raise AssertionError(f\"Catalog validation failed. 
errors={len(errors)} files={len(error_files)}\")\n", - "\n", - "print(f\"Saved project: {project_collection.id}\")\n", - "print(\"Catalog validation passed.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "a49fbb49", + "metadata": {}, + "source": [ + "# 1. Project metadata\n", + "\n", + "Project entries hold the top-level information about the results you are going to publish. Typically an OSC project corresponds to a project financed by the European Space Agency - Earth Observation programme. Before creating a new project, check whether your project is already on the [list of onboarded projects](https://opensciencedata.esa.int/projects/catalog). In that case you can reuse the existing project entry and only update it where needed.\n", + "The project STAC Collection provides a general description of your ESA-funded project - including its official title, short description, time span, consortium members involved, related themes, etc.
\n", + "Metadata of each project is stored in a folder named after their unique id (`collectionid`). Each folder has one file - collection.json that has all the project information (metadata). \n", + "Edit parameters below to specify all the required information. See helper description in the comments inside that code cell. \n", + "\n", + "> See **example project metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example project: WAPOSAL](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/projects/waposal/collection.json)\n", + "\n", + "\n", + "**LICENSE:** In this step you are required to select one of the available licenses for **all your products** generated by the project.
Please have a look at the list of available licenses and pick the one that applies to your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json).
\n", + "Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)\n", + "\n", + "> *If you are unsure which license you should define for your products/workflows, please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*\n", + "\n", + "\n", + "This notebook shows how to create an OSC project entry using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog." + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "88342b6a", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "from earthcode.metadata_input_definitions import ProjectCollectionMetadata\n", + "from earthcode.static import create_project_collection\n", + "from earthcode.git_add import save_project_collection_to_osc\n", + "from earthcode.validator import validate_catalog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4af84721", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "\n", + "# Local OSC clone root path (assumed one folder above repository root)\n", + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", + "\n", + "\n", + "# A custom id of the project, it can be related to the title, e.g. 4datlantic-ohc. Use the dash \"-\" symbol to separate words in the id.\n", + "project_id = \"\"\n", + "# Specify the Title of your project, e.g. 4DAtlantic-OHC. 
This should correspond to the title of the project as in the ESA contract.\n", + "project_title = \"\"\n", + "# A short description of the project:\n", + "project_description = \"\"\n", + "# Project status: pick from - ongoing or completed\n", + "project_status = \"\"\n", + "\n", + "# Overall license for all related data that will be uploaded from the project, e.g. CC-BY-4.0. See the note in the markdown cell above to consult the full list of available licenses.\n", + "# If you have multiple licenses, you can pick 'various'\n", + "project_license = \"\"\n", + "\n", + "# Define spatial extent of the project study area in epsg:4326\n", + "# if you have multiple disjoint study areas, specify the bounding box that covers all of them\n", + "# i.e project_s, project_w, project_n, project_e = -180.0, -90.0, 180.0, 90.0\n", + "project_s = -180.0\n", + "project_w = -90.0\n", + "project_n = 180.0\n", + "project_e = 90.0\n", + "\n", + "# The project start and end times\n", + "project_start_year = 2021\n", + "project_start_month = 1\n", + "project_start_day = 1\n", + "project_end_year = 2021\n", + "project_end_month = 12\n", + "project_end_day = 31\n", + "\n", + "# Define the links to the project websites \n", + "website_link = \"\" # link to the project-dedicated website (may also be hosted on external servers)\n", + "eo4society_link = \"\" # link to the project page on the eo4society projects list. Discover the list of published projects here: https://eo4society.esa.int/projects/\n", + "\n", + "# Define project themes, according to OSC ontology. 
Pick one or more from:\n", + "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", + "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", + "project_themes = [\"\"]\n", + "\n", + "# provide the Name and e-mail address to ESA Technical Officer (TO) supporting your project:\n", + "to_name = \"\"\n", + "to_email = \"\"\n", + "\n", + "# List the consortium members in a tuple with format (name, contact_email), for example - ('University A', \"contact@universitya.fr\")\n", + "consortium_members = [(\"\", \"\")]" + ] + }, + { + "cell_type": "markdown", + "id": "ea910f5f", + "metadata": {}, + "source": [ + "### Create Project collection\n", + "> ℹ️ **Note** \n", + "> This function **creates a project collection** and automatically generates STAC Collection.json and all required **STAC links**. \n", + "> It connects the project with related products, themes, and missions, and updates existing entries as needed.
\n", + "> Run the cell below to automatically create new entry. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "721b41ed", + "metadata": {}, + "outputs": [], + "source": [ + "project_bbox = [[project_w, project_s, project_e, project_n]]\n", + "\n", + "project_metadata = ProjectCollectionMetadata(\n", + " project_id=project_id,\n", + " project_title=project_title,\n", + " project_description=project_description,\n", + " project_status=project_status,\n", + " project_license=project_license,\n", + " project_bbox=project_bbox,\n", + " project_start_datetime=datetime(project_start_year, project_start_month, project_start_day),\n", + " project_end_datetime=datetime(project_end_year, project_end_month, project_end_day),\n", + " project_themes=project_themes,\n", + " to_name=to_name,\n", + " to_email=to_email,\n", + " consortium_members=consortium_members,\n", + " website_link=website_link,\n", + " eo4society_link=eo4society_link or None,\n", + ")\n", + "\n", + "project_collection = create_project_collection(project_metadata)" + ] + }, + { + "cell_type": "markdown", + "id": "e9e46d10", + "metadata": {}, + "source": [ + "### Validate your entry and save to local fork of open-science-catalog-metadata repository\n", + "There will be two types of checks before accepting your entry into the main OSC:\n", + "\n", + "1. Automatic verification\n", + "2. Semantic validation\n", + "\n", + "You can see the results of the automatic checks using the library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73205194", + "metadata": {}, + "outputs": [], + "source": [ + "catalog_root = Path(catalog_root)\n", + "\n", + "save_project_collection_to_osc(project_collection, catalog_root)\n", + "\n", + "errors, error_files = validate_catalog(catalog_root)\n", + "if errors or error_files:\n", + " raise AssertionError(f\"Catalog validation failed. 
errors={len(errors)} files={len(error_files)}\")\n", + "\n", + "print(f\"Saved project: {project_collection.id}\")\n", + "print(\"Catalog validation passed.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/2.0.Product.ipynb b/guide/2.0.Product.ipynb index 7b5ed5f..b6b8c45 100644 --- a/guide/2.0.Product.ipynb +++ b/guide/2.0.Product.ipynb @@ -1,231 +1,250 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2.Product metadata\n", - "\n", - "The `product` STAC Collection provides a general metadata description of all project outputs which will be discovered on the Open Science Catalogue (OSC). Most of these metadata fields should already be available and can be extracted from your data or documentation.\n", - "\n", - "This notebook shows how to create an OSC product entry using the current `earthcode` API, save it in a local OSC catalog clone, add an item to the product, and validate the full catalog.\n", - "\n", - "> You can **attach one or more products to a single project**! So if you have more than one, you have to redo steps 2. and 3. for each!\n", - "\n", - "\n", - "> See **example product metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example product: WAPOSAL Dataset](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/products/waposal-waves/collection.json)\n", - "\n", - "\n", - "**LICENSE:** In this step you are required to select one of the available licenses for **each of your product**.
Please have a look at available list of license and pick the one that defines your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json).
\n", - ">*If you have a product with non-defined license, we **cannot proceed with publishing the datasets**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", - "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", - "\n", - "**PRODUCT EO-MISSIONS:** In this step you are required to select one or more EO Missions that you have used to generate your product.\n", - "Please have a look at the **defined list of EO missions** available in the OSC under: (https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions) and searchable under: https://opensciencedata.esa.int/eo-missions/catalog
\n", - ">*If you have a product which uses or complements in-situ data collections or comes as a results of numerical models please select: [\"in-situ-observations\"] or [\"numerical-models\"]*
\n", - "\n", - "**PRODUCT VARIABLES:** In this step you are required to select one or more variables that your product describes.\n", - "Please have a look at the **defined list of geophysical variables** available in the OSC under: (https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables). You can also explore the list of variables under: https://opensciencedata.esa.int/variables/catalog\n", - "\n", - "> Variables are defined in OSC as geophysical, climate and environmental variables selected from [WMO OSCAR Database](https://space.oscar.wmo.int/variables), complemented by the [GCMD Keywords Database](https://gcmd.earthdata.nasa.gov/KeywordViewer/scheme/Earth%20Science?gtm_scheme=Earth%20Science)\n", - "\n", - "*NOTE: You can use the [EarthCODE search](examples/earthcode_data_discovery.ipynb) functionality to find relevant variables and eo-missions to your data.*\n", - "\n", - "**PRODUCT PARAMETER:** \n", - "Please provide a parameter linked to the product, in allignment with the **CF convention** standard: See full list under: [https://cfconventions.org/](https://cfconventions.org/)\n", - "\n", - "**PRODUCT DOI:**\n", - "Since few weeks EarthCODE offers DOI assignment to products/datasets published on Open Science Data Catalogue. The process is still manual and is handled by ESA TEllUs service, and is handled by the EarthCODE Data Stewardship team on behalf of the Project PI. 
\n", - "> If you would like to assign a DOI to your data, please contact the EarthCODE Team, who will support you in this process: [earth-code@esa.int](mailto:earth-code@esa.int)!\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from pathlib import Path\n", - "\n", - "from earthcode.metadata_input_definitions import ProductCollectionMetadata\n", - "from earthcode.static import create_product_collection\n", - "from earthcode.git_add import save_product_collection_to_catalog\n", - "\n", - "from earthcode.search import search" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# Local OSC clone root path (assumed one folder above repository root)\n", - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", - "\n", - "# A custom id of the product (must be different from project!), it can be related to the title, i.e. - 4datlantic-ohc-dataset. Use dash \"-\" symbol to separare words in the id\"\n", - "product_id = \"\"\n", - "product_title = \"\"\n", - "product_description = \"\"\n", - "# Product status: pick from - ongoing or completed\n", - "product_status = \"\"\n", - "\n", - "# Define the product license. i.e. CC-BY-4.0. See the note in the markdown cell above to consult full list of available licenses.\n", - "# If you have a license agreement that is not in the list, you can put 'other' in the product licenses and provide a link to the text directly\n", - "# if your license is in the list, leave the license link as None.\n", - "product_license = \"CC-BY-4.0\"\n", - "license_link = None\n", - "\n", - "# Define at most five keywords for the product. You can use any short text, that allow users to discover your product.\n", - "product_keywords = [\"\", \"\"]\n", - "\n", - "# Define spatial extent of PRODUCT/DATASET in epsg:4326. 
If the dataset covers discontinuous regions,\n", - "# add the bounding box boundaries for each\n", - "product_s = [-180.0]\n", - "product_w = [-90.0]\n", - "product_n = [180.0]\n", - "product_e = [90.0]\n", - "\n", - "# Define the temporal extent of PRODUCT/ DATASET\n", - "product_start_year = 2021\n", - "product_start_month = 1\n", - "product_start_day = 1\n", - "product_end_year = 2021\n", - "product_end_month = 12\n", - "product_end_day = 31\n", - "\n", - "# Define the semantic region covered by this product, i.e. Belgium, Global etc.\n", - "product_region = \"\"\n", - "\n", - "# Define product themes i.e. land. Pick one or more from:\n", - "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", - "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", - "product_themes = [\"\"]\n", - "\n", - "# Define the eo-misison(s) used to generate the product. i.e. - \"sentinel-2\"\n", - "# Pick one or more from - https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions\n", - "product_missions = []\n", - "\n", - "# Define variables describing at best your Product/ dataset:\n", - "# Pick one or more from from https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables\n", - "product_variables = []\n", - "\n", - "# search(\"description\",type=\"variable\")\n", - "\n", - "# Define the parameters describing your product in standardised CF convention format: i.e. \"leaf_area_index\".\n", - "product_parameters = []\n", - "\n", - "# Provide DOI number assigned to your product. If your product does not have one, type: None\n", - "product_doi = None\n", - "\n", - "# Define the related project id and title\n", - "# These must match the new or an already existing project in the catalog! 
Alteratively correct links cannot be produced!\n", - "project_id = \"\"\n", - "project_title = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b2bed0df", - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = Path(catalog_root)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f964cb1a", - "metadata": {}, - "outputs": [], - "source": [ - "# create a project\n", - "product_bbox = [[w, s, e, n] for s, w, n, e in zip(product_s, product_w, product_n, product_e)]\n", - "\n", - "product_metadata = ProductCollectionMetadata(\n", - " product_id=product_id,\n", - " product_title=product_title,\n", - " product_description=product_description,\n", - " product_bbox=product_bbox,\n", - " product_start_datetime=datetime(product_start_year, product_start_month, product_start_day),\n", - " product_end_datetime=datetime(product_end_year, product_end_month, product_end_day),\n", - " product_license=product_license,\n", - " product_keywords=product_keywords,\n", - " product_status=product_status,\n", - " product_region=product_region,\n", - " product_themes=product_themes,\n", - " product_missions=product_missions,\n", - " product_variables=product_variables,\n", - " project_id=project_id,\n", - " project_title=project_title,\n", - " product_parameters=product_parameters,\n", - " product_doi=product_doi,\n", - " license_link=license_link or None,\n", - ")\n", - "\n", - "product_collection = create_product_collection(product_metadata)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c632aac", - "metadata": {}, - "outputs": [], - "source": [ - "# save the product\n", - "save_product_collection_to_catalog(product_collection, catalog_root)" - ] - }, - { - "cell_type": "markdown", - "id": "716f8e52", - "metadata": {}, - "source": [ - "In this case we do not run validation yet. To complete the addition of products to the OSC, you need to provide file-leve metadata. 
There are three requirements for this:\n", - "\n", - "1. **Storage**. Your research data and workflows/code must be hosted on remote, persistent storage that allows discovery. Examples include:\n", - "\n", - "* Repository provided by ESA\n", - "* S3-compatible object storage - permanent and public\n", - "* GitHub for workflow/methods and code\n", - "* Zenodo, CEDA, Dataverse, or other persistent archives usined by the academic community\n", - "\n", - "2. **Format**. We prefer data is stored in a [cloud-optimised format](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), since it makes storage and access much easier.\n", - "\n", - "3. **File-level metadata.**. To add your data to the Open Science Catalog, you have to generate a STAC items that describes your files, code (if applicable) and documentation (if applicable).\n", - "\n", - "**Your next steps depend on whether your data is already hosted on a remote, persistent storage and is in cloud-native format.**\n", - "\n", - "- If you already cover requirement 1. and 2., go to the 2.1.Product_files_self_hosted.ipynb notebook.\n", - "\n", - "- If instead, any of the below describe your situation better, go to the 2.1.Product_files_PRR.ipynb notebook.\n", - "* you would like your data to be hosted in long-term storage provided by ESA\n", - "* you would like a DOI associated with the data\n", - "\n", - "\n", - "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "de333a79", + "metadata": {}, + "source": [ + "# 2.0. 
Product metadata\n", + "\n", + "The `product` STAC Collection provides a general metadata description of all project outputs that will be discoverable in the Open Science Catalogue (OSC). Most of these metadata fields should already be available and can be extracted from your data or documentation.\n", + "\n", + "This notebook shows how to create an OSC product entry using the current `earthcode` API, save it in a local OSC catalog clone, add an item to the product, and validate the full catalog.\n", + "\n", + "> You can **attach one or more products to a single project**! So if you have more than one, you have to re-run cells [2-4] for each product you want to add!\n", + "\n", + "\n", + "> See **example product metadata** directly in the Open Science Catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example product: WAPOSAL Dataset](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/products/waposal-waves/collection.json)\n", + "\n", + "\n", + "**LICENSE:** In this step you are required to select one of the available licenses for **each of your products**.
Please have a look at the list of available licenses and pick the one that applies to your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json).
\n", + ">*If your product has no defined license, we **cannot proceed with publishing the datasets**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", + "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", + "\n", + "**PRODUCT EO-MISSIONS:** In this step you are required to select one or more EO Missions that you have used to generate your product.\n", + "Please have a look at the **defined list of EO missions** available in the OSC under: (https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions) and searchable under: https://opensciencedata.esa.int/eo-missions/catalog
\n", + ">*If you have a product which uses or complements in-situ data collections or comes as a result of numerical models, please select: [\"in-situ-observations\"] or [\"numerical-models\"]*
\n", + "\n", + "**PRODUCT VARIABLES:** In this step you are required to select one or more variables that your product describes.\n", + "Please have a look at the **defined list of geophysical variables** available in the OSC under: (https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables). You can also explore the list of variables under: https://opensciencedata.esa.int/variables/catalog\n", + "\n", + "> Variables are defined in OSC as geophysical, climate and environmental variables selected from [WMO OSCAR Database](https://space.oscar.wmo.int/variables), complemented by the [GCMD Keywords Database](https://gcmd.earthdata.nasa.gov/KeywordViewer/scheme/Earth%20Science?gtm_scheme=Earth%20Science)\n", + "\n", + "*NOTE: You can use the [EarthCODE search](examples/earthcode_data_discovery.ipynb) functionality to find relevant variables and eo-missions to your data.*\n", + "\n", + "**PRODUCT PARAMETER:** \n", + "Please provide a parameter linked to the product, in alignment with the **CF convention** standard. See the full list under: [https://cfconventions.org/](https://cfconventions.org/)\n", + "\n", + "**PRODUCT DOI:**\n", + "EarthCODE has recently started offering DOI assignment to products/datasets published on the Open Science Data Catalogue. The process is still manual: it uses the ESA TEllUs service and is handled by the EarthCODE Data Stewardship team on behalf of the Project PI. 
\n", + "> If you would like to assign a DOI to your data, please contact the EarthCODE Team, who will support you in this process: [earth-code@esa.int](mailto:earth-code@esa.int)!\n" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "c1db4ca4", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "from earthcode.metadata_input_definitions import ProductCollectionMetadata\n", + "from earthcode.static import create_product_collection\n", + "from earthcode.git_add import save_product_collection_to_catalog\n", + "\n", + "from earthcode.search import search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acf50131", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Local OSC clone root path (assumed one folder above repository root)\n", + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", + "\n", + "# A custom id of the product (must be different from project!), it can be related to the title, e.g. 4datlantic-ohc-dataset. Use the dash \"-\" symbol to separate words in the id.\n", + "product_id = \"\"\n", + "product_title = \"\"\n", + "product_description = \"\"\n", + "# Product status: pick from - ongoing or completed\n", + "product_status = \"\"\n", + "\n", + "# Define the product license, e.g. CC-BY-4.0. See the note in the markdown cell above to consult the full list of available licenses.\n", + "# If you have a license agreement that is not in the list, you can put 'other' in the product licenses and provide a link to the text directly\n", + "# if your license is in the list, leave the license link as None.\n", + "product_license = \"CC-BY-4.0\"\n", + "license_link = None\n", + "\n", + "# Define at most five keywords for the product. 
You can use any short text that allows users to discover your product.\n", + "product_keywords = [\"\", \"\"]\n", + "\n", + "# Define spatial extent of PRODUCT/DATASET in epsg:4326. If the dataset covers discontinuous regions,\n", + "# add the bounding box boundaries for each\n", + "product_s = [-180.0]\n", + "product_w = [-90.0]\n", + "product_n = [180.0]\n", + "product_e = [90.0]\n", + "\n", + "# Define the temporal extent of PRODUCT/ DATASET\n", + "product_start_year = 2021\n", + "product_start_month = 1\n", + "product_start_day = 1\n", + "product_end_year = 2021\n", + "product_end_month = 12\n", + "product_end_day = 31\n", + "\n", + "# Define the semantic region covered by this product, e.g. Belgium, Global etc.\n", + "product_region = \"\"\n", + "\n", + "# Define product themes, e.g. land. Pick one or more from:\n", + "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", + "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", + "product_themes = [\"\"]\n", + "\n", + "# Define the eo-mission(s) used to generate the product, e.g. \"sentinel-2\"\n", + "# Pick one or more from - https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions\n", + "product_missions = []\n", + "\n", + "# Define the variables that best describe your product/dataset:\n", + "# Pick one or more from https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables\n", + "product_variables = []\n", + "\n", + "# search(\"description\",type=\"variable\")\n", + "\n", + "# Define the parameters describing your product in standardised CF convention format, e.g. \"leaf_area_index\".\n", + "product_parameters = []\n", + "\n", + "# Provide DOI number assigned to your product. 
If your product does not have one, type: None\n", + "product_doi = None\n", + "\n", + "# Define the related project id and title\n", + "# These must match the new or an already existing project in the catalog! Otherwise, correct links cannot be produced!\n", + "project_id = \"\"\n", + "project_title = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "8c633094-3543-4d89-9162-a586ba2b0024", + "metadata": {}, + "source": [ + "### Create Product collection\n", + "> ℹ️ **Note** \n", + "> This function **creates a product collection** and automatically generates STAC Collection.json and all required **STAC links**. \n", + "> It connects the product with its related project, themes, and missions, and updates existing entries as needed.
\n", + "> Run the cell below to automatically create new entry. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f964cb1a", + "metadata": {}, + "outputs": [], + "source": [ + "# create a product\n", + "product_bbox = [[w, s, e, n] for s, w, n, e in zip(product_s, product_w, product_n, product_e)]\n", + "\n", + "product_metadata = ProductCollectionMetadata(\n", + " product_id=product_id,\n", + " product_title=product_title,\n", + " product_description=product_description,\n", + " product_bbox=product_bbox,\n", + " product_start_datetime=datetime(product_start_year, product_start_month, product_start_day),\n", + " product_end_datetime=datetime(product_end_year, product_end_month, product_end_day),\n", + " product_license=product_license,\n", + " product_keywords=product_keywords,\n", + " product_status=product_status,\n", + " product_region=product_region,\n", + " product_themes=product_themes,\n", + " product_missions=product_missions,\n", + " product_variables=product_variables,\n", + " project_id=project_id,\n", + " project_title=project_title,\n", + " product_parameters=product_parameters,\n", + " product_doi=product_doi,\n", + " license_link=license_link or None,\n", + ")\n", + "\n", + "product_collection = create_product_collection(product_metadata)" + ] + }, + { + "cell_type": "markdown", + "id": "2ca02655-a1ec-465d-be5e-639364b0e7a4", + "metadata": {}, + "source": [ + "### Save the product entry into local fork of the open-science-catalog-metadata repository" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c632aac", + "metadata": {}, + "outputs": [], + "source": [ + "# save the product in local fork of the open-science-catalog-metadata repository \n", + "catalog_root = Path(catalog_root)\n", + "save_product_collection_to_catalog(product_collection, catalog_root)" + ] + }, + { + "cell_type": "markdown", + "id": "716f8e52", + "metadata": {}, + "source": [ + "In this case we do not run validation yet. 
To complete the addition of products to the OSC, you need to provide asset-level metadata.
\n", + "There are three requirements for this:\n", + "\n", + "1. **Storage**. Your research data and workflows/code must be hosted on remote, persistent storage that allows discovery. Examples include:\n", + "\n", + "* Repository provided by ESA >>> see the `guide/2.1.Product_files_PRR.ipynb` notebook \n", + "* S3-compatible object storage - permanent and public\n", + "* Zenodo, CEDA, Dataverse, or other persistent archives used by the academic community\n", + "\n", + "2. **Format**. We encourage you to use a [cloud-optimised data format](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), since it makes storage and access to the products much easier.\n", + "\n", + "3. **File-level metadata.** To add your data to the Open Science Catalog, you have to generate STAC Items that describe your files, code (if applicable) and documentation (if applicable).\n", + "- If you already cover requirements 1 and 2, you should now start to **Generate STAC Items for your dataset** >>> `guide/2.1.Product_files_self_hosted.ipynb`\n", + "
\n", + "\n", + "\n", + "ℹ️ **To request permanent storage of data on ESA cloud storage to preserve the data in long-term**, navigate to >>> `guide/2.1.Product_files_PRR.ipynb.`\n", + "\n", + "\n", + "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/2.1.Product_files_PRR.ipynb b/guide/2.1.Product_files_PRR.ipynb index 6f368af..4346385 100644 --- a/guide/2.1.Product_files_PRR.ipynb +++ b/guide/2.1.Product_files_PRR.ipynb @@ -1,144 +1,151 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2.1 Product files hosted on the PRR.\n", - "\n", - "As mentioned before there are three pre-requisites to adding files to the osc - storage, format and metadata.\n", - "\n", - "### 1. Storage\n", - "\n", - "If your data is not already stored in a persistent, online and prferably cloud-optimised repository, we recommend uploading your results into **ESA-sponsored repository**, granted by EarthCODE. The repository provides access to data, workflows, experiments and documentation from ESA Projects organised across Collections, accessible via the [STAC API](https://github.com/radiantearth/stac-api-spec).
\n", - "Each Collection contains [STAC Items](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md), with their related Assets stored within the repository. \n", - "\n", - "The upload process to the PRR is manual at the moment, therefore you will have to [email us](mailto:earth-code.esa.int) with information about your:\n", - "- Data type\n", - "- Data size\n", - "- ESA contract and ESA TO for your project\n", - "- STAC items describing the files\n", - "**A pre-requisite to upload the data is to have descriptions of each file using STAC items. For examples how to do this see section 3.**\n", - "\n", - "\n", - "## 2. Format\n", - "\n", - "We prefer data is stored in a [cloud-optimised format](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), since it makes storage and access much easier. If your data is already in one of the preferred formats - `cog`, `parquet`, `zarr`, etc - there is nothing to do for this step.\n", - "\n", - "If the data is not in a [cloud-optimised format](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), we encourage you to transform it yourselves or to contact us and we can help doing this. You can see examples of how this is done for different datasets in the [EarthCODE tutorials](https://esa-earthcode.github.io/tutorials/prr-stac-introduction/.\n", - "\n", - "\n", - "## 3. File-level metadata\n", - "\n", - "This is the most time-consuming step. There are multiple strategies for doing this, we are flexible and it is up to you to decide how to do it, so long as the data conforms to standard STAC specification.
**The main consideration should be usability of the data!**
You can learn more about STAC specification here: https://stacspec.org/en\n", - "\n", - "If you are new to STAC Specification and how this applies to your dataset, we have many [tutorials](https://esa-earthcode.github.io/tutorials/index-1/) available from the EarthCODE Portal and executable from a designated [workspace](https://workspace.earthcode.eox.at/). The tutorials examples how to generate the STAC Items from most commonly used data formats like: `netcdf, tiff and zarr files`. You can start with the introductory tutorial with will also have an overview of all the information provided here: https://esa-earthcode.github.io/tutorials/prr-stac-introduction/ . Note, that the code in the examples does not generalise fully, so we only offer a few libraries and pointers to get you started. You have to tailor the code to your data, but generally the list of tutorials should faciliate this task. You can run all examples in the earthcode library environment.\n", - "\n", - "More manual way to create STAC Items and Asset level data, is shown in the following [example](https://esa-earthcode.github.io/tutorials/creating-stac-catalog-from-prr-example/) (applicable to all file types - including documentation)
\n", - "\n", - "*The provided example use Python programming language, but you are free to explore options in other programming languages, if your are more comfortable with them. In that case please share with us the STAC Collections generated by your script*. \n", - "\n", - "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!\n", - "\n", - "\n", - "\n", - "##### 2.1.2.1 Adding PRR files\n", - "\n", - "- After you have sent us the data with the STAC items and we have returned a link for and hosted your data. You can continue the OSC process.\n", - "\n", - " You can run fill the below URL, with the one we returned to you and run the next section of the notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ef00373", - "metadata": {}, - "outputs": [], - "source": [ - "from earthcode.git_add import save_item_links_to_product_collection\n", - "from earthcode.validator import validate_catalog\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6fe1b477", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "\n", - "# Define the relevant data links to be manually added\n", - "# link to an external data collection\n", - "item_link = ''\n", - "# Link to accessing the data, this link is required.\n", - "access_link = f'https://opensciencedata.esa.int/stac-browser/#/external/{item_link}'\n", - "#Link to the documentation, leave as None, if not available\n", - "documentation_link = ''\n", - "\n", - "# the ID of the product you created in the previous steps\n", - "product_id = ''\n", - "\n", - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12472fa5", - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = 
Path(catalog_root)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c060d917", - "metadata": {}, - "outputs": [], - "source": [ - "save_item_links_to_product_collection(catalog_root, product_id, item_link, access_link, documentation_link)" - ] - }, - { - "cell_type": "markdown", - "id": "8cead67f", - "metadata": {}, - "source": [ - "# Run validation to make sure everything works." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## run validation\n", - "errors, error_files = validate_catalog(catalog_root)\n", - "if errors or error_files:\n", - " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "59a28ecf", + "metadata": {}, + "source": [ + "# 2.1 Request permanent storage of data on ESA cloud storage\n", + "\n", + "### 1. Storage\n", + "EarthCODE provides long term storage of research outcomes from ESA-funded EO Projects and activities including datasets and associated documents + workflows.
\n", + "Research outcomes are organised in STAC Objects (Collections), accessible via the [STAC API](https://github.com/radiantearth/stac-api-spec) and the Open Science Catalogue Browser
\n", + "Each Collection contains [STAC Items](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md), with their related Assets stored within the repository. \n", + "\n", + "The upload process to the PRR is manual at the moment, therefore you will have to [email us](mailto:earth-code@esa.int) with information about your:\n", + "- Data type\n", + "- Data size\n", + "- ESA contract and ESA TO for your project\n", + "- STAC items describing the files\n", + "**A pre-requisite to upload the data is to have descriptions of each file using STAC items. For examples of how to do this, see section 3.**\n", + "\n", + "\n", + "## 2. Format\n", + "\n", + "We strongly encourage a [cloud-optimised format for your data](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), since it makes storage and access much easier. If your data is already in one of the preferred formats - `cog`, `parquet`, `zarr`, etc - there is nothing to do for this step.\n", + "\n", + "If the data is not in a [cloud-optimised format](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/Data%20and%20Workflow%20Best%20Practices/Data/), we encourage you to transform it yourselves or to contact us and we can help you do this. \n", + "\n", + "## 3. File-level metadata\n", + "\n", + "This is the most time-consuming step. There are multiple strategies for doing this, we are flexible and it is up to you to decide how to do it, so long as the data conforms to standard STAC specification.
**The main consideration should be usability of the data!**
You can learn more about STAC specification here: https://stacspec.org/en\n", + "\n", + "If you are new to STAC Specification and how this applies to your dataset, we have many [tutorials](https://esa-earthcode.github.io/tutorials/index-1/) available from the EarthCODE Portal and executable from a designated [workspace](https://workspace.earthcode.eox.at/). The tutorials show examples of how to generate the STAC Items from the most commonly used data formats like: `netcdf, tiff and zarr files`. You can start with the introductory tutorial, which also gives an overview of all the information provided here: https://esa-earthcode.github.io/tutorials/prr-stac-introduction/ . Note that the code in the examples does not generalise fully, so we only offer a few libraries and pointers to get you started. You have to tailor the code to your data, but generally the list of tutorials should facilitate this task. You can run all examples in the earthcode library environment.\n", + "\n", + "A more manual way to create STAC Items and Asset-level data is shown in the following [example](https://esa-earthcode.github.io/tutorials/creating-stac-catalog-from-prr-example/) (applicable to all file types - including documentation)
\n", + "\n", + "*The provided example use Python programming language, but you are free to explore options in other programming languages, if your are more comfortable with them. In that case please share with us the STAC Collections generated by your script*. \n", + "\n", + "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!\n", + "\n", + "\n", + "\n", + "##### 2.1.2.1 Adding PRR files\n", + "\n", + "- After you have sent us the data with the STAC items and we have returned a link for and hosted your data. You can continue the OSC process.\n", + "\n", + " You can run fill the below URL, with the one we returned to you and run the next section of the notebook." + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "3ef00373", + "metadata": {}, + "outputs": [], + "source": [ + "from earthcode.git_add import save_item_links_to_product_collection\n", + "from earthcode.validator import validate_catalog\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fe1b477", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "\n", + "# Define the relevant data links to be manually added\n", + "# link to an external data collection\n", + "item_link = ''\n", + "# Link to accessing the data, this link is required.\n", + "access_link = f'https://opensciencedata.esa.int/stac-browser/#/external/{item_link}'\n", + "#Link to the documentation, leave as None, if not available\n", + "documentation_link = ''\n", + "\n", + "# the ID of the product you created in the previous steps\n", + "product_id = ''\n", + "\n", + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12472fa5", + "metadata": {}, + "outputs": [], + 
"source": [ + "catalog_root = Path(catalog_root)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c060d917", + "metadata": {}, + "outputs": [], + "source": [ + "save_item_links_to_product_collection(catalog_root, product_id, item_link, access_link, documentation_link)" + ] + }, + { + "cell_type": "markdown", + "id": "8cead67f", + "metadata": {}, + "source": [ + "# Run validation to make sure everything works." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac0f412a", + "metadata": {}, + "outputs": [], + "source": [ + "## run validation\n", + "errors, error_files = validate_catalog(catalog_root)\n", + "if errors or error_files:\n", + " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/2.1.Product_files_self_hosted.ipynb b/guide/2.1.Product_files_self_hosted.ipynb index 9196686..6dec87f 100644 --- a/guide/2.1.Product_files_self_hosted.ipynb +++ b/guide/2.1.Product_files_self_hosted.ipynb @@ -1,163 +1,171 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2.1 Product files - self-hosted\n", - "\n", - "This notebook is split into 2 steps. 
The first one covers the options to generate STAC items which describe your data files; the second one \n", - "\n", - "To add your data to the Open Science Catalog, you have to generate a STAC items that describes your files, code (if applicable) and documentation (if applicable).\n", - "\n", - "> We have many [tutorials](https://esa-earthcode.github.io/tutorials/index-1/) available from the EarthCODE Portal and executable from a designated [workspace](https://workspace.earthcode.eox.at/). The tutorials examples how to generate the STAC Items from most commonly used data formats like: `netcdf, tiff and zarr files`. **Note, that the code in the examples/tutorials and below does not generalise fully. You will have to tailor the code to your data.** \n", - "\n", - "# 2.1.1 Generating STAC items\n", - "\n", - "Below you can see an example of a more manual approach to creating STAC items and adding them to an existing OSC product (created and saved in the previous stage.) You can fill the information for a single file item and create the STAC item this way for each file. **If you have multiple files, you will have to update the code accordingly.**\n", - "\n", - "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b6f59b3", - "metadata": {}, - "outputs": [], - "source": [ - "from earthcode.git_add import save_item_to_product_collection\n", - "from earthcode.validator import validate_catalog\n", - "from pathlib import Path\n", - "from earthcode.metadata_input_definitions import ItemMetadata\n", - "from earthcode.static import create_item\n", - "import json\n", - "import shapely\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "535d2e4d", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", - "\n", - "product_id = \"\"\n", - "\n", - "# Define the relevant links to complete your data description.\n", - "access_link = \"\"\n", - "documentation_link = \"\"\n", - "license_link = \"\"\n", - "item_link = \"\"\n", - "item_title = \"\"\n", - "\n", - "# Item metadata (for adding one item to the product collection)\n", - "item_id = \"\"\n", - "item_bbox = [-180.0, -90.0, 180.0, 90.0]\n", - "item_datetime = \"\"\n", - "item_license = \"\"\n", - "item_description = \"\"\n", - "item_data_url = \"\"\n", - "item_data_mime_type = \"\"\n", - "item_data_title = \"\"\n", - "item_extra_fields = {}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e4af0614", - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = Path(catalog_root)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "224e316a", - "metadata": {}, - "outputs": [], - "source": [ - "## generate a STAC item associated with a specific collection\n", - "geometry = json.loads(json.dumps(shapely.box(*item_bbox).__geo_interface__))\n", - "item_metadata = ItemMetadata(\n", - " itemid=item_id,\n", - " geometry=geometry,\n", - " data_time=pd.to_datetime(item_datetime),\n", - " bbox=item_bbox,\n", - " product_id=product_id,\n", - " license=item_license,\n", - " 
description=item_description,\n", - " data_url=item_data_url,\n", - " data_mime_type=item_data_mime_type,\n", - " data_title=item_data_title,\n", - " extra_fields=item_extra_fields,\n", - ")\n", - "item = create_item(item_metadata)" - ] - }, - { - "cell_type": "markdown", - "id": "da773887", - "metadata": {}, - "source": [ - "##### 2.1.2 Adding STAC items to existing OSC product collections\n", - "\n", - "If the files are remote, and you've generated the items use the below function to read all items and connect them to the product collection." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f5762ba", - "metadata": {}, - "outputs": [], - "source": [ - "# save item to collection and add backlinks\n", - "save_item_to_product_collection(item, product_id, catalog_root)" - ] - }, - { - "cell_type": "markdown", - "id": "8cead67f", - "metadata": {}, - "source": [ - "# Run validation to make sure everything works." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## run validation\n", - "errors, error_files = validate_catalog(catalog_root)\n", - "if errors or error_files:\n", - " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "6f2fba0e", + "metadata": {}, + "source": [ + "# 2.2 Product files - self-hosted\n", + "\n", + "To add your data to the Open Science Catalog, you have to generate a STAC items that describes your files and associated documentation.\n", + "\n", + "> We have many [tutorials](https://esa-earthcode.github.io/tutorials/index-1/) available from the EarthCODE Portal and executable from a designated [workspace](https://workspace.earthcode.eox.at/). 
The tutorials examples how to generate the STAC Items from most commonly used data formats like: `netcdf, tiff and zarr files`. **Note, that the code in the examples/tutorials and below does not generalise fully. You will have to tailor the code to your data.** \n", + "\n", + "# 2.1.1 Generating STAC items\n", + "\n", + "Below you can see an example of a more manual approach to creating STAC items and adding them to an existing OSC product (created and saved in the previous stage.) You can fill the information for a single file item and create the STAC item this way for each file. **If you have multiple files, you will have to update the code accordingly.**\n", + "\n", + "> We can support you through this all stages of this process, just [**contact us**](mailto:earth-code.esa.int) or post in the [FORUM](https://discourse-earthcode.eox.at/c/technical-support/8)!" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "0b6f59b3", + "metadata": {}, + "outputs": [], + "source": [ + "from earthcode.git_add import save_item_to_product_collection\n", + "from earthcode.validator import validate_catalog\n", + "from pathlib import Path\n", + "from earthcode.metadata_input_definitions import ItemMetadata\n", + "from earthcode.static import create_item\n", + "import json\n", + "import shapely\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "535d2e4d", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())\n", + "\n", + "product_id = \"\"\n", + "\n", + "# Define the relevant links to complete your data description.\n", + "access_link = \"\"\n", + "documentation_link = \"\"\n", + "license_link = \"\"\n", + "item_link = \"\"\n", + "item_title = \"\"\n", + "\n", + "# Item metadata (for adding one item to the product collection)\n", + "item_id = \"\"\n", + "item_bbox = [-180.0, 
-90.0, 180.0, 90.0]\n", + "item_datetime = \"\"\n", + "item_license = \"\"\n", + "item_description = \"\"\n", + "item_data_url = \"\"\n", + "item_data_mime_type = \"\"\n", + "item_data_title = \"\"\n", + "item_extra_fields = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4af0614", + "metadata": {}, + "outputs": [], + "source": [ + "catalog_root = Path(catalog_root)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "224e316a", + "metadata": {}, + "outputs": [], + "source": [ + "## generate a STAC item associated with a specific collection\n", + "geometry = json.loads(json.dumps(shapely.box(*item_bbox).__geo_interface__))\n", + "item_metadata = ItemMetadata(\n", + " itemid=item_id,\n", + " geometry=geometry,\n", + " data_time=pd.to_datetime(item_datetime),\n", + " bbox=item_bbox,\n", + " product_id=product_id,\n", + " license=item_license,\n", + " description=item_description,\n", + " data_url=item_data_url,\n", + " data_mime_type=item_data_mime_type,\n", + " data_title=item_data_title,\n", + " extra_fields=item_extra_fields,\n", + ")\n", + "item = create_item(item_metadata)" + ] + }, + { + "cell_type": "markdown", + "id": "da773887", + "metadata": {}, + "source": [ + "##### 2.1.2 Adding STAC items to existing OSC product collections\n", + "\n", + "If the files are remote, and you've generated the items use the below function to read all items and connect them to the product collection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f5762ba", + "metadata": {}, + "outputs": [], + "source": [ + "# save item to collection and add backlinks\n", + "save_item_to_product_collection(item, product_id, catalog_root)" + ] + }, + { + "cell_type": "markdown", + "id": "8cead67f", + "metadata": {}, + "source": [ + "# Run validation to make sure everything works." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "473cb389", + "metadata": {}, + "outputs": [], + "source": [ + "## run validation\n", + "errors, error_files = validate_catalog(catalog_root)\n", + "if errors or error_files:\n", + " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/3.Workflow.ipynb b/guide/3.Workflow.ipynb index a218013..35c2142 100644 --- a/guide/3.Workflow.ipynb +++ b/guide/3.Workflow.ipynb @@ -1,152 +1,175 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 3.Workflow\n", - "\n", - "We also strongly encourage projects to add information about the workflow/code used to create the product to make the outputs fully reproducible.
\n", - "`Workflows` are defined as the code and workflows associated with a project, that have been used to generate a specific product. Workflows follow OGC record specifications in contrast to OSC Projects and Products entries. However, the metadata of a workflow is also expressed in JSON format.
\n", - "To discover the specification used in the workflows, explore the documentation here: [https://esa-earthcode.github.io/tutorials/osc-pr-manual/#id-2-3-add-new-workflow](https://esa-earthcode.github.io/tutorials/osc-pr-manual/#id-2-3-add-new-workflow)
\n", - "\n", - "\n", - "> See **example workflow metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example workflow: FAIRSenDD workflow](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/workflows/fairsendd/record.json)\n", - "\n", - "**LICENSE**: In this step you are required to select one of the available licenses for each of your product.\n", - "Please have a look at available list of license and pick the one that defines your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json). \n", - ">*If you have a workflow with non-defined license, we **cannot proceed with publishing the workflow**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", - "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", - "\n", - "\n", - "This notebook shows how to create an OSC workflow record using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from pathlib import Path\n", - "\n", - "from earthcode.metadata_input_definitions import WorkflowMetadata\n", - "from earthcode.static import create_workflow_record\n", - "from earthcode.git_add import save_workflow_record_to_osc\n", - "from earthcode.validator import validate_catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# BASIC INFORMATION ABOUT THE WORKFLOW\n", - "# A custom id of the workflow (must be different from project and product!), it can be related to the title, i.e. - world-cereal-algorithm.\n", - "#Use dash \"-\" symbol to separate words in the id\"\n", - "workflow_id = \"\"\n", - "workflow_title = \"\"\n", - "workflow_description = \"\"\n", - "# Define at most five keywords for the workflow. You can use any short text, that allow users to discover your workflow.\n", - "workflow_keywords = [\"\", \"\"]\n", - "# Define the license of the workflow. i.e. CC-BY-4.0. See the note in the markdown cell above to consult full list of available licenses.\n", - "# If you have multiple licenses, you can pick 'various'\n", - "workflow_license = \"CC-BY-4.0\"\n", - "\n", - "# what DATA the workflow takes as input and output, i.e. GeoTIFF, Netcdf\n", - "workflow_formats = [\"netcdf64\"]\n", - "\n", - "# Define which project the workflow is associated with\n", - "# if are adding to an existing project see the id and titles from here:\n", - "# - https://github.com/ESA-EarthCODE/open-science-catalog-metadata/projects/\n", - "#These must match the new or an already existing project in the catalog! 
Alteratively correct links cannot be produced!\n", - "project_id = \"\"\n", - "project_title = \"\"\n", - "\n", - "# Define product themes i.e. land. Pick one or more from:\n", - "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", - "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", - "workflow_themes = [\"\"]\n", - "\n", - "# List the contacts in a tuple with format (name, contact_email), for example - ('Magellium', \"contact@magellium.fr\")\n", - "workflow_contracts_info = [(\"\", \"\")]\n", - "\n", - "# Define the access to the repository where the workflo/code can be discovered. Provide an active URL below\n", - "codeurl = \"\"\n", - "\n", - "# Optional workflow record fields\n", - "workflow_doi = None\n", - "workflow_s = -180.0\n", - "workflow_w = -90.0\n", - "workflow_n = 180.0\n", - "workflow_e = 90.0\n", - "workflow_start_year = 2021\n", - "workflow_start_month = 1\n", - "workflow_start_day = 1\n", - "workflow_end_year = 2021\n", - "workflow_end_month = 12\n", - "workflow_end_day = 31\n", - "include_workflow_bbox = False\n", - "include_workflow_time = False\n", - "\n", - "# Local OSC clone root path (assumed one folder above repository root)\n", - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = Path(catalog_root)\n", - "\n", - "workflow_metadata = WorkflowMetadata(\n", - " workflow_id=workflow_id,\n", - " workflow_title=workflow_title,\n", - " workflow_description=workflow_description,\n", - " workflow_license=workflow_license,\n", - " workflow_keywords=workflow_keywords,\n", - " workflow_formats=workflow_formats,\n", - " workflow_themes=workflow_themes,\n", - " codeurl=codeurl,\n", - " project_id=project_id,\n", - " project_title=project_title,\n", - " workflow_doi=workflow_doi,\n", - " 
workflow_bbox=[[workflow_w, workflow_s, workflow_e, workflow_n]] if include_workflow_bbox else None,\n", - " workflow_start_datetime=datetime(workflow_start_year, workflow_start_month, workflow_start_day) if include_workflow_time else None,\n", - " workflow_end_datetime=datetime(workflow_end_year, workflow_end_month, workflow_end_day) if include_workflow_time else None,\n", - ")\n", - "\n", - "workflow_record = create_workflow_record(workflow_metadata)\n", - "save_workflow_record_to_osc(workflow_record, catalog_root)\n", - "\n", - "errors, error_files = validate_catalog(catalog_root)\n", - "if errors or error_files:\n", - " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")\n", - "\n", - "print(f\"Saved workflow: {workflow_record['id']}\")\n", - "print(\"Catalog validation passed.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "3a21f7bc", + "metadata": {}, + "source": [ + "# 3.Workflow\n", + "\n", + "`Workflows` are defined as a formal wrapper for scientific code that can run in a specific container or environment on the cloud. Workflows in Open Science Catalogue are associated with the project and scientific product to provide a transparent information about the process that was used to generate the scientific results. Workflows follow OGC record specifications in contrast to OSC Projects and Products entries. However, the metadata of a workflow is also expressed in JSON format. Workflow metadata describe how a workflow can be used in general by experiments. It may describe the input parameters required, acceptable values etc.
\n", + "To discover the specification used in the workflows, explore the documentation here: [https://esa-earthcode.github.io/tutorials/osc-pr-manual/#id-2-3-add-new-workflow](https://esa-earthcode.github.io/tutorials/osc-pr-manual/#id-2-3-add-new-workflow)
\n", + "\n", + "\n", + "> See **example workflow metadata** directly in the Open Science Catalog metadata repository on GitHub to compare the list of required parameters and their format: [See example workflow: FAIRSenDD workflow](https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/workflows/fairsendd/record.json)\n", + "\n", + "**LICENSE**: In this step you are required to select one of the available licenses for your workflow.\n", + "Please have a look at the list of available licenses and pick the one that fits your needs and the needs of potential users: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json). \n", + ">*If you have a workflow without a defined license, we **cannot proceed with publishing the workflow**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", + "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", + "\n", + "\n", + "This notebook shows how to create an OSC workflow record using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog.\n" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "fb57d0a9", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "from earthcode.metadata_input_definitions import WorkflowMetadata\n", + "from earthcode.static import create_workflow_record\n", + "from earthcode.git_add import save_workflow_record_to_osc\n", + "from earthcode.validator import validate_catalog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a8e09c1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# BASIC INFORMATION ABOUT THE WORKFLOW\n", + "# A custom id of the workflow (must be different from project and product!), it can be related to the title, i.e. - world-cereal-algorithm.\n", + "#Use dash \"-\" symbol to separate words in the id\"\n", + "workflow_id = \"\"\n", + "workflow_title = \"\"\n", + "workflow_description = \"\"\n", + "# Define at most five keywords for the workflow. You can use any short text, that allow users to discover your workflow.\n", + "workflow_keywords = [\"\", \"\"]\n", + "# Define the license of the workflow. i.e. CC-BY-4.0. See the note in the markdown cell above to consult full list of available licenses.\n", + "# If you have multiple licenses, you can pick 'various'\n", + "workflow_license = \"CC-BY-4.0\"\n", + "\n", + "# what DATA the workflow takes as input and output, i.e. 
GeoTIFF, Netcdf\n", + "workflow_formats = [\"netcdf64\"]\n", + "\n", + "# Define which project the workflow is associated with\n", + "# if you are adding to an existing project see the id and titles from here:\n", + "# - https://github.com/ESA-EarthCODE/open-science-catalog-metadata/projects/\n", + "# These must match the new or an already existing project in the catalog! Otherwise correct links cannot be produced!\n", + "project_id = \"\"\n", + "project_title = \"\"\n", + "\n", + "# Define workflow themes i.e. land. Pick one or more from:\n", + "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", + "# See the list here: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/blob/main/themes/catalog.json\n", + "workflow_themes = [\"\"]\n", + "\n", + "# List the contacts in a tuple with format (name, contact_email), for example - ('Magellium', \"contact@magellium.fr\")\n", + "workflow_contracts_info = [(\"\", \"\")]\n", + "\n", + "# Define the access to the repository where the workflow can be discovered.
Provide an active URL below:\n", + "codeurl = \"\"\n", + "\n", + "# Optional workflow record fields\n", + "workflow_doi = None\n", + "workflow_s = -90.0\n", + "workflow_w = -180.0\n", + "workflow_n = 90.0\n", + "workflow_e = 180.0\n", + "workflow_start_year = 2021\n", + "workflow_start_month = 1\n", + "workflow_start_day = 1\n", + "workflow_end_year = 2021\n", + "workflow_end_month = 12\n", + "workflow_end_day = 31\n", + "include_workflow_bbox = False\n", + "include_workflow_time = False\n", + "\n", + "# Local OSC clone root path (assumed one folder above repository root)\n", + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" + ] + }, + { + "cell_type": "markdown", + "id": "4270b4d3-9465-4434-ac8b-6b92a738702f", + "metadata": {}, + "source": [ + "### Create Workflow record \n", + "> ℹ️ **Note** \n", + "> This function **creates a workflow record** and automatically generates record.json and all required links. \n", + "> It connects the workflow with related project, themes and updates existing entries as needed.
\n", + "> Run the cell below to automatically create new entry. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f465bfa", + "metadata": {}, + "outputs": [], + "source": [ + "catalog_root = Path(catalog_root)\n", + "\n", + "workflow_metadata = WorkflowMetadata(\n", + " workflow_id=workflow_id,\n", + " workflow_title=workflow_title,\n", + " workflow_description=workflow_description,\n", + " workflow_license=workflow_license,\n", + " workflow_keywords=workflow_keywords,\n", + " workflow_formats=workflow_formats,\n", + " workflow_themes=workflow_themes,\n", + " codeurl=codeurl,\n", + " project_id=project_id,\n", + " project_title=project_title,\n", + " workflow_doi=workflow_doi,\n", + " workflow_bbox=[[workflow_w, workflow_s, workflow_e, workflow_n]] if include_workflow_bbox else None,\n", + " workflow_start_datetime=datetime(workflow_start_year, workflow_start_month, workflow_start_day) if include_workflow_time else None,\n", + " workflow_end_datetime=datetime(workflow_end_year, workflow_end_month, workflow_end_day) if include_workflow_time else None,\n", + ")\n", + "\n", + "workflow_record = create_workflow_record(workflow_metadata)\n", + "save_workflow_record_to_osc(workflow_record, catalog_root)\n", + "\n", + "errors, error_files = validate_catalog(catalog_root)\n", + "if errors or error_files:\n", + " raise AssertionError(f\"Catalog validation failed. 
errors={len(errors)} files={len(error_files)}\")\n", + "\n", + "print(f\"Saved workflow: {workflow_record['id']}\")\n", + "print(\"Catalog validation passed.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/guide/4.Experiment.ipynb b/guide/4.Experiment.ipynb index c1d7171..214cc67 100644 --- a/guide/4.Experiment.ipynb +++ b/guide/4.Experiment.ipynb @@ -1,166 +1,173 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 4.Experiment\n", - "\n", - "Last step is to add information about the experiment which is a specific **execution of a workflow which produced a Product**. An Experiment captures the full context of how a Product was generated, linking together workflows, inputs, and configuration in a way that makes results reusable and reproducible.
\n", - "Experiments follow OGC record specifications as the Workflow entry. It is also stored in JSON record, with distinct, citable components—input.yaml and environment.yaml—to capture parameters and the execution environment. All resources are retrievable over open HTTPS, and catalog metadata persists even if external endpoints change. Interoperability is achieved via OSC themes (e.g., cryosphere) and standard representations (JSON/YAML), while qualified links connect the experiment to its generating workflow and resulting product. The record declares a license and aligns with EO community standards (OGC API Records; notebook environment specified via jupyter_kernel_info).
\n", - "\n", - "> See **example experiment metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example experiment:ESA CCI permafrost](https://opensciencedata.esa.int/experiments/esa-cci-permafrost/record)
\n", - "\n", - "\n", - "**LICENSE**: In this step you are required to select one of the available licenses for each of your product.\n", - "Please have a look at available list of license and pick the one that defines your datasets: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json). \n", - ">*If you have an experiment with non-defined license, we **cannot proceed with publishing the experiment**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", - "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", - "\n", - "This notebook shows how to add an OSC experiment record using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from pathlib import Path\n", - "\n", - "from earthcode.metadata_input_definitions import ExperimentMetadata\n", - "from earthcode.static import create_experiment_record\n", - "from earthcode.git_add import save_experiment_record_to_osc\n", - "from earthcode.validator import validate_catalog" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# experiment info\n", - "# Experiment id\n", - "experiment_id = \"\"\n", - "experiment_title = \"\"\n", - "experiment_description = \"\"\n", - "experiment_license = \"\"\n", - "experiment_keywords = [\"\"]\n", - "\n", - "# Define the input output formats that this experiment works with\n", - "# i.e. GeoTIFF, Zarr, netCDF, etc\n", - "experiment_formats = [\"\"]\n", - "\n", - "# Define themes i.e. land. 
Pick one or more from:\n", - "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", - "experiment_themes = [\"\"]\n", - "\n", - "# link to the specification of the input paramters for the experiment\n", - "experiment_input_parameters_link = \"\"\n", - "# link to the enviroment in which the experiment was performed\n", - "experiment_enviroment_link = \"\"\n", - "\n", - "## ID and title of the associated workflow\n", - "workflow_id = \"\"\n", - "workflow_title = \"\"\n", - "\n", - "## ID and title title of the associated product\n", - "product_id = \"\"\n", - "product_title = \"\"\n", - "\n", - "# Optional experiment record fields\n", - "experiment_contacts = None\n", - "experiment_s = -180.0\n", - "experiment_w = -90.0\n", - "experiment_n = 180.0\n", - "experiment_e = 90.0\n", - "experiment_start_year = 2021\n", - "experiment_start_month = 1\n", - "experiment_start_day = 1\n", - "experiment_end_year = 2021\n", - "experiment_end_month = 12\n", - "experiment_end_day = 31\n", - "include_experiment_bbox = False\n", - "include_experiment_time = False\n", - "\n", - "# Local OSC clone root path (assumed one folder above repository root)\n", - "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "catalog_root = Path(catalog_root)\n", - "\n", - "experiment_metadata = ExperimentMetadata(\n", - " experiment_id=experiment_id,\n", - " experiment_title=experiment_title,\n", - " experiment_description=experiment_description,\n", - " experiment_license=experiment_license,\n", - " experiment_keywords=experiment_keywords,\n", - " experiment_formats=experiment_formats,\n", - " experiment_themes=experiment_themes,\n", - " experiment_input_parameters_link=experiment_input_parameters_link,\n", - " experiment_enviroment_link=experiment_enviroment_link,\n", - " workflow_id=workflow_id,\n", - " 
workflow_title=workflow_title,\n", - " product_id=product_id,\n", - " product_title=product_title,\n", - " contacts=experiment_contacts,\n", - " experiment_bbox=[[experiment_w, experiment_s, experiment_e, experiment_n]] if include_experiment_bbox else None,\n", - " experiment_start_datetime=datetime(experiment_start_year, experiment_start_month, experiment_start_day) if include_experiment_time else None,\n", - " experiment_end_datetime=datetime(experiment_end_year, experiment_end_month, experiment_end_day) if include_experiment_time else None,\n", - ")\n", - "\n", - "experiment_record = create_experiment_record(experiment_metadata)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "78fb5edd", - "metadata": {}, - "outputs": [], - "source": [ - "save_experiment_record_to_osc(experiment_record, catalog_root)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad9a7579", - "metadata": {}, - "outputs": [], - "source": [ - "errors, error_files = validate_catalog(catalog_root)\n", - "if errors or error_files:\n", - " raise AssertionError(f\"Catalog validation failed. errors={len(errors)} files={len(error_files)}\")\n", - "\n", - "print(f\"Saved experiment: {experiment_record['id']}\")\n", - "print(\"Catalog validation passed.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3" - } + "cells": [ + { + "cell_type": "markdown", + "id": "4e78c50b", + "metadata": {}, + "source": [ + "# 4.Experiment\n", + "\n", + "Last step is to add information about the experiment which is a specific **implementation of a workflow that can be used to reproduce a scientific output and generate a Product**. An Experiment captures the full context of how a Product was generated, linking together workflows, inputs, and configuration in a way that makes results reusable and reproducible.
\n", + "Like Workflow entries, Experiments follow the OGC Records specification. Each experiment is stored as a JSON record, with distinct, citable components—input.yaml and environment.yaml—to capture parameters and the execution environment. All resources are retrievable over open HTTPS, and catalog metadata persists even if external endpoints change. Interoperability is achieved via OSC themes (e.g., cryosphere) and standard representations (JSON/YAML), while qualified links connect the experiment to its generating workflow and resulting product. The record declares a license and aligns with EO community standards (OGC API Records; notebook environment specified via jupyter_kernel_info).
\n", + "\n", + "> See **example experiment metadata** directly at open science catalogue metadata repository on GitHub to compare the list of required parameters and their format: [See example experiment:ESA CCI permafrost](https://opensciencedata.esa.int/experiments/esa-cci-permafrost/record)
\n", + "\n", + "\n", + "**LICENSE**: In this step you are required to select one of the available licenses for your experiment.\n", + "Please have a look at the list of available licenses and pick the one that fits your needs: [osc-licence schemas](https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json). \n", + ">*If you have an experiment without a defined license, we **cannot proceed with publishing the experiment**. Please use the list of [licenses by SPDX](https://spdx.org/licenses/) and select the most appropriate one.*
\n", + "> Visit EarthCODE Best Practices to learn more about [**Open Data & Licensing**](https://esa-earthcode.github.io/documentation/Community%20and%20Best%20Practices/FAIR%20and%20Open%20Science%20Best%20Practices/Data#open-data-licensing)
\n", + "\n", + "This notebook shows how to add an OSC experiment record using the current `earthcode` API, save it in a local OSC catalog clone, and validate the full catalog." + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "id": "ce8f185b", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from pathlib import Path\n", + "\n", + "from earthcode.metadata_input_definitions import ExperimentMetadata\n", + "from earthcode.static import create_experiment_record\n", + "from earthcode.git_add import save_experiment_record_to_osc\n", + "from earthcode.validator import validate_catalog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0e8c735", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# experiment info\n", + "# Experiment id\n", + "experiment_id = \"\"\n", + "experiment_title = \"\"\n", + "experiment_description = \"\"\n", + "experiment_license = \"\"\n", + "experiment_keywords = [\"\"]\n", + "\n", + "# Define the input output formats that this experiment works with\n", + "# i.e. GeoTIFF, Zarr, netCDF, etc\n", + "experiment_formats = [\"\"]\n", + "\n", + "# Define themes i.e. land. 
Pick one or more from:\n", + "# - atmosphere, cryosphere, land, magnetosphere-ionosphere, oceans, solid-earth.\n", + "experiment_themes = [\"\"]\n", + "\n", + "# link to the specification of the input parameters for the experiment\n", + "experiment_input_parameters_link = \"\"\n", + "# link to the environment in which the experiment was performed\n", + "experiment_enviroment_link = \"\"\n", + "\n", + "## ID and title of the associated workflow\n", + "workflow_id = \"\"\n", + "workflow_title = \"\"\n", + "\n", + "## ID and title of the associated product\n", + "product_id = \"\"\n", + "product_title = \"\"\n", + "\n", + "# Optional experiment record fields\n", + "experiment_contacts = None\n", + "experiment_s = -90.0\n", + "experiment_w = -180.0\n", + "experiment_n = 90.0\n", + "experiment_e = 180.0\n", + "experiment_start_year = 2021\n", + "experiment_start_month = 1\n", + "experiment_start_day = 1\n", + "experiment_end_year = 2021\n", + "experiment_end_month = 12\n", + "experiment_end_day = 31\n", + "include_experiment_bbox = False\n", + "include_experiment_time = False\n", + "\n", + "# Local OSC clone root path (assumed one folder above repository root)\n", + "catalog_root = str(Path(\"../open-science-catalog-metadata\").resolve())" + ] + }, + { + "cell_type": "markdown", + "id": "7daca115-9918-4dd1-a285-7c229a615a67", + "metadata": {}, + "source": [ + "### Create Experiment record \n", + "> ℹ️ **Note** \n", + "> This function **creates an experiment metadata record** and automatically generates record.json and all required links. \n", + "> It connects the experiment with related project, workflow, themes and updates existing entries as needed.
\n", + "> Run the cell below to automatically create new entry. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c01bf43", + "metadata": {}, + "outputs": [], + "source": [ + "catalog_root = Path(catalog_root)\n", + "\n", + "experiment_metadata = ExperimentMetadata(\n", + " experiment_id=experiment_id,\n", + " experiment_title=experiment_title,\n", + " experiment_description=experiment_description,\n", + " experiment_license=experiment_license,\n", + " experiment_keywords=experiment_keywords,\n", + " experiment_formats=experiment_formats,\n", + " experiment_themes=experiment_themes,\n", + " experiment_input_parameters_link=experiment_input_parameters_link,\n", + " experiment_enviroment_link=experiment_enviroment_link,\n", + " workflow_id=workflow_id,\n", + " workflow_title=workflow_title,\n", + " product_id=product_id,\n", + " product_title=product_title,\n", + " contacts=experiment_contacts,\n", + " experiment_bbox=[[experiment_w, experiment_s, experiment_e, experiment_n]] if include_experiment_bbox else None,\n", + " experiment_start_datetime=datetime(experiment_start_year, experiment_start_month, experiment_start_day) if include_experiment_time else None,\n", + " experiment_end_datetime=datetime(experiment_end_year, experiment_end_month, experiment_end_day) if include_experiment_time else None,\n", + ")\n", + "\n", + "experiment_record = create_experiment_record(experiment_metadata)\n", + "save_experiment_record_to_osc(experiment_record, catalog_root)\n", + "\n", + "errors, error_files = validate_catalog(catalog_root)\n", + "if errors or error_files:\n", + " raise AssertionError(f\"Catalog validation failed. 
errors={len(errors)} files={len(error_files)}\")\n", + "\n", + "print(f\"Saved experiment: {experiment_record['id']}\")\n", + "print(\"Catalog validation passed.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 }