diff --git a/.gitignore b/.gitignore index c0e2324..b97d228 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,10 @@ htmlcov/ _build/ dist/ dev.ipynb -earthcode/models \ No newline at end of file +earthcode/models +guide/project.yaml +guide/.ipynb_checkpoints/ +guide/product.yaml +earthcode/generators/project_generator.py +guide/workflow.yaml +guide/experiment.yaml diff --git a/_toc.yml b/_toc.yml index 90f558a..90efcef 100644 --- a/_toc.yml +++ b/_toc.yml @@ -11,6 +11,7 @@ parts: - file: guide/2.1.Product_files_self_hosted - file: guide/3.Workflow - file: guide/4.Experiment + - file: guide/5.Templates - caption: Examples chapters: - file: examples/earthcode_data_discovery diff --git a/earthcode/generators/experiment_generator.py b/earthcode/generators/experiment_generator.py index c9b51bc..81c1182 100644 --- a/earthcode/generators/experiment_generator.py +++ b/earthcode/generators/experiment_generator.py @@ -1,5 +1,6 @@ from pathlib import Path from datetime import datetime +import json import logging import sys @@ -7,6 +8,7 @@ from earthcode.static import create_experiment_record, ExperimentMetadata from earthcode.git_add import save_experiment_record_to_osc +from earthcode.validator import validateOSCEntry logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() @@ -51,3 +53,11 @@ def create_experiment_stac_from_template(experiment_yaml, osc_path): experiment_record = create_experiment_record(experiment_metadata) save_experiment_record_to_osc(experiment_record, Path(osc_path)) + + # validate the saved record, since catalog links are updated when written to disk + experiment_path = Path(osc_path) / "experiments" / experiment_record["id"] / "record.json" + with open(experiment_path, "r", encoding="utf-8") as f: + json_experiment = json.load(f) + errors = validateOSCEntry(json_experiment, Path(osc_path)) + if errors: + raise AssertionError(f"Catalog validation failed. 
errors={len(errors)}\n{errors}") diff --git a/earthcode/generators/product_generator.py b/earthcode/generators/product_generator.py index 55e1305..ea64959 100644 --- a/earthcode/generators/product_generator.py +++ b/earthcode/generators/product_generator.py @@ -1,3 +1,4 @@ +import json from pathlib import Path from datetime import datetime import sys @@ -8,6 +9,7 @@ from earthcode.static import create_product_collection, ProductCollectionMetadata from earthcode.git_add import save_product_collection_to_catalog +from earthcode.validator import validateOSCEntry logging.basicConfig(stream=sys.stdout, encoding="utf-8", level=logging.INFO) @@ -70,3 +72,10 @@ def create_product_stac_from_template(project_yaml, osc_path): product_collection = create_product_collection(product_metadata) save_product_collection_to_catalog(product_collection, Path(osc_path)) + + # need to run validation on the saved product collection to validate the final json in the OSC repo, not the one created in memory + with open(Path(osc_path) / f'products/{product_collection.id}/collection.json', "r", encoding="utf-8") as f: + json_product = json.load(f) + errors = validateOSCEntry(json_product, Path(osc_path)) + if errors: + raise AssertionError(f"Catalog validation failed. 
errors={len(errors)}\n{errors}") diff --git a/earthcode/generators/project_generator.py b/earthcode/generators/project_generator.py index 086b8a0..34391b0 100644 --- a/earthcode/generators/project_generator.py +++ b/earthcode/generators/project_generator.py @@ -1,5 +1,6 @@ from pathlib import Path from datetime import datetime +import json import logging import sys @@ -8,6 +9,7 @@ from earthcode.static import create_project_collection, ProjectCollectionMetadata from earthcode.git_add import save_project_collection_to_osc +from earthcode.validator import validateOSCEntry logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() @@ -32,7 +34,7 @@ def create_project_stac_from_template(project_yaml, osc_path): [project_cms.append((member['name'], member['email'])) for member in data['consortium_members']] project_metadata = ProjectCollectionMetadata( - project_id=data['id'] , + project_id=data['id'], project_title=data['title'], project_description=data['description'], project_status=data['status'], @@ -50,3 +52,11 @@ def create_project_stac_from_template(project_yaml, osc_path): project_collection = create_project_collection(project_metadata) save_project_collection_to_osc(project_collection, Path(osc_path)) + + # validate the saved collection, since catalog links are updated when written to disk + project_path = Path(osc_path) / "projects" / project_collection.id / "collection.json" + with open(project_path, "r", encoding="utf-8") as f: + json_project = json.load(f) + errors = validateOSCEntry(json_project, Path(osc_path)) + if errors: + raise AssertionError(f"Catalog validation failed. 
errors={len(errors)}\n{errors}") diff --git a/earthcode/generators/stac_generator.py b/earthcode/generators/stac_generator.py index 13f285f..5255878 100644 --- a/earthcode/generators/stac_generator.py +++ b/earthcode/generators/stac_generator.py @@ -1,12 +1,12 @@ import logging import sys import argparse -import os from .experiment_generator import create_experiment_stac_from_template from .product_generator import create_product_stac_from_template from .project_generator import create_project_stac_from_template from .workflow_generator import create_workflow_stac_from_template +from earthcode.validator import validate_catalog logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() @@ -14,7 +14,7 @@ def generate_stac(osc_path, project=None, workflow=None, experiment=None, product=None): """ - Generates the requested STAC json files at the specified OSC repo. + Generates the requested STAC json files at the specified OSC repo and performs a validation check. :param osc_path: OSC repo where the STAC json will be created. :param project: Path to the Project YAML template, if empty no Project STAC will be generated @@ -23,6 +23,12 @@ def generate_stac(osc_path, project=None, workflow=None, experiment=None, produc :param product: Path to the Product YAML template, if empty no Product STAC will be generated """ + if all(t is None for t in [project, workflow, experiment, product]): + log.warning("No template provided." + "Run again with at least a provided template to produce the relative STAC json." 
+ "For additional help invoke with -h.") + return + if project is not None: log.info("Generating Project STAC json in OSC @ \"" + osc_path + "\"") create_project_stac_from_template(project, osc_path) @@ -36,10 +42,10 @@ def generate_stac(osc_path, project=None, workflow=None, experiment=None, produc log.info("Generating Product STAC json in OSC @ \"" + osc_path + "\"") create_product_stac_from_template(product, osc_path) - if project is None and workflow is None and experiment is None and product is None: - log.warning("No template provided." - "Run again with at least a provided template to produce the relative STAC json." - "For additional help invoke with -h.") + # OPTIONAL full catalogue validation + # errors, error_files = validate_catalog(osc_path) + # if errors or error_files: + # raise AssertionError(f"Catalog validation failed. errors={len(errors)} {errors} files={len(error_files)}") def main(): @@ -52,8 +58,8 @@ def main(): help="Experiment YAML template location") parser.add_argument("-o", "--product", type=str, help="Product YAML template location") - parser.add_argument("-m", "--oscm", type=str, - help="The target OSC location where the STAC jsons will be created.") + parser.add_argument("-m", "--oscm", type=str, required=True, + help="REQUIRED The target OSC location where the STAC jsons will be created.") args = parser.parse_args() diff --git a/earthcode/generators/template_generator.py b/earthcode/generators/template_generator.py index 02cda80..94cb064 100644 --- a/earthcode/generators/template_generator.py +++ b/earthcode/generators/template_generator.py @@ -67,7 +67,7 @@ def main(): parser.add_argument("-o", "--product", action='store_true', help="If present generate a product template") parser.add_argument("-t", "--target", type=str, - help="The target location where the templates will be generated.") + help="The target location where the templates will be generated. 
If empty the CWD will be used") args = parser.parse_args() diff --git a/earthcode/generators/workflow_generator.py b/earthcode/generators/workflow_generator.py index 9c70fab..66c5ba8 100644 --- a/earthcode/generators/workflow_generator.py +++ b/earthcode/generators/workflow_generator.py @@ -1,5 +1,6 @@ from pathlib import Path from datetime import datetime +import json import logging import sys @@ -7,6 +8,7 @@ from earthcode.git_add import save_workflow_record_to_osc from earthcode.static import create_workflow_record, WorkflowMetadata +from earthcode.validator import validateOSCEntry logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() @@ -50,3 +52,11 @@ def create_workflow_stac_from_template(project_yaml, osc_path): workflow_record = create_workflow_record(workflow_metadata) save_workflow_record_to_osc(workflow_record, Path(osc_path)) + + # validate the saved record, since catalog links are updated when written to disk + workflow_path = Path(osc_path) / "workflows" / workflow_record["id"] / "record.json" + with open(workflow_path, "r", encoding="utf-8") as f: + json_workflow = json.load(f) + errors = validateOSCEntry(json_workflow, Path(osc_path)) + if errors: + raise AssertionError(f"Catalog validation failed. 
errors={len(errors)}\n{errors}") diff --git a/guide/5.Templates.ipynb b/guide/5.Templates.ipynb new file mode 100644 index 0000000..a4f9cc2 --- /dev/null +++ b/guide/5.Templates.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5.Templates\n", + "\n", + "In addition to previous methods, items can be added to the OSC through a simplified workflow:\n", + "1) Generate a yaml template of the item you want to add\n", + "2) Edit the template with the details\n", + "3) Pass the template to generate an entry inside the OSC repo\n", + "\n", + "This can be achieved both in terminal with a CLI command available after installation or in a notebook/script by calling the equivalent functions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Generate YAML Templates\n", + "The command can be run with the following parameters:\n", + "
\n",
+    "  -h, --help            show this help message and exit\n",
+    "  -p, --project         If present generate a project template with the same name\n",
+    "  -w, --workflow        If present generate a workflow template with the same name\n",
+    "  -e, --experiment      If present generate an experiment template\n",
+    "  -o, --product         If present generate a product template\n",
+    "  -t TARGET, --target TARGET\n",
+    "                        The target location where the templates will be generated.\n",
+    "                        If empty the CWD will be used\n",
+    "
\n", + "\n", + "```shell\n", + "earthcode_template_gen -p -w -e -o -t targetdir\n", + "```\n", + "\n", + "or by importing the equivalent function:\n", + "```python\n", + "from earthcode import generate_template\n", + "\n", + "generate_template(project=True, workflow=True, experiment=True, product=True, target=\"targetdir\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:root:No target folder specified, the templates will be generated in the PWD\n", + "INFO:root:Generating Project template at \"/Users/dean/Documents/EarthCODE/earthcode-library/guide\"\n" + ] + } + ], + "source": [ + "# Example with PROJECT\n", + "!earthcode_template_gen -p" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:root:Generating Project template at \"/Users/dean/Documents/EarthCODE/earthcode-library/guide\"\n" + ] + } + ], + "source": [ + "from earthcode import generate_template\n", + "\n", + "generate_template(project=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Edit Template\n", + "Now a `project.yaml` file should be present in the chosen target directory or in the cwd.\n", + "Simply edit all the fields in this file with the new project information and details." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Generate STAC from Template\n", + "To add a new OSC entry we need to pass the location of the edited yaml file and of the OSC repo to the following command/function:\n", + "
\n",
+    "  -h, --help            show this help message and exit\n",
+    "  -p PROJECT, --project PROJECT\n",
+    "                        Project YAML template location\n",
+    "  -w WORKFLOW, --workflow WORKFLOW\n",
+    "                        Workflow YAML template location\n",
+    "  -e EXPERIMENT, --experiment EXPERIMENT\n",
+    "                        Experiment YAML template location\n",
+    "  -o PRODUCT, --product PRODUCT\n",
+    "                        Product YAML template location\n",
+    "  -m OSCM, --oscm OSCM  REQUIRED The target OSC location where the STAC jsons will be created.\n",
+    "
\n", + "\n", + "```shell\n", + "earthcode_stac_gen -p ./project.yaml -w ./workflow.yaml -e ./experiment.yaml -o ./product.yaml -m ../open-science-catalog-metadata\n", + "```\n", + "\n", + "or with function:\n", + "```python\n", + "from earthcode import generate_stac\n", + "\n", + "generate_stac(project=\"./project.yaml\", workflow=\"./workflow.yaml\", experiment=\"./experiment.yaml\", product=\"./product.yaml\", osc_path=\"../open-science-catalog-metadata\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:root:Generating Project STAC json in OSC @ \"../open-science-catalog-metadata\"\n" + ] + } + ], + "source": [ + "#Example with PROJECT\n", + "!earthcode_stac_gen -p ./project.yaml -m ../open-science-catalog-metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:root:Generating Project STAC json in OSC @ \"../open-science-catalog-metadata\"\n" + ] + } + ], + "source": [ + "from earthcode import generate_stac\n", + "\n", + "generate_stac(project=\"./project.yaml\", osc_path=\"../open-science-catalog-metadata\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index 0867ed5..fe598e8 100644 --- a/tests/test_notebooks.py 
+++ b/tests/test_notebooks.py @@ -225,3 +225,15 @@ def test_notebooks(catalog_root:Path): cwd=str(repo_root), log_output=True, ) + + # templates + templates_cwd = catalog_root.parent / "templates" + templates_cwd.mkdir() + pm.execute_notebook( + input_path=str(notebooks_dir / "5.Templates.ipynb"), + output_path=None, + parameters={}, + kernel_name="python3", + cwd=str(templates_cwd), + log_output=True, + )