def create_experiment_stac_from_template(experiment_yaml, osc_path):
    """Create an Experiment record from a YAML template and save it in the OSC repo.

    The template is parsed with ``yaml.safe_load``. Every top-level field that
    is present must carry a value; ``contacts``, ``spatial_extent`` and
    ``temporal_extent`` may be omitted entirely.

    :param experiment_yaml: Path to the filled-in Experiment YAML template.
    :param osc_path: OSC repository the record is saved to (wrapped in ``Path``).
    :raises ValueError: If the template is empty or not a mapping, a top-level
        field has no value, or a temporal bound does not match
        ``%Y-%m-%dT%H:%M:%SZ``.
    :raises KeyError: If a required field is missing from the template.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    with open(experiment_yaml, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)

    # An empty file makes safe_load return None; report that clearly instead
    # of failing with an AttributeError on ``data.items()``.
    if not isinstance(data, dict):
        message = f"The Experiment YAML is empty or is not a mapping: {experiment_yaml}"
        log.error(message)
        raise ValueError(message)

    for k, v in data.items():
        if v is None:
            message = f"The Experiment YAML contains an empty value for the following field: {k}"
            log.error(message)
            raise ValueError(message)

    # The temporal extent is optional; both bounds stay None when it is absent.
    temporal_extent = data.get('temporal_extent')
    if temporal_extent is not None:
        temporal_start = datetime.strptime(temporal_extent['start'], timestamp_format)
        temporal_end = datetime.strptime(temporal_extent['end'], timestamp_format)
    else:
        temporal_start = temporal_end = None

    experiment_metadata = ExperimentMetadata(
        experiment_id=data['id'],
        experiment_title=data['title'],
        experiment_description=data['description'],
        experiment_license=data['license'],
        experiment_keywords=data['keywords'],
        experiment_formats=data['formats'],
        experiment_themes=data['themes'],
        experiment_input_parameters_link=data['link_params'],
        # NOTE(review): keyword spelling kept exactly as in the original call;
        # presumably it matches the ExperimentMetadata field name - verify.
        experiment_enviroment_link=data['link_env'],
        workflow_id=data['workflow'],
        workflow_title=data['workflow-title'],
        product_id=data['product'],
        product_title=data['product-title'],
        contacts=data.get('contacts'),
        experiment_bbox=data.get('spatial_extent'),
        experiment_start_datetime=temporal_start,
        experiment_end_datetime=temporal_end,
    )

    experiment_record = create_experiment_record(experiment_metadata)

    save_experiment_record_to_osc(experiment_record, Path(osc_path))
following field: {k}" + ) + raise Exception( + f"The Project YAML contains an empty value for the following field: {k}" + ) + + # Define spatial and temporal extent + spatial_extent = pystac.SpatialExtent(data["extent"]["spatial"]["bbox"]).bboxes + temporal_start = datetime.strptime( + data["extent"]["temporal"]["start"], "%Y-%m-%dT%H:%M:%SZ" + ) + temporal_end = datetime.strptime( + data["extent"]["temporal"]["end"], "%Y-%m-%dT%H:%M:%SZ" + ) + + # optional + product_license = data["license"] + if product_license == "other": + product_license_link = data["license_link"] + else: + product_license_link = None + + via_link = data.get("via_link", None) + + product_metadata = ProductCollectionMetadata( + product_id=data["id"], + product_title=data["title"], + product_description=data["description"], + product_keywords=data["keywords"], + product_status=data["status"], + product_region=data["region"], + product_themes=data["themes"], + product_missions=data["missions"], + product_variables=data["variables"], + project_id=data["project"], + project_title=data["project-title"], + product_parameters=data["cf_parameters"], + product_doi=data["sci:doi"], + product_bbox=spatial_extent, + product_start_datetime=temporal_start, + product_end_datetime=temporal_end, + product_license=product_license, + license_link=product_license_link, + access_link=via_link, + ) + + product_collection = create_product_collection(product_metadata) + + save_product_collection_to_catalog(product_collection, Path(osc_path)) diff --git a/earthcode/generators/project_generator.py b/earthcode/generators/project_generator.py new file mode 100644 index 0000000..086b8a0 --- /dev/null +++ b/earthcode/generators/project_generator.py @@ -0,0 +1,52 @@ +from pathlib import Path +from datetime import datetime +import logging +import sys + +import pystac +import yaml + +from earthcode.static import create_project_collection, ProjectCollectionMetadata +from earthcode.git_add import save_project_collection_to_osc 
def create_project_stac_from_template(project_yaml, osc_path):
    """Create a Project collection from a YAML template and save it in the OSC repo.

    The template is parsed with ``yaml.safe_load``. Every top-level field must
    carry a value, and all fields read below are required.

    :param project_yaml: Path to the filled-in Project YAML template.
    :param osc_path: OSC repository the collection is saved to (wrapped in ``Path``).
    :raises ValueError: If the template is empty or not a mapping, a top-level
        field has no value, or a temporal bound does not match
        ``%Y-%m-%dT%H:%M:%SZ``.
    :raises KeyError: If a required field is missing from the template.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    with open(project_yaml, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)

    # An empty file makes safe_load return None; report that clearly instead
    # of failing with an AttributeError on ``data.items()``.
    if not isinstance(data, dict):
        message = f"The Project YAML is empty or is not a mapping: {project_yaml}"
        log.error(message)
        raise ValueError(message)

    for k, v in data.items():
        if v is None:
            message = f"The Project YAML contains an empty value for the following field: {k}"
            log.error(message)
            raise ValueError(message)

    # read spatial and temporal extent
    extent = data['extent']
    spatial_extent = pystac.SpatialExtent(extent['spatial']['bbox']).bboxes
    temporal_start = datetime.strptime(extent['temporal']['start'], timestamp_format)
    temporal_end = datetime.strptime(extent['temporal']['end'], timestamp_format)

    # read consortium contacts as (name, email) pairs
    project_cms = [
        (member['name'], member['email'])
        for member in data['consortium_members']
    ]

    project_metadata = ProjectCollectionMetadata(
        project_id=data['id'],
        project_title=data['title'],
        project_description=data['description'],
        project_status=data['status'],
        project_license=data['license'],
        project_bbox=spatial_extent,
        project_start_datetime=temporal_start,
        project_end_datetime=temporal_end,
        project_themes=data['themes'],
        to_name=data['to_name'],
        to_email=data['to_email'],
        consortium_members=project_cms,
        website_link=data['link_website'],
        eo4society_link=data['link_eo4society'],
    )
    project_collection = create_project_collection(project_metadata)

    save_project_collection_to_osc(project_collection, Path(osc_path))
create_product_stac_from_template +from .project_generator import create_project_stac_from_template +from .workflow_generator import create_workflow_stac_from_template + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def generate_stac(osc_path, project=None, workflow=None, experiment=None, product=None): + """ + Generates the requested STAC json files at the specified OSC repo. + + :param osc_path: OSC repo where the STAC json will be created. + :param project: Path to the Project YAML template, if empty no Project STAC will be generated + :param workflow: Path to the Workflow YAML template, if empty no Workflow STAC will be generated + :param experiment: Path to the Experiment YAML template, if empty no Experiment STAC will be generated + :param product: Path to the Product YAML template, if empty no Product STAC will be generated + """ + + if project is not None: + log.info("Generating Project STAC json in OSC @ \"" + osc_path + "\"") + create_project_stac_from_template(project, osc_path) + if workflow is not None: + log.info("Generating Workflow STAC json in OSC @ \"" + osc_path + "\"") + create_workflow_stac_from_template(workflow, osc_path) + if experiment is not None: + log.info("Generating Experiment STAC json in OSC @ \"" + osc_path + "\"") + create_experiment_stac_from_template(experiment, osc_path) + if product is not None: + log.info("Generating Product STAC json in OSC @ \"" + osc_path + "\"") + create_product_stac_from_template(product, osc_path) + + if project is None and workflow is None and experiment is None and product is None: + log.warning("No template provided." + "Run again with at least a provided template to produce the relative STAC json." 
def generate_template(project=False, workflow=False, experiment=False, product=False, target=None):
    """
    Creates requested yaml templates at the desired target folder.
    If the folder does not exist it will be created.
    If no folder is specified the PWD where the program is run will be selected.

    :param project: If True: generates the Project yaml template
    :param workflow: If True: generates the Workflow yaml template
    :param experiment: If True: generates the Experiment yaml template
    :param product: If True: generates the Product yaml template
    :param target: target directory where the templates will be generated.
        ``None`` (the default) means the current working directory at call time.
    """
    # Resolve the fallback at call time. A default of ``os.getcwd()`` in the
    # signature would be frozen to the directory the module was imported from.
    if target is None:
        log.warning("No target folder specified, the templates will be generated in the PWD")
        target = os.getcwd()

    # Create target directory if it doesn't exist
    os.makedirs(target, exist_ok=True)

    requested = (
        ("Project", "project.yaml", project),
        ("Workflow", "workflow.yaml", workflow),
        ("Experiment", "experiment.yaml", experiment),
        ("Product", "product.yaml", product),
    )

    for label, filename, selected in requested:
        if not selected:
            continue
        # Lazy %-formatting also accepts path-like targets, which "+" would reject.
        log.info('Generating %s template at "%s"', label, target)
        template = resources.files("earthcode.generators").joinpath("templates").joinpath(filename)
        with resources.as_file(template) as path:
            shutil.copy(path, target)

    if not any(selected for _, _, selected in requested):
        log.warning("No options selected. "
                    "Run again with at least one option to produce the templates. "
                    "For additional help invoke with -h")
+license: proprietary # should be one of https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json +keywords: # experiment keywords (to enhance the findability of the experiment) + - agriculture + - crops +formats: # format of experiment output + - GeoTIFF + - PNG +workflow: cool-project-workflow # id of the workflow used for this experiment +workflow-title: Workflow to analyze Cool Project # title of the workflow used for this experiment +product: cool-project-product # id of the output product produced by this experiment +product-title: Cool Project Product # title of the output product produced by this experiment + +# Define links, link relations and link titles: e.g. link to service used to run the experiment etc. +link_env: https://myplatform.com +link_params: https://github.com/MyExperiment/parameters.git + +# Define experiment themes. The fields are restricted to the themes available in the OSC and having at least one theme is mandatory. Check available themes here: https://opensciencedata.esa.int/themes/catalog +themes: + - land + - atmosphere + +# Optional contacts +contacts: +- name: Technical Officer + position: researcher + roles: + - technical_officer + organization: CGI + links: + - rel: about + type: text/html + href: https://cgi.com/ + contact_instructions: Contact preferably through project support page + +- name: Junior Staff + roles: + - assistant + +# Optional extent +spatial_extent: + - - -180 + - -90 + - 180 + - 90 + +temporal_extent: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' \ No newline at end of file diff --git a/earthcode/generators/templates/product.yaml b/earthcode/generators/templates/product.yaml new file mode 100644 index 0000000..9828ddb --- /dev/null +++ b/earthcode/generators/templates/product.yaml @@ -0,0 +1,53 @@ +id: cool-project-product # This is the unique id of the product.
Typically, contains the dataset title+project name (or acronym) +description: output product results for Cool Project # Short, but meaningful product description. It should provide enough information to the external users on the specific product. +title: Cool Project Product # Product Title +license: proprietary # license of the product. Should be one of https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json +#license_link: https://github.com/cool-license # If the license is not available put license='other' and fill this field with the license link. +status: completed # Product status +region: cool-land # Text description of the study area +project: cool-project # Unique id of the OSC project this product is related to. It must be the id provided in the ./project/ +project-title: CoolProjectTitle # title of the related project + +# Define product themes. The fields are restricted to the themes available in the OSC and having at least one theme is mandatory. +# Check available themes here: https://opensciencedata.esa.int/themes/catalog and https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/themes +themes: + - land + - oceans + +# List of Keywords associated with the product. These are expected to be in line with the description. +keywords: + - agriculture + - crops + +# Array of Variables related to the product. This array of values is mandatory and limited to variables already existing in the OSC +# in this list https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables. +# If you would like to associate your product to a variable that is not on the list, create a variable entry first. +variables: + - lightning + +# Array of CF Parameters: see https://github.com/stac-extensions/cf for more details +cf_parameters: + - lightning + +# Array of ESA missions related to the product.
This array of values is mandatory and limited to missions already existing in the OSC +# in this list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions. +# If you would like to associate your product to a mission that is not on the list, create an eo-mission entry first. +missions: + - sentinel-2 + +# The Temporal and Spatial Extent of the product +extent: + spatial: + bbox: + - - -180 + - -90 + - 180 + - 90 + temporal: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' + +# DOI reference +sci:doi: https://doi.org/10.12345/abc-r4nd0mn + +via_link: https://myplatform.com/products/cool-project-product \ No newline at end of file diff --git a/earthcode/generators/templates/project.yaml b/earthcode/generators/templates/project.yaml new file mode 100644 index 0000000..86dd450 --- /dev/null +++ b/earthcode/generators/templates/project.yaml @@ -0,0 +1,41 @@ +--- +# Define id, title, description, project status, license +id: cool-project # This is your project id. Please make sure to use unique id name for your project! The parent folder of the collection.json should have the same name as this id (not displayed in the browser). +title: CoolProjectTitle # Title of your project. Official acronym of the project may be used as well (this will be displayed to public) +description: Brief description of the project +status: completed # status of the project - Select from: completed, ongoing, scheduled +license: proprietary # Top level license of project outcomes. Should be one of https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json + +# Define spatial and temporal extent: the study area of the project and its planned duration. +extent: + spatial: + bbox: + - - -180 + - -90 + - 180 + - 90 + temporal: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' + +# Define links, link relations and link titles. This is a list of links to the project websites. 
These are mandatory and you have to adapt them to your project. +link_website: https://cool-project.com +link_eo4society: https://eo4society.esa.int/projects + +# Define project themes. The fields are restricted to the themes available in the OSC and having at least one theme is mandatory. Check available themes here: https://opensciencedata.esa.int/themes/catalog +themes: +- land +- atmosphere +- cryosphere +- magnetosphere-ionosphere +- oceans +- solid-earth + +# Define contacts, list of consortium members working on the project and contact to ESA TO following the project. This field is required. +to_name: Technical Officer +to_email: t_o_mail@mail.com +consortium_members: + - name: Consortium Member 1 + email: cm1@mail.com + - name: Consortium Member 2 + email: cm2@mail.com \ No newline at end of file diff --git a/earthcode/generators/templates/workflow.yaml b/earthcode/generators/templates/workflow.yaml new file mode 100644 index 0000000..c0b8dac --- /dev/null +++ b/earthcode/generators/templates/workflow.yaml @@ -0,0 +1,41 @@ +--- +# Unique identifier for the workflow STAC item +id: cool-project-workflow + +# Core workflow metadata +title: Workflow to analyze Cool Project # A concise, descriptive title of the workflow +description: The first workflow able to process images from Cool Project # A summary of what the workflow does +project: cool-project-id # Project ID associated with the workflow +project-title: CoolProjectTitle # Title of associated project +formats: # Output formats of the workflow (e.g., GeoTIFF). +- GeoTIFF +- PNG +keywords: # Array of keywords to support discoverability (e.g., agriculture, crops). +- keyword1 +- keyword2 +- keyword3... +license: proprietary # License for the workflow +sci:doi: https://doi.org/10.12345/abc-r4nd0mn # DOI reference + +# Array of themes the workflow relates to. Each entry includes a concepts array with IDs (e.g., 'land') and a scheme URL.
def create_workflow_stac_from_template(project_yaml, osc_path):
    """Create a Workflow record from a YAML template and save it in the OSC repo.

    The template is parsed with ``yaml.safe_load``. Every top-level field that
    is present must carry a value; ``spatial_extent`` and ``temporal_extent``
    may be omitted entirely.

    :param project_yaml: Path to the filled-in Workflow YAML template. The
        parameter keeps its historical name for backward compatibility.
    :param osc_path: OSC repository the record is saved to (wrapped in ``Path``).
    :raises ValueError: If the template is empty or not a mapping, a top-level
        field has no value, or a temporal bound does not match
        ``%Y-%m-%dT%H:%M:%SZ``.
    :raises KeyError: If a required field is missing from the template.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    with open(project_yaml, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)

    # An empty file makes safe_load return None; report that clearly instead
    # of failing with an AttributeError on ``data.items()``.
    if not isinstance(data, dict):
        message = f"The Workflow YAML is empty or is not a mapping: {project_yaml}"
        log.error(message)
        raise ValueError(message)

    for k, v in data.items():
        if v is None:
            message = f"The Workflow YAML contains an empty value for the following field: {k}"
            log.error(message)
            raise ValueError(message)

    # read optional spatial and temporal extent
    spatial_extent = data.get('spatial_extent')
    temporal_extent = data.get('temporal_extent')
    if temporal_extent is not None:
        temporal_start = datetime.strptime(temporal_extent['start'], timestamp_format)
        temporal_end = datetime.strptime(temporal_extent['end'], timestamp_format)
    else:
        temporal_start = temporal_end = None

    workflow_metadata = WorkflowMetadata(
        workflow_id=data['id'],
        workflow_title=data['title'],
        workflow_description=data['description'],
        workflow_license=data['license'],
        workflow_keywords=data['keywords'],
        workflow_formats=data['formats'],
        workflow_themes=data['themes'],
        codeurl=data['link_code'],
        project_id=data['project'],
        project_title=data['project-title'],
        workflow_doi=data['sci:doi'],
        workflow_bbox=spatial_extent,
        workflow_start_datetime=temporal_start,
        workflow_end_datetime=temporal_end,
    )

    workflow_record = create_workflow_record(workflow_metadata)

    save_workflow_record_to_osc(workflow_record, Path(osc_path))
validate(instance=data, schema=schema, resolver=resolver) + except Exception as e: + ctx['errors'].append(e) #TODO: Implement Item checks @@ -602,4 +603,4 @@ def validate_catalog(root_path): errors.append(file_errors) error_files.append(full_path) - return errors, error_files \ No newline at end of file + return errors, error_files diff --git a/pixi.lock b/pixi.lock index 5b06723..4a79176 100644 --- a/pixi.lock +++ b/pixi.lock @@ -4059,7 +4059,7 @@ packages: - pypi: ./ name: earthcode version: 0.1.7 - sha256: 6ec5b0cc1cd4e44fe0ddb5a810fa0f46abe68a296d494670a68ea6981bea69c5 + sha256: 334a97b40d7f802bd26f7065e94c887678e16ee20bdb3b8ca51b473f23b85d87 requires_dist: - pystac>=1.14.1,<2 - xarray>=2025.12.0,<2026 @@ -4083,6 +4083,7 @@ packages: - pydantic>=2.12.5,<3 - requests>=2.32.5,<3 - tokenizers>=0.22.2,<1 + - pyyaml>=6.0.3,<7 - jupyter-book>=1.0.4,<2 ; extra == 'dev' - jupyterlab>=4.5.0,<5 ; extra == 'dev' - papermill>=2.7.0,<3 ; extra == 'dev' diff --git a/pixi.toml b/pixi.toml index 269677b..d09156f 100644 --- a/pixi.toml +++ b/pixi.toml @@ -29,6 +29,7 @@ pillow = ">=10.3,<12" onnxruntime = ">=1.24.1,<2" platformdirs = ">=4.5.0,<5" tokenizers = ">=0.22.2,<1" +PyYAML = ">=6.0.3,<7" [target.osx-arm64.dependencies] python = ">=3.12,<3.14" diff --git a/pyproject.toml b/pyproject.toml index 56a9ba4..4bf7826 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ license-files = ["LICENSE"] authors = [ { name = "Krasen Samardzhiev", email = "krasensam@gmail.com" }, { name = "Deyan Samardzhiev", email = "dean@lampata.co.uk" }, + { name = "Claudio Candelori", email = "claudio.candelori@cgi.com"}, ] keywords = [ "earth observation", @@ -47,6 +48,7 @@ dependencies = [ "pydantic>=2.12.5,<3", "requests>=2.32.5,<3", "tokenizers>=0.22.2,<1", + "PyYAML>=6.0.3,<7", ] [project.urls] @@ -70,6 +72,37 @@ build-backend = "hatchling.build" [tool.hatch.build] exclude = ["earthcode/models/**"] +artifacts = [ + "earthcode/schemas/catalog.json", + 
"earthcode/schemas/collection.json", + "earthcode/schemas/contacts.json", + "earthcode/schemas/eo-missions/children.json", + "earthcode/schemas/eo-missions/parent.json", + "earthcode/schemas/experiments/children.json", + "earthcode/schemas/experiments/parent.json", + "earthcode/schemas/license.json", + "earthcode/schemas/osc.json", + "earthcode/schemas/products/children.json", + "earthcode/schemas/products/parent.json", + "earthcode/schemas/projects/children.json", + "earthcode/schemas/projects/parent.json", + "earthcode/schemas/records.json", + "earthcode/schemas/themes.json", + "earthcode/schemas/themes/children.json", + "earthcode/schemas/themes/parent.json", + "earthcode/schemas/variables/children.json", + "earthcode/schemas/variables/parent.json", + "earthcode/schemas/workflows/children.json", + "earthcode/schemas/workflows/parent.json", + "earthcode/generators/templates/experiment.yaml", + "earthcode/generators/templates/product.yaml", + "earthcode/generators/templates/project.yaml", + "earthcode/generators/templates/workflow.yaml", +] [tool.hatch.build.targets.wheel] packages = ["earthcode"] + +[project.scripts] +earthcode_template_gen = "earthcode.generators.template_generator:main" +earthcode_stac_gen = "earthcode.generators.stac_generator:main" diff --git a/tests/test_valid_generators.py b/tests/test_valid_generators.py new file mode 100644 index 0000000..d4e4d23 --- /dev/null +++ b/tests/test_valid_generators.py @@ -0,0 +1,186 @@ +import shutil +import tempfile +from pathlib import Path +from importlib import resources +import filecmp +import pytest +import os + +from earthcode.validator import validate_catalog +from earthcode.generators import generate_template, generate_stac + + +### asummes a error free catalog +SOURCE_CATALOG = Path("../open-science-catalog-metadata/").resolve() + + +@pytest.fixture() +def catalog_root(tmp_path: Path) -> Path: + + if not SOURCE_CATALOG.exists(): + pytest.skip(f"Missing source catalog at {SOURCE_CATALOG}") + + target 
def assertIsFile(path):
    """Raise AssertionError unless *path* resolves to an existing regular file."""
    resolved = Path(path).resolve()
    if resolved.is_file():
        return
    raise AssertionError("File does not exist: %s" % str(path))
def test_generate_template():
    """Each requested template is copied unchanged into the target directory."""
    template_kinds = ("project", "workflow", "experiment", "product")

    # TemporaryDirectory removes the directory on exit, even when an assertion fails.
    with tempfile.TemporaryDirectory() as test_dir:
        generate_template(
            project=True, workflow=True, experiment=True, product=True, target=test_dir
        )

        generated = {
            kind: os.path.join(test_dir, f"{kind}.yaml") for kind in template_kinds
        }

        # First make sure every file exists, then compare contents one by one.
        for path in generated.values():
            assertIsFile(path)

        packaged_templates = resources.files("earthcode.generators").joinpath("templates")
        for kind, path in generated.items():
            with resources.as_file(
                packaged_templates.joinpath(f"{kind}.yaml")
            ) as expected:
                # shallow=False forces a byte-for-byte comparison rather than
                # trusting matching os.stat() signatures.
                assert filecmp.cmp(path, expected, shallow=False), (
                    f"The {kind} template is different from the expected one"
                )
def test_generate_template_with_no_template_selected_logs_warning(caplog):
    """generate_template logs a warning when every template flag is off."""
    nothing_selected = dict(project=False, workflow=False, experiment=False, product=False)
    with tempfile.TemporaryDirectory() as scratch_dir:
        with caplog.at_level("WARNING"):
            generate_template(target=scratch_dir, **nothing_selected)
        assert "No options selected." in caplog.text


def test_generate_stac_with_no_template_selected_logs_warning(caplog):
    """generate_stac logs a warning when no template path is supplied."""
    no_templates = dict(project=None, workflow=None, experiment=None, product=None)
    with tempfile.TemporaryDirectory() as scratch_dir:
        with caplog.at_level("WARNING"):
            generate_stac(osc_path=scratch_dir, **no_templates)
        assert "No template provided." in caplog.text