From a36442be49f6d595291d4bd343bbfc741a771c97 Mon Sep 17 00:00:00 2001
From: aliziel <21992503+aliziel@users.noreply.github.com>
Date: Fri, 3 Apr 2026 12:11:31 -0400
Subject: [PATCH 1/2] ci: reference head via environment

---
 .github/workflows/promotion-checker.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/promotion-checker.yml b/.github/workflows/promotion-checker.yml
index 583db7a..611436b 100644
--- a/.github/workflows/promotion-checker.yml
+++ b/.github/workflows/promotion-checker.yml
@@ -38,7 +38,7 @@ jobs:
       - name: Check if pr.yml succeeded
         id: check_pr_workflow
         run: |
-          WORKFLOW_STATUS=$(gh run list --repo ${{ github.repository }} --branch ${{ github.event.pull_request.head.ref }} --workflow "pr.yml" --json status --jq '.[0].status // "not found"')
+          WORKFLOW_STATUS=$(gh run list --repo ${{ github.repository }} --branch "$HEAD_REF" --workflow "pr.yml" --json status --jq '.[0].status // "not found"')
           echo "Previous workflow (pr.yml) status: $WORKFLOW_STATUS"
           if [ "$WORKFLOW_STATUS" != "completed" ]; then
             echo "Previous workflow (pr.yml) did not complete successfully. Exiting..."
@@ -46,6 +46,7 @@ jobs:
           fi
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HEAD_REF: ${{ github.event.pull_request.head.ref }}
 
   trigger-promotion-workflow:
     runs-on: ubuntu-latest

From a35ac7ed127c45ba231d63acfef7505e7bee0303 Mon Sep 17 00:00:00 2001
From: aliziel <21992503+aliziel@users.noreply.github.com>
Date: Mon, 6 Apr 2026 12:02:53 -0700
Subject: [PATCH 2/2] chore: lint checks

---
 README.md                     | 26 ++++++++++++++++----------
 requirements.in               |  2 +-
 scripts/generate_mdx.py       |  2 +-
 scripts/promote_collection.py |  2 +-
 scripts/promote_dataset.py    |  2 +-
 tests/test_collections.py     |  2 +-
 6 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 482fefc..25d631f 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,12 @@
 # veda-data
+
 This repository houses data and config used to create STAC records to be published to the veda STAC catalog.
 
-# Repository layout
+## Repository layout
+
 The repo follows the following folder structure:
 
-```
+```plain
 | ingestion-data
     | collections
         | archive
@@ -14,10 +16,10 @@ The repo follows the following folder structure:
         . collection-n.json
     | discovery-items
         | archive
-            . archived-discovery-items-1.json  
-            . archived-discovery-items-2.json  
+            . archived-discovery-items-1.json
+            . archived-discovery-items-2.json
             ...
-            . archived-discovery-items-n.json  
+            . archived-discovery-items-n.json
         . discovery-items-1.json
         . discovery-items-2.json
         ...
@@ -28,12 +30,15 @@ The repo follows the following folder structure:
 ## ingestion-data
 
 ### collections
+
 These are STAC collection records for all the available datasets. They should conform to the [STAC specification for a collection](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md).
 
 #### archive
+
 These are the collections that we no longer update. However, we might still maintain them in the catalog.
 
 ### discovery-items
+
 These are the items ingestion config files that are used by our data pipelines (airflow), specifically the `veda_discover` DAG in [veda-data-airflow](https://github.com/NASA-IMPACT/veda-data-airflow), which discovers all the specified files and triggers the `veda_ingest_raster` DAG which takes care of creating the stac items and publishing them.
 
 The format looks like this:
@@ -63,10 +68,10 @@ The format looks like this:
 ```
 
 ### transfer-config
-These are the configs used to transfer assets from the dev bucket (`ghgc-data-store-develop` - where the data was delivered) to the production bucket (`ghgc-data-store` - where the data is moved after it is finalized). The files from the production bucket is used to publish to the catalog. The transfer is done via triggering `veda_transfer` DAG in [veda-data-airflow](https://github.com/NASA-IMPACT/veda-data-airflow).
 
-##### Description of each field:
+These are the configs used to transfer assets from the dev bucket (`ghgc-data-store-develop` - where the data was delivered) to the production bucket (`ghgc-data-store` - where the data is moved after it is finalized). The files from the production bucket are used to publish to the catalog. The transfer is done by triggering the `veda_transfer` DAG in [veda-data-airflow](https://github.com/NASA-IMPACT/veda-data-airflow).
 
+#### Description of each field
 
 | Field              | Description                                      |
 |--------------------|--------------------------------------------------|
@@ -75,16 +80,17 @@ These are the configs used to transfer assets from the dev bucket (`ghgc-data-st
 | `prefix`             | The s3 prefix under which to search for the files                                                 |
 | `filename_regex`     | The regex pattern that the files to be discovered should match                                                 |
 | `id_regex`           | Specifies in regex what part of the filename (usually the datetime) should be used to group assets into item. Example: if the filenames are `asset1_20151201.tif`, `asset2_20151201.tif`, `asset1_20161201.tif`, `asset2_20161201.tif`; the item should be based on the datetime part, hence it'd be `".*_(.*).tif$"`. The part should be specified using round brackets. The is also the part of the filenames that will be used to form the item id, together with the `id_template` field.                                                  |
-| `id_template`        | This is a python f-string formatted string that is used to define the `id` of the STAC item. It's used together with the value of `id_regex`. So, going off of the example above, if the `id_template` is `eccodarwin-{}`, then the two item `id`s would be `eccodarwin-20151201` and `eccodarwin-20161201`                                                    | 
+| `id_template`        | This is a Python format-string template (with `{}` as the placeholder) that is used to define the `id` of the STAC item. It's used together with the value of `id_regex`. So, going off of the example above, if the `id_template` is `eccodarwin-{}`, then the two item `id`s would be `eccodarwin-20151201` and `eccodarwin-20161201`                                                    |
 | `datetime_range`     | This is used to extract the datetime range from the filename. Valid values are `day`, `month` and `year`. Example: if the filename has `20160104` in it, and `datetime_range` is `day` - the `start_datetime` and `end_datetime` are the start and end of the day. For `month`, they are the start and end of the month and so on.                                                   |
 | `<asset_name>`       | An `id` for the asset                                   |
 | `assets.<asset_name>.title`       | A title for the asset                                   |
 | `assets.<asset_name>.description` | A description for the asset                                   |
 | `assets.<asset_name>.regex`       | The regex pattern that matches a filename to its respective asset                                   |
 
+#### config archive
 
-#### archive
 These are the discovery-items config for collections that we no longer update.
 
 ## notebooks
-Sometimes, there are exceptional datasets that might require a one-off ingestion that is not supported by the current state of our data pipelines. In such cases, we create notebooks/python scripts that can be used to ingest those data. This is where those notebooks/python scripts live.
\ No newline at end of file
+
+Sometimes, there are exceptional datasets that might require a one-off ingestion that is not supported by the current state of our data pipelines. In such cases, we create notebooks/python scripts that can be used to ingest those data. This is where those notebooks/python scripts live.
diff --git a/requirements.in b/requirements.in
index 8a1b326..8f9b637 100644
--- a/requirements.in
+++ b/requirements.in
@@ -3,4 +3,4 @@ pip-tools
 pre-commit
 pystac[validation]
 pytest
-ruff
\ No newline at end of file
+ruff
diff --git a/scripts/generate_mdx.py b/scripts/generate_mdx.py
index 92db411..e8726d5 100644
--- a/scripts/generate_mdx.py
+++ b/scripts/generate_mdx.py
@@ -127,4 +127,4 @@ def safe_open_w(path):
         ofile.write(new_content)
 
     collection_id = input_data["collection"]
-    print(collection_id)
\ No newline at end of file
+    print(collection_id)
diff --git a/scripts/promote_collection.py b/scripts/promote_collection.py
index 49a79d2..abc26e6 100644
--- a/scripts/promote_collection.py
+++ b/scripts/promote_collection.py
@@ -77,4 +77,4 @@ def trigger_collection_dag(payload: Dict[str, Any], stage: str):
     except FileNotFoundError:
         print(f"Error: File '{sys.argv[1]}' not found.")
     except json.JSONDecodeError:
-        raise ValueError(f"Invalid JSON content in file {sys.argv[1]}")
\ No newline at end of file
+        raise ValueError(f"Invalid JSON content in file {sys.argv[1]}")
diff --git a/scripts/promote_dataset.py b/scripts/promote_dataset.py
index 9558cd1..e2ea38f 100644
--- a/scripts/promote_dataset.py
+++ b/scripts/promote_dataset.py
@@ -131,4 +131,4 @@ def promote_to_production(payload):
     except FileNotFoundError:
         print(f"Error: File '{sys.argv[1]}' not found.")
     except json.JSONDecodeError:
-        raise ValueError(f"Invalid JSON content in file {sys.argv[1]}")
\ No newline at end of file
+        raise ValueError(f"Invalid JSON content in file {sys.argv[1]}")
diff --git a/tests/test_collections.py b/tests/test_collections.py
index 8c41827..3ab80ed 100644
--- a/tests/test_collections.py
+++ b/tests/test_collections.py
@@ -10,4 +10,4 @@
 @pytest.mark.parametrize("path", COLLECTIONS_PATH.rglob("*.json"))
 def test_validate(path: Path) -> None:
     collection = Collection.from_file(str(path))
-    collection.validate()
\ No newline at end of file
+    collection.validate()