From 875107af111c7fea4d6e5db53e751db69c2f3e6b Mon Sep 17 00:00:00 2001 From: OpenClaw Date: Tue, 28 Apr 2026 04:52:08 -0400 Subject: [PATCH] fix: use archive_org_indexer in update-databases workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI workflow was still calling myrient_indexer.py (dead since March 2026) instead of archive_org_indexer.py, causing the combined romi_db.tsv to be empty and validate_db.py to fail. Changes: - Replace myrient_indexer.py → archive_org_indexer.py in CI step - Use --full-urls instead of --compact-urls (archive.org indexer defaults to full URLs, and sources.txt is still copied for offline mode) - Update comment about sources.txt (no longer Myrient-specific) --- .github/workflows/update-databases.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-databases.yml b/.github/workflows/update-databases.yml index d609a68..c44336e 100644 --- a/.github/workflows/update-databases.yml +++ b/.github/workflows/update-databases.yml @@ -51,7 +51,7 @@ jobs: mkdir -p release_databases cd tools - ARGS="--deduplicate --exclude-variants --compact-urls" + ARGS="--deduplicate --exclude-variants --full-urls" ARGS="$ARGS --region-priority World,USA,EUR,JPN,ASA,Unknown" ARGS="$ARGS --output ../release_databases" @@ -63,8 +63,8 @@ jobs: ARGS="$ARGS --platform ${{ inputs.platforms }}" fi - echo "Running: python myrient_indexer.py $ARGS" - python myrient_indexer.py $ARGS + echo "Running: python archive_org_indexer.py $ARGS" + python archive_org_indexer.py $ARGS cp index.html ../release_databases/ - name: Generate combined database for online mode @@ -86,7 +86,7 @@ jobs: echo "Generated combined database file: $(wc -l < $COMBINED_DB) entries" # Also keep individual files for offline package - # Create sources.txt with full Myrient URLs for offline mode + # Copy sources.txt for offline mode cp tools/sources.txt release_databases/sources.txt echo "Copied sources.txt for offline package."