From a948f1a659e7c7ecadd27bdc66ba25f4a31a4019 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Thu, 15 Jan 2026 09:54:30 +0100 Subject: [PATCH 1/3] Run dataset publishing sequentially Large dataset uploads (500 MB+) running in parallel caused memory issues and silent failures on a machine with 2 GB of RAM. --- scripts/dataset/publish/index.js | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/scripts/dataset/publish/index.js b/scripts/dataset/publish/index.js index ec8906173..367e930ba 100644 --- a/scripts/dataset/publish/index.js +++ b/scripts/dataset/publish/index.js @@ -22,26 +22,29 @@ export default async function publishRelease({ archivePath, releaseDate, stats } throw new Error('No publishing platform configured. Please configure at least one of: GitHub (OTA_ENGINE_GITHUB_TOKEN), GitLab (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + datasetId or organizationIdOrSlug in config).'); } - const results = await Promise.allSettled(platforms.map(async platform => { - const url = await platform.publish(); - - return { platform: platform.name, url }; - })); - - const succeeded = results.filter(result => result.status === 'fulfilled'); - const failed = results.filter(result => result.status === 'rejected'); + const succeeded = []; + const failed = []; + + // Execute publications sequentially to avoid memory issues with large file uploads + for (const platform of platforms) { + try { + const url = await platform.publish(); + + succeeded.push({ platform: platform.name, url }); + } catch (error) { + failed.push({ platform: platform.name, error }); + } + } if (failed.length) { let errorMessage = !succeeded.length ? 
'All platforms failed to publish:' : 'Some platforms failed to publish:'; - failed.forEach(rejectedResult => { - const index = results.indexOf(rejectedResult); - - errorMessage += `\n - ${platforms[index].name}: ${rejectedResult.reason.message}`; + failed.forEach(({ platform, error }) => { + errorMessage += `\n - ${platform}: ${error.message}`; }); logger.error(errorMessage); } - return succeeded.map(result => result.value); + return succeeded; } From 1b420881e6ebaa3006576e37fdae4add0605052e Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Thu, 15 Jan 2026 10:16:21 +0100 Subject: [PATCH 2/3] Add changelog entry --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a072d3337..caa5dfcba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased [patch] + +> Development of this release was supported by [Reset Tech](https://www.reset.tech). + +### Fixed + +- Fix dataset publishing failure for large files by executing uploads sequentially instead of in parallel + ## 10.3.2 - 2026-01-14 > Development of this release was supported by [Reset Tech](https://www.reset.tech). From d2c86cf9f265bd868c52eb623d6a7db553b14f94 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Thu, 15 Jan 2026 16:58:06 +0100 Subject: [PATCH 3/3] Stream dataset file for GitHub release upload Avoid loading the entire archive into memory by using createReadStream instead of readFileSync. This prevents out-of-memory crashes when publishing large datasets on memory-constrained machines. 
--- scripts/dataset/publish/github/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dataset/publish/github/index.js b/scripts/dataset/publish/github/index.js index 5aaec8ad5..b6696cd12 100644 --- a/scripts/dataset/publish/github/index.js +++ b/scripts/dataset/publish/github/index.js @@ -30,7 +30,7 @@ export default async function publish({ archivePath, releaseDate, stats }) { logger.info('Uploading release asset…'); await octokit.rest.repos.uploadReleaseAsset({ - data: fsApi.readFileSync(archivePath), + data: fsApi.createReadStream(archivePath), headers: { 'content-type': 'application/zip', 'content-length': fsApi.statSync(archivePath).size,