diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 071259596..b2bc0780b 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -38,7 +38,7 @@ jobs: with: node-version: 22.x - name: Start MongoDB - uses: supercharge/mongodb-github-action@1.3.0 + uses: supercharge/mongodb-github-action@1.12.1 with: mongodb-version: 4.2 - name: Setup Maven Cache diff --git a/src/main/java/com/conveyal/datatools/manager/models/FeedSourceSummary.java b/src/main/java/com/conveyal/datatools/manager/models/FeedSourceSummary.java index d86a15614..ab5883f2e 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/FeedSourceSummary.java +++ b/src/main/java/com/conveyal/datatools/manager/models/FeedSourceSummary.java @@ -7,16 +7,16 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.google.common.collect.Lists; -import com.mongodb.client.model.Accumulators; -import com.mongodb.client.model.Projections; import com.mongodb.client.model.Sorts; import com.mongodb.client.model.UnwindOptions; +import com.mongodb.client.model.Variable; import org.bson.Document; import org.bson.conversions.Bson; import java.time.LocalDate; import java.time.ZoneId; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -26,7 +26,6 @@ import static com.conveyal.datatools.manager.DataManager.hasConfigProperty; import static com.conveyal.datatools.manager.DataManager.isExtensionEnabled; import static com.conveyal.datatools.manager.DataManager.isModuleEnabled; -import static com.mongodb.client.model.Aggregates.group; import static com.mongodb.client.model.Aggregates.limit; import static com.mongodb.client.model.Aggregates.lookup; import static com.mongodb.client.model.Aggregates.match; @@ -34,7 +33,12 @@ import static com.mongodb.client.model.Aggregates.replaceRoot; import static com.mongodb.client.model.Aggregates.sort; import static com.mongodb.client.model.Aggregates.unwind; +import static com.mongodb.client.model.Filters.expr; import static com.mongodb.client.model.Filters.in; +import static com.mongodb.client.model.Projections.computed; +import static com.mongodb.client.model.Projections.fields; +import static com.mongodb.client.model.Projections.include; +import static com.mongodb.client.model.Sorts.descending; import static java.util.Objects.requireNonNullElse; /** @@ -164,8 +168,10 @@ public static List getFeedSourceSummaries(String projectId, S match( in("projectId", projectId) ), + + // Project only necessary fields early to reduce document size. project( - Projections.fields(Projections.include( + include( "_id", "name", "deployable", @@ -174,11 +180,12 @@ public static List getFeedSourceSummaries(String projectId, S "labelIds", "url", "filename", - "noteIds") + "noteIds" ) ), sort(Sorts.ascending("name")) ); + return extractFeedSourceSummaries(projectId, organizationId, stages); } @@ -188,31 +195,103 @@ public static List getFeedSourceSummaries(String projectId, S * If this is updated, be sure to also update the matching Mongo query. */ public static Map getLatestFeedVersionForFeedSources(String projectId) { - List stages = Lists.newArrayList( + List feedVersionPipeline = Arrays.asList( + // Match FeedVersion documents where feedSourceId equals the feedSourceId passed from the outer document. match( - in("projectId", projectId) + expr( + new Document("$eq", Arrays.asList("$feedSourceId", "$$feedSourceId")) + ) ), - lookup("FeedVersion", "_id", "feedSourceId", "feedVersions"), - lookup("FeedVersion", "publishedVersionId", "namespace", "publishedFeedVersion"), - unwind("$feedVersions"), - unwind("$publishedFeedVersion", new UnwindOptions().preserveNullAndEmptyArrays(true)), - sort(Sorts.descending("feedVersions.version")), - group( - "$_id", - Accumulators.first("publishedFeedVersionErrorCount", "$publishedFeedVersion.validationResult.errorCount"), - Accumulators.first("publishedFeedVersionStartDate", "$publishedFeedVersion.validationResult.firstCalendarDate"), - Accumulators.first("publishedFeedVersionEndDate", "$publishedFeedVersion.validationResult.lastCalendarDate"), - Accumulators.first("publishedVersionId", "$publishedVersionId"), - Accumulators.first("feedVersionId", "$feedVersions._id"), - Accumulators.first("firstCalendarDate", "$feedVersions.validationResult.firstCalendarDate"), - Accumulators.first("lastCalendarDate", "$feedVersions.validationResult.lastCalendarDate"), - Accumulators.first("errorCount", "$feedVersions.validationResult.errorCount"), - Accumulators.first("processedByExternalPublisher", "$feedVersions.processedByExternalPublisher"), - Accumulators.first("sentToExternalPublisher", "$feedVersions.sentToExternalPublisher"), - Accumulators.first("gtfsPlusValidation", "$feedVersions.gtfsPlusValidation"), - Accumulators.first("namespace", "$feedVersions.namespace") + sort(descending("version")), + limit(1), + // Project only the fields needed from the FeedVersion to reduce payload size. + project( + include( + "version", + "_id", + "validationResult", + "processedByExternalPublisher", + "sentToExternalPublisher", + "gtfsPlusValidation", + "namespace" + ) ) ); + + // Define the variable passed into the lookup pipeline. + List> feedSourceId = List.of(new Variable<>("feedSourceId", "$_id")); + + // $lookup that uses the above pipeline to produce "latestFeedVersion" (an array with at most one element). + Bson lookupLatestFeedVersion = lookup( + "FeedVersion", + feedSourceId, + feedVersionPipeline, + "latestFeedVersion" + ); + + // Pipeline to find the published FeedVersion by namespace (or identifier stored in publishedVersionId) + List publishedFeedVersionPipeline = Arrays.asList( + // Match FeedVersion documents where namespace equals the outer document's publishedVersionId. + match( + expr( + new Document("$eq", Arrays.asList("$namespace", "$$publishedVersionId")) + ) + ), + limit(1), + // Project only the validationResult because that's all that is needed later. + project(include("validationResult")) + ); + + // Pass publishedVersionId from the local document into the lookup pipeline. + List> publishedVersionId = List.of(new Variable<>("publishedVersionId", "$publishedVersionId")); + + // $lookup that uses the above pipeline to produce "publishedFeedVersion" (an array with at most one element). + Bson lookupPublishedFeedVersion = lookup( + "FeedVersion", + publishedVersionId, + publishedFeedVersionPipeline, + "publishedFeedVersion" + ); + + // Top-level aggregation stages that combine the lookups and map required fields into a slimmed down result. + List stages = Arrays.asList( + // Start by filtering documents by projectId (reduces the number of input documents early). + match(in("projectId", projectId)), + + // Attach the latest FeedVersion (as an array "latestFeedVersion"). + lookupLatestFeedVersion, + + // Attach the published FeedVersion (as an array "publishedFeedVersion"). + lookupPublishedFeedVersion, + + // Unwind the latestFeedVersion array into a single document. + unwind("$latestFeedVersion", new UnwindOptions().preserveNullAndEmptyArrays(true)), + + // Unwind the publishedFeedVersion array into a single document. + unwind("$publishedFeedVersion", new UnwindOptions().preserveNullAndEmptyArrays(true)), + + // Final projection: select and compute only the fields needed for the output to minimize size. + project(fields( + // keep the raw publishedVersionId field for reference. + include("publishedVersionId"), + + // Published feed version fields (mapped from the nested publishedFeedVersion.validationResult). + computed("publishedFeedVersionErrorCount", "$publishedFeedVersion.validationResult.errorCount"), + computed("publishedFeedVersionStartDate", "$publishedFeedVersion.validationResult.firstCalendarDate"), + computed("publishedFeedVersionEndDate", "$publishedFeedVersion.validationResult.lastCalendarDate"), + + // Latest feed version fields (mapped from the nested latestFeedVersion). + computed("feedVersionId", "$latestFeedVersion._id"), + computed("firstCalendarDate", "$latestFeedVersion.validationResult.firstCalendarDate"), + computed("lastCalendarDate", "$latestFeedVersion.validationResult.lastCalendarDate"), + computed("errorCount", "$latestFeedVersion.validationResult.errorCount"), + computed("processedByExternalPublisher", "$latestFeedVersion.processedByExternalPublisher"), + computed("sentToExternalPublisher", "$latestFeedVersion.sentToExternalPublisher"), + computed("gtfsPlusValidation", "$latestFeedVersion.gtfsPlusValidation"), + computed("namespace", "$latestFeedVersion.namespace") + )) + ); + return extractFeedVersionSummaries( "FeedSource", "feedVersionId", @@ -228,27 +307,63 @@ public static Map getLatestFeedVersionForFeedSources * If this is updated, be sure to also update the matching Mongo query. */ public static Map getFeedVersionsFromLatestDeployment(String projectId) { - List stages = Lists.newArrayList( - match( - in("_id", projectId) - ), - lookup("Deployment", "_id", "projectId", "deployments"), - unwind("$deployments"), - replaceRoot("$deployments"), - sort(Sorts.descending("lastUpdated")), - limit(1), - lookup("FeedVersion", "feedVersionIds", "_id", "feedVersions"), - unwind("$feedVersions"), - replaceRoot("$feedVersions"), + List stages = new ArrayList<>(); + stages.add(match(in("_id", projectId))); + + // Lookup Deployments for the project. + stages.add(lookup( + "Deployment", + "_id", + "projectId", + "deployments" + )); + + // Unwind deployments array to get individual deployment documents. + stages.add(unwind("$deployments")); + + // Project only fields needed from deployment to reduce doc size before sorting. + stages.add(project(fields( + computed("deployment", "$deployments._id"), + computed("lastUpdated", "$deployments.lastUpdated"), + computed("feedVersionIds", "$deployments.feedVersionIds") + ))); + + // Sort deployments by lastUpdated descending. + stages.add(sort(descending("lastUpdated"))); + stages.add(limit(1)); + + List feedVersionPipeline = Arrays.asList( + match(expr(new Document("$in", Arrays.asList("$_id", "$$feedVersionIds")))), project( - Projections.fields(Projections.include( + include( "feedSourceId", "validationResult.firstCalendarDate", "validationResult.lastCalendarDate", - "validationResult.errorCount") + "validationResult.errorCount" ) ) ); + + // Use pipeline form of lookup to fetch FeedVersions matching deployment’s feedVersionIds + List> feedVersionIds = List.of(new Variable<>("feedVersionIds", "$feedVersionIds")); + + stages.add(lookup( + "FeedVersion", + feedVersionIds, + feedVersionPipeline, + "feedVersions" + )); + stages.add(unwind("$feedVersions", new UnwindOptions().preserveNullAndEmptyArrays(false))); + stages.add(replaceRoot("$feedVersions")); + // Final projection: select and compute only the fields needed for the output to minimize size. + stages.add(project( + include( + "_id", + "feedSourceId", + "validationResult" + ) + )); + return extractFeedVersionSummaries( "Project", "_id", @@ -263,27 +378,46 @@ public static Map getFeedVersionsFromLatestDeploymen * getFeedVersionsFromPinnedDeployment.js. */ public static Map getFeedVersionsFromPinnedDeployment(String projectId) { - List stages = Lists.newArrayList( + List stages = new ArrayList<>(); + + // Match projects by projectId. + stages.add(match(in("_id", projectId))); + + // Project only pinnedDeploymentId to keep doc small. + stages.add(project(include("pinnedDeploymentId"))); + + // Lookup Deployment documents by pinnedDeploymentId. + stages.add(lookup("Deployment", "pinnedDeploymentId", "_id", "deployment")); + + // Unwind deployment array (assuming single deployment per project). + stages.add(unwind("$deployment")); + + // Define pipeline in $lookup to filter and project FeedVersion docs. + List feedVersionPipeline = Arrays.asList( match( - in("_id", projectId) - ), - project( - Projections.fields(Projections.include("pinnedDeploymentId")) + expr( + new Document("$in", Arrays.asList("$_id", "$$feedVersionIds")) + ) ), - lookup("Deployment", "pinnedDeploymentId", "_id", "deployment"), - unwind("$deployment"), - lookup("FeedVersion", "deployment.feedVersionIds", "_id", "feedVersions"), - unwind("$feedVersions"), - replaceRoot("$feedVersions"), project( - Projections.fields(Projections.include( + include( + "_id", "feedSourceId", "validationResult.firstCalendarDate", "validationResult.lastCalendarDate", - "validationResult.errorCount") + "validationResult.errorCount" ) ) ); + + // Define variable for correlated lookup on FeedVersion collection. + List> feedVersionIds = List.of( + new Variable<>("feedVersionIds", "$deployment.feedVersionIds") + ); + + // Lookup FeedVersion docs with pipeline and store as feedVersions array. + stages.add(lookup("FeedVersion", feedVersionIds, feedVersionPipeline, "feedVersions")); + return extractFeedVersionSummaries( "Project", "_id", diff --git a/src/main/resources/mongo/getFeedVersionsFromLatestDeployment.js b/src/main/resources/mongo/getFeedVersionsFromLatestDeployment.js index 7779b6a84..81d4dd12c 100644 --- a/src/main/resources/mongo/getFeedVersionsFromLatestDeployment.js +++ b/src/main/resources/mongo/getFeedVersionsFromLatestDeployment.js @@ -1,64 +1,49 @@ db.getCollection('Project').aggregate([ + { $match: { _id: "" } }, { - // Match provided project id. - $match: { - _id: "" + $lookup: { + from: "Deployment", + localField: "_id", + foreignField: "projectId", + as: "deployment" } }, - { - // Get all deployments for this project. - $lookup:{ - from:"Deployment", - localField:"_id", - foreignField:"projectId", - as:"deployment" - } - }, - { - // Deconstruct deployments array to a document for each element. - $unwind: "$deployment" - }, - { - // Make the deployment documents the input/root document. - "$replaceRoot": { - "newRoot": "$deployment" - } - }, - { - // Sort descending. - $sort: { - lastUpdated : -1 - } - }, - { - // At this point we will have the latest deployment for a project. - $limit: 1 - }, - { - $lookup:{ - from:"FeedVersion", - localField:"feedVersionIds", - foreignField:"_id", - as:"feedVersions" - } - }, - { - // Deconstruct feedVersions array to a document for each element. - $unwind: "$feedVersions" - }, - { - // Make the feed version documents the input/root document. - "$replaceRoot": { - "newRoot": "$feedVersions" + { $unwind: "$deployment" }, + { $replaceRoot: { newRoot: "$deployment" } }, + { $sort: { lastUpdated: -1 } }, + { $limit: 1 }, + { + $lookup: { + from: "FeedVersion", + let: { feedVersionIds: "$feedVersionIds" }, + pipeline: [ + { + $match: { + $expr: { $in: ["$_id", "$$feedVersionIds"] } + } + }, + { + $project: { + _id: 1, + feedSourceId: 1, + "validationResult.firstCalendarDate": 1, + "validationResult.lastCalendarDate": 1, + "validationResult.errorCount": 1 + } + } + ], + as: "feedVersions" } }, + { $unwind: "$feedVersions" }, + { $replaceRoot: { newRoot: "$feedVersions" } }, { $project: { - "_id": 1, - "feedSourceId": 1, + _id: 1, + feedSourceId: 1, "validationResult.firstCalendarDate": 1, "validationResult.lastCalendarDate": 1, "validationResult.errorCount": 1 } } -]) +]); \ No newline at end of file diff --git a/src/main/resources/mongo/getFeedVersionsFromPinnedDeployment.js b/src/main/resources/mongo/getFeedVersionsFromPinnedDeployment.js index 008de3c56..566da095c 100644 --- a/src/main/resources/mongo/getFeedVersionsFromPinnedDeployment.js +++ b/src/main/resources/mongo/getFeedVersionsFromPinnedDeployment.js @@ -1,51 +1,32 @@ db.getCollection('Project').aggregate([ - { - // Match provided project id. - $match: { - _id: "" + { $match: { _id: "" } }, + { $project: { pinnedDeploymentId: 1 } }, + { + $lookup: { + from: "Deployment", + localField: "pinnedDeploymentId", + foreignField: "_id", + as: "deployment" } }, - { - $project: { - pinnedDeploymentId: 1 - } - }, - { - $lookup:{ - from:"Deployment", - localField:"pinnedDeploymentId", - foreignField:"_id", - as:"deployment" + { $unwind: "$deployment" }, + { + $lookup: { + from: "FeedVersion", + let: { feedVersionIds: "$deployment.feedVersionIds" }, + pipeline: [ + { $match: { $expr: { $in: ["$_id", "$$feedVersionIds"] } } }, + { $project: { + _id: 1, + feedSourceId: 1, + "validationResult.firstCalendarDate": 1, + "validationResult.lastCalendarDate": 1, + "validationResult.errorCount": 1 + }} + ], + as: "feedVersions" } }, - { - $unwind: "$deployment" - }, - { - $lookup:{ - from:"FeedVersion", - localField:"deployment.feedVersionIds", - foreignField:"_id", - as:"feedVersions" - } - }, - { - // Deconstruct feedVersions array to a document for each element. - $unwind: "$feedVersions" - }, - { - // Make the feed version documents the input/root document. - "$replaceRoot": { - "newRoot": "$feedVersions" - } - }, - { - $project: { - "_id": 1, - "feedSourceId": 1, - "validationResult.firstCalendarDate": 1, - "validationResult.lastCalendarDate": 1, - "validationResult.errorCount": 1 - } - } -]) + { $unwind: "$feedVersions" }, + { $replaceRoot: { newRoot: "$feedVersions" } } +]) \ No newline at end of file diff --git a/src/main/resources/mongo/getLatestFeedVersionForFeedSources.js b/src/main/resources/mongo/getLatestFeedVersionForFeedSources.js index 8b63a2293..21a563012 100644 --- a/src/main/resources/mongo/getLatestFeedVersionForFeedSources.js +++ b/src/main/resources/mongo/getLatestFeedVersionForFeedSources.js @@ -1,60 +1,56 @@ -db.getCollection('FeedSource').aggregate([ - { - // Match provided project id. - $match: { - projectId: "" - } - }, +db.FeedSource.aggregate([ + { $match: { projectId: "" } }, { $lookup: { from: "FeedVersion", - localField: "_id", - foreignField: "feedSourceId", - as: "feedVersions" + let: { feedSourceId: "$_id" }, + pipeline: [ + { $match: { $expr: { $eq: ["$feedSourceId", "$$feedSourceId"] } } }, + { $sort: { version: -1 } }, + { $limit: 1 }, + { + $project: { + version: 1, + validationResult: 1, + processedByExternalPublisher: 1, + sentToExternalPublisher: 1, + gtfsPlusValidation: 1, + namespace: 1 + } + } + ], + as: "latestFeedVersion" } }, { $lookup: { from: "FeedVersion", - localField: "publishedVersionId", - foreignField: "namespace", + let: { publishedVersionId: "$publishedVersionId" }, + pipeline: [ + { $match: { $expr: { $eq: ["$namespace", "$$publishedVersionId"] } } }, + { $limit: 1 }, + { $project: { validationResult: 1 } } + ], as: "publishedFeedVersion" } }, + { $unwind: { path: "$latestFeedVersion", preserveNullAndEmptyArrays: true } }, + { $unwind: { path: "$publishedFeedVersion", preserveNullAndEmptyArrays: true } }, { - $unwind: "$feedVersions" - }, - { - $unwind: { - path: "$publishedFeedVersion", - preserveNullAndEmptyArrays: true - } - }, - { - $sort: { - "feedVersions.version": -1 - } - }, - { - $group: { - _id: "$_id", - publishedVersionId: { $first: "$publishedVersionId" }, - publishedFeedVersionErrorCount: { $first: "$publishedFeedVersion.validationResult.errorCount"}, - publishedFeedVersionStartDate: { $first: "$publishedFeedVersion.validationResult.firstCalendarDate"}, - publishedFeedVersionEndDate: { $first: "$publishedFeedVersion.validationResult.lastCalendarDate"}, - feedVersion: { - $first: { - version: "$feedVersions.version", - feedVersionId: "$feedVersions._id", - firstCalendarDate: "$feedVersions.validationResult.firstCalendarDate", - lastCalendarDate: "$feedVersions.validationResult.lastCalendarDate", - errorCount: "$feedVersions.validationResult.errorCount", - processedByExternalPublisher: "$feedVersions.processedByExternalPublisher", - sentToExternalPublisher: "$feedVersions.sentToExternalPublisher", - gtfsPlusValidation: "$feedVersions.gtfsPlusValidation", - namespace: "$feedVersions.namespace" - } - } + $project: { + publishedVersionId: 1, + publishedFeedVersionErrorCount: "$publishedFeedVersion.validationResult.errorCount", + publishedFeedVersionStartDate: "$publishedFeedVersion.validationResult.firstCalendarDate", + publishedFeedVersionEndDate: "$publishedFeedVersion.validationResult.lastCalendarDate", + version: "$latestFeedVersion.version", + feedVersionId: "$latestFeedVersion._id", + firstCalendarDate: "$latestFeedVersion.validationResult.firstCalendarDate", + lastCalendarDate: "$latestFeedVersion.validationResult.lastCalendarDate", + errorCount: "$latestFeedVersion.validationResult.errorCount", + processedByExternalPublisher: "$latestFeedVersion.processedByExternalPublisher", + sentToExternalPublisher: "$latestFeedVersion.sentToExternalPublisher", + gtfsPlusValidation: "$latestFeedVersion.gtfsPlusValidation", + namespace: "$latestFeedVersion.namespace" } } -]) +]) \ No newline at end of file