From 19b9cac362f936ea8c67d3dc70ba9f7d6e12bcf3 Mon Sep 17 00:00:00 2001
From: "bxf12315@gmail.com" <bxf12315@gmail.com>
Date: Wed, 24 Dec 2025 19:34:53 +0800
Subject: [PATCH 1/2] Prune SBOMs based on ingestion date

---
 modules/fundamental/src/endpoints.rs |  1 +
 modules/fundamental/src/lib.rs       |  1 +
 openapi.yaml                         | 87 ++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+)

diff --git a/modules/fundamental/src/endpoints.rs b/modules/fundamental/src/endpoints.rs
index 04f808bd6..f6e3f9da8 100644
--- a/modules/fundamental/src/endpoints.rs
+++ b/modules/fundamental/src/endpoints.rs
@@ -22,6 +22,7 @@ pub fn configure(
     let ingestor_service = IngestorService::new(Graph::new(db.clone()), storage, Some(analysis));
     svc.app_data(web::Data::new(ingestor_service));
 
+    crate::admin::endpoints::configure(svc, db.clone());
     crate::advisory::endpoints::configure(svc, db.clone(), config.advisory_upload_limit);
     crate::license::endpoints::configure(svc, db.clone());
     crate::organization::endpoints::configure(svc, db.clone());
diff --git a/modules/fundamental/src/lib.rs b/modules/fundamental/src/lib.rs
index 6795e34a3..67f001a5f 100644
--- a/modules/fundamental/src/lib.rs
+++ b/modules/fundamental/src/lib.rs
@@ -1,5 +1,6 @@
 #![recursion_limit = "256"]
 
+pub mod admin;
 pub mod advisory;
 pub mod common;
 pub mod db;
diff --git a/openapi.yaml b/openapi.yaml
index 650b86240..a6a576ae4 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -24,6 +24,57 @@ paths:
                     type: object
                   version:
                     type: string
+  /api/v2/admin/sbom/prune:
+    post:
+      tags:
+      - admin
+      summary: Prune SBOMs based on ingestion date
+      operationId: pruneSboms
+      parameters:
+      - name: ingested
+        in: query
+        description: |-
+          Number of days ago from current time to prune SBOMs
+          For example, ingested=90 means prune SBOMs ingested more than 90 days ago
+        required: true
+        schema:
+          type: integer
+          format: int32
+          minimum: 0
+        style: form
+        example: 90
+      - name: dry_run
+        in: query
+        description: If true, only return the list of SBOMs that would be deleted without actually deleting them
+        required: true
+        schema:
+          type: boolean
+      - name: batch_size
+        in: query
+        description: Number of SBOMs to process in a single batch
+        required: true
+        schema:
+          type: integer
+          format: int64
+          minimum: 0
+      - name: max_concurrent
+        in: query
+        description: Maximum number of concurrent operations
+        required: true
+        schema:
+          type: integer
+          minimum: 0
+      responses:
+        '200':
+          description: List of pruned SBOMs
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/PrunedSbom'
+        '500':
+          description: Internal server error
   /api/v2/advisory:
     get:
       tags:
@@ -4634,6 +4685,42 @@ components:
           format: int32
           description: The total number of items to be processed.
           minimum: 0
+    PrunedSbom:
+      type: object
+      required:
+      - sbom_id
+      - authors
+      - suppliers
+      - data_licenses
+      - ingested
+      properties:
+        authors:
+          type: array
+          items:
+            type: string
+        data_licenses:
+          type: array
+          items:
+            type: string
+        document_id:
+          type:
+          - string
+          - 'null'
+        ingested:
+          type: string
+          format: date-time
+        published:
+          type:
+          - string
+          - 'null'
+          format: date-time
+        sbom_id:
+          type: string
+          format: uuid
+        suppliers:
+          type: array
+          items:
+            type: string
     Purl:
       type: string
       format: uri

From 7a54039ae5b8aef64b22436d5e4eba6ce5e57b57 Mon Sep 17 00:00:00 2001
From: "bxf12315@gmail.com" <bxf12315@gmail.com>
Date: Wed, 24 Dec 2025 22:11:13 +0800
Subject: [PATCH 2/2] Add multithreaded processing

---
 Cargo.lock                                    |   1 +
 modules/fundamental/Cargo.toml                |   1 +
 .../fundamental/src/admin/endpoints/mod.rs    | 187 +++++++++++++
 .../fundamental/src/admin/endpoints/test.rs   | 263 ++++++++++++++++++
 modules/fundamental/src/admin/mod.rs          |   1 +
 modules/fundamental/src/sbom/service/mod.rs   |   1 +
 openapi.yaml                                  |  46 ++-
 7 files changed, 492 insertions(+), 8 deletions(-)
 create mode 100644 modules/fundamental/src/admin/endpoints/mod.rs
 create mode 100644 modules/fundamental/src/admin/endpoints/test.rs
 create mode 100644 modules/fundamental/src/admin/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index 51c1ddb16..f2948182e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8471,6 +8471,7 @@ dependencies = [
  "csv",
  "cve",
  "flate2",
+ "futures",
  "futures-util",
  "hex",
  "humantime",
diff --git a/modules/fundamental/Cargo.toml b/modules/fundamental/Cargo.toml
index 749a6062e..5a5250b49 100644
--- a/modules/fundamental/Cargo.toml
+++ b/modules/fundamental/Cargo.toml
@@ -28,6 +28,7 @@ base64 = { workspace = true }
 cpe = { workspace = true }
 csv = { workspace = true }
 flate2 ={ workspace = true }
+futures = { workspace = true }
 futures-util = { workspace = true }
 itertools = { workspace = true }
 log = { workspace = true }
diff --git a/modules/fundamental/src/admin/endpoints/mod.rs b/modules/fundamental/src/admin/endpoints/mod.rs
new file mode 100644
index 000000000..710df5801
--- /dev/null
+++ b/modules/fundamental/src/admin/endpoints/mod.rs
@@ -0,0 +1,187 @@
+#[cfg(test)]
+mod test;
+
+use actix_web::{HttpResponse, Responder, post, web};
+use futures_util::{StreamExt, stream};
+use sea_orm::{
+    ColumnTrait, EntityTrait, FromQueryResult, JoinType, QueryFilter, QuerySelect, RelationTrait,
+    TransactionTrait,
+};
+use serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+use trustify_auth::{DeleteSbom, authorizer::Require};
+use trustify_common::db::Database;
+use trustify_entity::{sbom, source_document};
+use utoipa::{IntoParams, ToSchema};
+use uuid::Uuid;
+
+use crate::{Error, db::DatabaseExt, sbom::service::SbomService};
+
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, FromQueryResult)]
+pub struct PrunedSbom {
+    pub sbom_id: Uuid,
+    pub document_id: Option<String>,
+    pub published: Option<OffsetDateTime>,
+    pub authors: Vec<String>,
+    pub suppliers: Vec<String>,
+    pub data_licenses: Vec<String>,
+    pub ingested: OffsetDateTime,
+    pub error: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
+pub struct PrunedSbomLog {
+    pub total: u64,
+    pub successful_total: u64,
+    pub failed_total: u64,
+    pub successful_pruned: Vec<PrunedSbom>,
+    pub failed_pruned: Vec<PrunedSbom>,
+}
+
+#[derive(Debug, Deserialize, IntoParams)]
+pub struct PruneQuery {
+    /// Number of days ago from current time to prune SBOMs
+    #[param(style = Form, example = 90)]
+    pub ingested: u32,
+    /// If true, only return the list of SBOMs that would be deleted without actually deleting them
+    #[serde(alias = "dry-run")]
+    pub dry_run: bool,
+    /// Number of SBOMs to process in a single batch
+    #[serde(alias = "batch-size")]
+    pub batch_size: u64,
+    /// Maximum number of concurrent operations
+    #[serde(alias = "max-concurrent", default = "default_max_concurrent")]
+    #[param(minimum = 1, example = 10)]
+    pub max_concurrent: usize,
+}
+
+fn default_max_concurrent() -> usize {
+    1
+}
+
+/// Try to delete an SBOM by its ID.
+///
+/// If the deletion is successful, the PrunedSbom struct is returned.
+/// If the deletion fails, the PrunedSbom struct with an error message is returned.
+pub async fn try_delete_sbom(
+    mut sbom: PrunedSbom,
+    db: actix_web::web::Data<Database>,
+    service: actix_web::web::Data<SbomService>,
+) -> Result<PrunedSbom, PrunedSbom> {
+    let delete_operation = async {
+        let tx = db.begin().await?;
+        service.delete_sbom(sbom.sbom_id, &tx).await?;
+        tx.commit().await?;
+        Ok::<(), Error>(())
+    };
+
+    match delete_operation.await {
+        Ok(_) => Ok(sbom),
+        Err(e) => {
+            sbom.error = Some(e.to_string());
+            Err(sbom)
+        }
+    }
+}
+
+#[utoipa::path(
+    tag = "admin",
+    operation_id = "pruneSboms",
+    params(PruneQuery),
+    responses(
+        (status = 200, description = "List of pruned SBOMs", body = PrunedSbomLog),
+        (status = 500, description = "Internal server error"),
+    ),
+)]
+#[post("/v2/admin/sbom/prune")]
+/// Prune SBOMs based on ingestion date
+pub async fn prune_sboms(
+    service: web::Data<SbomService>,
+    db: web::Data<Database>,
+    web::Query(query): web::Query<PruneQuery>,
+    _: Require<DeleteSbom>,
+) -> Result<impl Responder, Error> {
+    // Cutoff is "now minus `ingested` days". `saturating_sub` clamps to the earliest
+    // representable date instead of panicking when a huge `ingested` value is requested.
+    let cutoff_date = OffsetDateTime::now_utc()
+        .saturating_sub(time::Duration::days(i64::from(query.ingested)));
+
+    // Query SBOMs joined with source_document where ingested date is before the cutoff date
+    let pruned_sboms: Vec<PrunedSbom> = sbom::Entity::find()
+        .join(JoinType::Join, sbom::Relation::SourceDocument.def())
+        .select_only()
+        .column_as(sbom::Column::SbomId, "sbom_id")
+        .column_as(sbom::Column::DocumentId, "document_id")
+        .column_as(sbom::Column::Published, "published")
+        .column_as(sbom::Column::Authors, "authors")
+        .column_as(sbom::Column::Suppliers, "suppliers")
+        .column_as(sbom::Column::DataLicenses, "data_licenses")
+        .column_as(source_document::Column::Ingested, "ingested")
+        .filter(source_document::Column::Ingested.lt(cutoff_date))
+        .limit(query.batch_size)
+        .into_model::<PrunedSbom>()
+        .all(&db.begin_read().await?)
+        .await?;
+
+    // Capture the count up front so the list itself can be moved instead of cloned
+    let total = pruned_sboms.len() as u64;
+
+    // If not a dry run, delete the SBOMs concurrently
+    if !query.dry_run {
+        // Ensure max_concurrent is at least 1 to prevent stalling
+        let max_concurrent = query.max_concurrent.max(1);
+
+        // Process SBOMs concurrently and collect results
+        let results: Vec<Result<PrunedSbom, PrunedSbom>> = stream::iter(pruned_sboms)
+            .map(move |sbom| {
+                let db = db.clone();
+                let service = service.clone();
+                try_delete_sbom(sbom, db, service)
+            })
+            .buffer_unordered(max_concurrent)
+            .collect()
+            .await;
+
+        // Separate successful and failed results
+        let (successful_pruned, failed_pruned): (Vec<PrunedSbom>, Vec<PrunedSbom>) =
+            results.into_iter().fold(
+                (Vec::new(), Vec::new()),
+                |(mut success, mut fail), result| {
+                    match result {
+                        Ok(sbom) => success.push(sbom),
+                        Err(sbom) => fail.push(sbom),
+                    }
+                    (success, fail)
+                },
+            );
+
+        let log = PrunedSbomLog {
+            total,
+            successful_total: successful_pruned.len() as u64,
+            failed_total: failed_pruned.len() as u64,
+            successful_pruned,
+            failed_pruned,
+        };
+
+        Ok(HttpResponse::Ok().json(log))
+    } else {
+        // In dry run mode, build a PrunedSbomLog with all SBOMs as successful
+        let log = PrunedSbomLog {
+            total,
+            successful_total: total,
+            failed_total: 0,
+            successful_pruned: pruned_sboms,
+            failed_pruned: vec![],
+        };
+        Ok(HttpResponse::Ok().json(log))
+    }
+}
+
+pub fn configure(config: &mut utoipa_actix_web::service_config::ServiceConfig, db: Database) {
+    let sbom_service = SbomService::new(db.clone());
+
+    config
+        .app_data(web::Data::new(db))
+        .app_data(web::Data::new(sbom_service))
+        .service(prune_sboms);
+}
diff --git a/modules/fundamental/src/admin/endpoints/test.rs b/modules/fundamental/src/admin/endpoints/test.rs
new file mode 100644
index 000000000..f636aa0b9
--- /dev/null
+++ b/modules/fundamental/src/admin/endpoints/test.rs
@@ -0,0 +1,263 @@
+use actix_http::StatusCode;
+use actix_web::test::TestRequest;
+use sea_orm::EntityTrait;
+use serde_json::Value;
+use test_context::test_context;
+use test_log::test;
+use time::OffsetDateTime;
+use trustify_common::id::Id;
+use trustify_entity::source_document;
+use trustify_test_context::{TrustifyContext, call::CallService};
+
+use crate::test::caller;
+
+/// Assert that `GET /api/v2/sbom/{sbom_id}` responds with `expected_status`
+async fn verify_sbom_exists(
+    app: &impl CallService,
+    sbom_id: &Id,
+    expected_status: StatusCode,
+) -> Result<(), anyhow::Error> {
+    let req = TestRequest::get()
+        .uri(&format!("/api/v2/sbom/{}", sbom_id))
+        .to_request();
+    let resp = app.call_service(req).await;
+    assert_eq!(resp.status(), expected_status);
+    Ok(())
+}
+
+/// Ingest test SBOMs and update their ingested date to be older than specified days
+async fn ingest_test_sboms_with_old_date(
+    ctx: &TrustifyContext,
+    days_old: i64,
+) -> Result<(Id, Id), anyhow::Error> {
+    // Ingest test SBOMs
+    let result_spdx = ctx
+        .ingest_document("spdx/OCP-TOOLS-4.11-RHEL-8.json")
+        .await?;
+    let result_cyclonedx = ctx
+        .ingest_document("cyclonedx/application.cdx.json")
+        .await?;
+
+    // Manually update the ingested date to be older than specified days for testing
+    let old_date = OffsetDateTime::now_utc() - time::Duration::days(days_old);
+    source_document::Entity::update_many()
+        .col_expr(source_document::Column::Ingested, old_date.into())
+        .exec(&ctx.db)
+        .await?;
+
+    Ok((result_spdx.id, result_cyclonedx.id))
+}
+
+#[test_context(TrustifyContext)]
+#[test(actix_web::test)]
+async fn test_prune_sboms_dry_run(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
+    // Ingest test SBOMs with old date (100 days ago)
+    let (result_spdx, result_cyclonedx) = ingest_test_sboms_with_old_date(ctx, 100).await?;
+
+    let app = caller(ctx).await?;
+
+    // Create test request with dry-run=true
+    let req = TestRequest::post()
+        .uri("/api/v2/admin/sbom/prune?ingested=90&dry-run=true&batch-size=10&max-concurrent=5")
+        .to_request();
+
+    // Call the endpoint
+    let response: Value = app.call_and_read_body_json(req).await;
+
+    // Verify response is an object
+    assert!(
+        response.is_object(),
+        "Expected response to be an object, got: {:?}",
+        response
+    );
+
+    // Verify successful_total equals 2
+    let successful_total = response
+        .get("successful_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have successful_total field");
+    assert_eq!(
+        successful_total, 2,
+        "Expected successful_total to be 2, got: {}",
+        successful_total
+    );
+
+    // Verify total equals 2
+    let total = response
+        .get("total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have total field");
+    assert_eq!(total, 2, "Expected total to be 2, got: {}", total);
+
+    // Verify failed_total equals 0 (dry run mode)
+    let failed_total = response
+        .get("failed_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have failed_total field");
+    assert_eq!(
+        failed_total, 0,
+        "Expected failed_total to be 0 in dry run mode, got: {}",
+        failed_total
+    );
+
+    // Verify successful_pruned array has 2 items
+    let successful_pruned = response
+        .get("successful_pruned")
+        .and_then(|v| v.as_array())
+        .expect("Response should have successful_pruned array");
+    assert_eq!(
+        successful_pruned.len(),
+        2,
+        "Expected successful_pruned to have 2 items, got: {}",
+        successful_pruned.len()
+    );
+
+    // Verify SBOMs still exist (dry run)
+    verify_sbom_exists(&app, &result_spdx, StatusCode::OK).await?;
+    verify_sbom_exists(&app, &result_cyclonedx, StatusCode::OK).await?;
+    Ok(())
+}
+
+#[test_context(TrustifyContext)]
+#[test(actix_web::test)]
+async fn test_prune_sboms_actual_deletion(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
+    // Ingest test SBOMs with old date (100 days ago)
+    let (result_spdx, result_cyclonedx) = ingest_test_sboms_with_old_date(ctx, 100).await?;
+
+    let app = caller(ctx).await?;
+
+    // Create test request with dry-run=false
+    let req = TestRequest::post()
+        .uri("/api/v2/admin/sbom/prune?ingested=90&dry-run=false&batch-size=10&max-concurrent=5")
+        .to_request();
+
+    // Call the endpoint
+    let response: Value = app.call_and_read_body_json(req).await;
+
+    // Verify response is an object
+    assert!(
+        response.is_object(),
+        "Expected response to be an object, got: {:?}",
+        response
+    );
+
+    // Verify successful_total equals 2
+    let successful_total = response
+        .get("successful_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have successful_total field");
+    assert_eq!(
+        successful_total, 2,
+        "Expected successful_total to be 2, got: {}",
+        successful_total
+    );
+
+    // Verify total equals 2
+    let total = response
+        .get("total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have total field");
+    assert_eq!(total, 2, "Expected total to be 2, got: {}", total);
+
+    // Verify failed_total equals 0 (actual deletion mode)
+    let failed_total = response
+        .get("failed_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have failed_total field");
+    assert_eq!(
+        failed_total, 0,
+        "Expected failed_total to be 0 in actual deletion mode, got: {}",
+        failed_total
+    );
+
+    // Verify successful_pruned array has 2 items
+    let successful_pruned = response
+        .get("successful_pruned")
+        .and_then(|v| v.as_array())
+        .expect("Response should have successful_pruned array");
+    assert_eq!(
+        successful_pruned.len(),
+        2,
+        "Expected successful_pruned to have 2 items, got: {}",
+        successful_pruned.len()
+    );
+
+    // Verify SBOMs no longer exist (actual deletion)
+    verify_sbom_exists(&app, &result_spdx, StatusCode::NOT_FOUND).await?;
+    verify_sbom_exists(&app, &result_cyclonedx, StatusCode::NOT_FOUND).await?;
+    Ok(())
+}
+
+#[test_context(TrustifyContext)]
+#[test(actix_web::test)]
+async fn test_prune_sboms_no_matches(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
+    // Ingest test SBOMs with old date (0 days ago)
+    let (result_spdx, result_cyclonedx) = ingest_test_sboms_with_old_date(ctx, 0).await?;
+
+    let app = caller(ctx).await?;
+
+    // Create test request with dry-run=false
+    let req = TestRequest::post()
+        .uri("/api/v2/admin/sbom/prune?ingested=90&dry-run=false&batch-size=10&max-concurrent=5")
+        .to_request();
+
+    // Call the endpoint
+    let response: Value = app.call_and_read_body_json(req).await;
+
+    // Verify response is an object
+    assert!(
+        response.is_object(),
+        "Expected response to be an object, got: {:?}",
+        response
+    );
+
+    // Verify successful_total equals 0 (no matches)
+    let successful_total = response
+        .get("successful_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have successful_total field");
+    assert_eq!(
+        successful_total, 0,
+        "Expected successful_total to be 0 when no matches, got: {}",
+        successful_total
+    );
+
+    // Verify total equals 0
+    let total = response
+        .get("total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have total field");
+    assert_eq!(
+        total, 0,
+        "Expected total to be 0 when no matches, got: {}",
+        total
+    );
+
+    // Verify failed_total equals 0 (no matches)
+    let failed_total = response
+        .get("failed_total")
+        .and_then(|v| v.as_u64())
+        .expect("Response should have failed_total field");
+    assert_eq!(
+        failed_total, 0,
+        "Expected failed_total to be 0 when no matches, got: {}",
+        failed_total
+    );
+
+    // Verify successful_pruned array is empty
+    let successful_pruned = response
+        .get("successful_pruned")
+        .and_then(|v| v.as_array())
+        .expect("Response should have successful_pruned array");
+    assert_eq!(
+        successful_pruned.len(),
+        0,
+        "Expected successful_pruned to be empty when no matches, got: {}",
+        successful_pruned.len()
+    );
+
+    // Verify SBOMs still exist (no deletion)
+    verify_sbom_exists(&app, &result_spdx, StatusCode::OK).await?;
+    verify_sbom_exists(&app, &result_cyclonedx, StatusCode::OK).await?;
+    Ok(())
+}
diff --git a/modules/fundamental/src/admin/mod.rs b/modules/fundamental/src/admin/mod.rs
new file mode 100644
index 000000000..8aff8c90d
--- /dev/null
+++ b/modules/fundamental/src/admin/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod endpoints;
diff --git a/modules/fundamental/src/sbom/service/mod.rs b/modules/fundamental/src/sbom/service/mod.rs
index a189d7093..b6f81024e 100644
--- a/modules/fundamental/src/sbom/service/mod.rs
+++ b/modules/fundamental/src/sbom/service/mod.rs
@@ -7,6 +7,7 @@ mod test;
 
 use trustify_common::db::Database;
 
+#[derive(Clone)]
 pub struct SbomService {
     db: Database,
 }
diff --git a/openapi.yaml b/openapi.yaml
index a6a576ae4..861acedd6 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -33,9 +33,7 @@ paths:
       parameters:
       - name: ingested
         in: query
-        description: |-
-          Number of days ago from current time to prune SBOMs
-          For example, ingested=90 means prune SBOMs ingested more than 90 days ago
+        description: Number of days ago from current time to prune SBOMs
         required: true
         schema:
           type: integer
@@ -60,19 +58,18 @@ paths:
       - name: max_concurrent
         in: query
         description: Maximum number of concurrent operations
-        required: true
+        required: false
         schema:
           type: integer
-          minimum: 0
+          minimum: 1
+        example: 10
       responses:
         '200':
           description: List of pruned SBOMs
           content:
             application/json:
               schema:
-                type: array
-                items:
-                  $ref: '#/components/schemas/PrunedSbom'
+                $ref: '#/components/schemas/PrunedSbomLog'
         '500':
           description: Internal server error
   /api/v2/advisory:
@@ -4706,6 +4703,10 @@ components:
           type:
           - string
           - 'null'
+        error:
+          type:
+          - string
+          - 'null'
         ingested:
           type: string
           format: date-time
@@ -4721,6 +4722,35 @@ components:
           type: array
           items:
             type: string
+    PrunedSbomLog:
+      type: object
+      required:
+      - total
+      - successful_total
+      - failed_total
+      - successful_pruned
+      - failed_pruned
+      properties:
+        failed_pruned:
+          type: array
+          items:
+            $ref: '#/components/schemas/PrunedSbom'
+        failed_total:
+          type: integer
+          format: int64
+          minimum: 0
+        successful_pruned:
+          type: array
+          items:
+            $ref: '#/components/schemas/PrunedSbom'
+        successful_total:
+          type: integer
+          format: int64
+          minimum: 0
+        total:
+          type: integer
+          format: int64
+          minimum: 0
     Purl:
       type: string
       format: uri