Skip to content

Commit c5b7f89

Browse files
committed
preemptive eliza fixes based on #10042
1 parent b4bf111 commit c5b7f89

14 files changed

Lines changed: 149 additions & 243 deletions

File tree

.claude/skills/add-background-task/SKILL.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Create the task module. The struct holds whatever state it needs (typically `Arc
2525

2626
Logging conventions: `debug` when there's nothing to do, `info` when routine work was done, `warn` when the work done indicates something is wrong (e.g., cleaning up after a crash), `error` on failure.
2727

28-
Include unit tests in the same file using `TestDatabase::new_with_datastore`. Tests call `actually_activate` directly.
28+
Include a unit test in the same file using `TestDatabase::new_with_datastore` that calls `actually_activate` directly. If the task has a datastore method, a single test exercising the task end-to-end (including the limit/batching behavior) is sufficient — don't add a redundant test for the datastore method separately unless it has complex logic worth testing in isolation.
2929

3030
### 3. Register the module (`nexus/src/app/background/tasks/mod.rs`)
3131

@@ -37,7 +37,7 @@ Add `pub task_<name>: Activator` to the `BackgroundTasks` struct, maintaining al
3737

3838
### 5. Config (`nexus-config/src/nexus_config.rs`)
3939

40-
Add a config struct (e.g., `YourTaskConfig`) with at minimum `period_secs: Duration` (using `#[serde_as(as = "DurationSeconds<u64>")]`). If the task does bounded work per activation, name the limit field `max_<verb>_per_activation` (e.g., `max_delete_per_activation`, `max_update_per_activation`) to match existing conventions. Add the field to `BackgroundTaskConfig`. Update the test config literal and expected parse output at the bottom of the file.
40+
Add a config struct (e.g., `YourTaskConfig`) with at minimum `period_secs: Duration` (using `#[serde_as(as = "DurationSeconds<u64>")]`). If the task does bounded work per activation, name the limit field `max_<past_tense_verb>_per_activation` (e.g., `max_deleted_per_activation`, `max_timed_out_per_activation`) to match existing conventions. Add the field to `BackgroundTaskConfig`. Update the test config literal and expected parse output at the bottom of the file.
4141

4242
### 6. Config files
4343

@@ -62,7 +62,7 @@ If the task needs a new index or schema change to support its query, add a migra
6262

6363
### 9. Datastore method (if needed)
6464

65-
If the task needs a new query, add it in the appropriate `nexus/db-queries/src/db/datastore/` file. Add a test in the same file or in `nexus/db-queries/src/db/datastore/mod.rs`.
65+
If the task needs a new query, add it in the appropriate `nexus/db-queries/src/db/datastore/` file. Prefer the Diesel typed DSL over raw SQL (`diesel::sql_query`) for queries and test helpers. Only fall back to raw SQL when the DSL genuinely can't express the query.
6666

6767
If the task modifies rows that other code paths also modify, think about races: what happens if both run concurrently on the same row? Both paths should typically guard their writes so only one succeeds.
6868

dev-tools/omdb/src/bin/omdb/nexus.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,7 +2694,7 @@ fn print_task_audit_log_timeout_incomplete(details: &serde_json::Value) {
26942694
Ok(status) => {
26952695
const TIMED_OUT: &str = "timed_out:";
26962696
const CUTOFF: &str = "cutoff:";
2697-
const MAX_UPDATE: &str = "max_update_per_activation:";
2697+
const MAX_UPDATE: &str = "max_timed_out_per_activation:";
26982698
const ERROR: &str = "error:";
26992699
const WIDTH: usize =
27002700
const_max_len(&[TIMED_OUT, CUTOFF, MAX_UPDATE, ERROR]) + 1;
@@ -2706,7 +2706,7 @@ fn print_task_audit_log_timeout_incomplete(details: &serde_json::Value) {
27062706
);
27072707
println!(
27082708
" {MAX_UPDATE:<WIDTH$}{}",
2709-
status.max_update_per_activation
2709+
status.max_timed_out_per_activation
27102710
);
27112711
if let Some(error) = &status.error {
27122712
println!(" {ERROR:<WIDTH$}{error}");

dev-tools/omdb/tests/successes.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,9 @@ task: "audit_log_timeout_incomplete"
597597
configured period: every <REDACTED_DURATION>m
598598
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
599599
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
600-
timed_out: 0
601-
cutoff: <REDACTED_TIMESTAMP>
602-
max_update_per_activation: 1000
600+
timed_out: 0
601+
cutoff: <REDACTED_TIMESTAMP>
602+
max_timed_out_per_activation: 1000
603603

604604
task: "bfd_manager"
605605
configured period: every <REDACTED_DURATION>s
@@ -1200,9 +1200,9 @@ task: "audit_log_timeout_incomplete"
12001200
configured period: every <REDACTED_DURATION>m
12011201
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
12021202
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
1203-
timed_out: 0
1204-
cutoff: <REDACTED_TIMESTAMP>
1205-
max_update_per_activation: 1000
1203+
timed_out: 0
1204+
cutoff: <REDACTED_TIMESTAMP>
1205+
max_timed_out_per_activation: 1000
12061206

12071207
task: "bfd_manager"
12081208
configured period: every <REDACTED_DURATION>s

nexus-config/src/nexus_config.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ pub struct AuditLogTimeoutIncompleteConfig {
464464
pub timeout_secs: Duration,
465465

466466
/// max rows per SQL statement
467-
pub max_update_per_activation: u32,
467+
pub max_timed_out_per_activation: u32,
468468
}
469469

470470
#[serde_as]
@@ -1286,7 +1286,7 @@ mod test {
12861286
session_cleanup.max_delete_per_activation = 10000
12871287
audit_log_timeout_incomplete.period_secs = 600
12881288
audit_log_timeout_incomplete.timeout_secs = 14400
1289-
audit_log_timeout_incomplete.max_update_per_activation = 1000
1289+
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
12901290
[default_region_allocation_strategy]
12911291
type = "random"
12921292
seed = 0
@@ -1558,7 +1558,7 @@ mod test {
15581558
AuditLogTimeoutIncompleteConfig {
15591559
period_secs: Duration::from_secs(600),
15601560
timeout_secs: Duration::from_secs(14400),
1561-
max_update_per_activation: 1000,
1561+
max_timed_out_per_activation: 1000,
15621562
},
15631563
},
15641564
multicast: MulticastConfig { enabled: false },
@@ -1669,7 +1669,7 @@ mod test {
16691669
session_cleanup.max_delete_per_activation = 10000
16701670
audit_log_timeout_incomplete.period_secs = 600
16711671
audit_log_timeout_incomplete.timeout_secs = 14400
1672-
audit_log_timeout_incomplete.max_update_per_activation = 1000
1672+
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
16731673
16741674
[default_region_allocation_strategy]
16751675
type = "random"

nexus/db-model/src/audit_log.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ impl From<AuditLogEntryInitParams> for AuditLogEntryInit {
224224

225225
/// `audit_log_complete` is a view on `audit_log` filtering for rows with
226226
/// non-null `time_completed`, not its own table.
227-
#[derive(Queryable, Selectable, Clone, Debug)]
227+
#[derive(Queryable, Selectable, Clone, Debug, PartialEq)]
228228
#[diesel(table_name = audit_log_complete)]
229229
pub struct AuditLogEntry {
230230
pub id: Uuid,

nexus/db-queries/src/db/datastore/audit_log.rs

Lines changed: 8 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -450,154 +450,16 @@ mod tests {
450450
id: Uuid,
451451
time_started: DateTime<Utc>,
452452
) {
453-
use diesel::sql_types;
454-
diesel::sql_query(
455-
"UPDATE omicron.public.audit_log \
456-
SET time_started = $1 WHERE id = $2",
457-
)
458-
.bind::<sql_types::Timestamptz, _>(time_started)
459-
.bind::<sql_types::Uuid, _>(id)
460-
.execute_async(&*datastore.pool_connection_for_tests().await.unwrap())
461-
.await
462-
.unwrap();
463-
}
464-
465-
#[tokio::test]
466-
async fn test_audit_log_timeout_incomplete() {
467-
let logctx = dev::test_setup_log("test_audit_log_timeout_incomplete");
468-
let db = TestDatabase::new_with_datastore(&logctx.log).await;
469-
let (opctx, datastore) = (db.opctx(), db.datastore());
470-
471-
let now = Utc::now();
472-
let two_hours_ago = now - TimeDelta::try_hours(2).unwrap();
473-
let cutoff = now - TimeDelta::try_hours(1).unwrap();
474-
475-
// Create an incomplete entry with time_started in the past
476-
let old_entry = datastore
477-
.audit_log_entry_init(opctx, make_entry_params("old-req").into())
478-
.await
479-
.unwrap();
480-
set_time_started(datastore, old_entry.id, two_hours_ago).await;
481-
482-
// Create an incomplete entry that's recent (should not be timed out)
483-
let _recent_entry = datastore
484-
.audit_log_entry_init(opctx, make_entry_params("recent-req").into())
485-
.await
486-
.unwrap();
487-
488-
// Create an old entry that's already completed (should not be touched)
489-
let completed_entry = datastore
490-
.audit_log_entry_init(
491-
opctx,
492-
make_entry_params("completed-req").into(),
493-
)
494-
.await
495-
.unwrap();
496-
set_time_started(datastore, completed_entry.id, two_hours_ago).await;
497-
datastore
498-
.audit_log_entry_complete(
499-
opctx,
500-
&completed_entry,
501-
AuditLogCompletion::Success { http_status_code: 200 }.into(),
453+
use diesel::prelude::*;
454+
use nexus_db_schema::schema::audit_log;
455+
diesel::update(audit_log::table)
456+
.filter(audit_log::id.eq(id))
457+
.set(audit_log::time_started.eq(time_started))
458+
.execute_async(
459+
&*datastore.pool_connection_for_tests().await.unwrap(),
502460
)
503461
.await
504-
.unwrap();
505-
506-
// Run the timeout
507-
let timed_out = datastore
508-
.audit_log_timeout_incomplete(opctx, cutoff, 100)
509-
.await
510-
.unwrap();
511-
512-
// Only the old incomplete entry should be affected
513-
assert_eq!(timed_out, 1);
514-
515-
// The timed-out entry should now appear in the audit log with
516-
// result_kind = timeout
517-
let pagparams = DataPageParams {
518-
marker: None,
519-
limit: NonZeroU32::new(100).unwrap(),
520-
direction: dropshot::PaginationOrder::Ascending,
521-
};
522-
let entries = datastore
523-
.audit_log_list(opctx, &pagparams, two_hours_ago, None)
524-
.await
525-
.unwrap();
526-
527-
let timed_out_entry =
528-
entries.iter().find(|e| e.id == old_entry.id).unwrap();
529-
assert_eq!(timed_out_entry.result_kind, AuditLogResultKind::Timeout);
530-
assert!(timed_out_entry.http_status_code.is_none());
531-
assert!(timed_out_entry.error_code.is_none());
532-
assert!(timed_out_entry.error_message.is_none());
533-
534-
// The completed entry should still be success
535-
let completed =
536-
entries.iter().find(|e| e.id == completed_entry.id).unwrap();
537-
assert_eq!(completed.result_kind, AuditLogResultKind::Success);
538-
539-
// Running again should find nothing (idempotent)
540-
let timed_out = datastore
541-
.audit_log_timeout_incomplete(opctx, cutoff, 100)
542-
.await
543-
.unwrap();
544-
assert_eq!(timed_out, 0);
545-
546-
db.terminate().await;
547-
logctx.cleanup_successful();
548-
}
549-
550-
#[tokio::test]
551-
async fn test_audit_log_timeout_respects_limit() {
552-
let logctx =
553-
dev::test_setup_log("test_audit_log_timeout_respects_limit");
554-
let db = TestDatabase::new_with_datastore(&logctx.log).await;
555-
let (opctx, datastore) = (db.opctx(), db.datastore());
556-
557-
let now = Utc::now();
558-
let two_hours_ago = now - TimeDelta::try_hours(2).unwrap();
559-
let cutoff = now - TimeDelta::try_hours(1).unwrap();
560-
561-
// Create 5 stale incomplete entries
562-
for i in 0..5 {
563-
let entry = datastore
564-
.audit_log_entry_init(
565-
opctx,
566-
make_entry_params(&format!("req-{i}")).into(),
567-
)
568-
.await
569-
.unwrap();
570-
set_time_started(datastore, entry.id, two_hours_ago).await;
571-
}
572-
573-
// Timeout with limit of 2
574-
let batch1 = datastore
575-
.audit_log_timeout_incomplete(opctx, cutoff, 2)
576-
.await
577-
.unwrap();
578-
assert_eq!(batch1, 2);
579-
580-
let batch2 = datastore
581-
.audit_log_timeout_incomplete(opctx, cutoff, 2)
582-
.await
583-
.unwrap();
584-
assert_eq!(batch2, 2);
585-
586-
let batch3 = datastore
587-
.audit_log_timeout_incomplete(opctx, cutoff, 2)
588-
.await
589-
.unwrap();
590-
assert_eq!(batch3, 1);
591-
592-
// All gone
593-
let batch4 = datastore
594-
.audit_log_timeout_incomplete(opctx, cutoff, 2)
595-
.await
596-
.unwrap();
597-
assert_eq!(batch4, 0);
598-
599-
db.terminate().await;
600-
logctx.cleanup_successful();
462+
.expect("could not set time_started");
601463
}
602464

603465
/// A late completion of an already-timed-out entry should be a no-op.

nexus/examples/config-second.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ session_cleanup.period_secs = 300
193193
session_cleanup.max_delete_per_activation = 10000
194194
audit_log_timeout_incomplete.period_secs = 600
195195
audit_log_timeout_incomplete.timeout_secs = 14400
196-
audit_log_timeout_incomplete.max_update_per_activation = 1000
196+
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
197197

198198
[default_region_allocation_strategy]
199199
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.

nexus/examples/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ session_cleanup.period_secs = 300
177177
session_cleanup.max_delete_per_activation = 10000
178178
audit_log_timeout_incomplete.period_secs = 600
179179
audit_log_timeout_incomplete.timeout_secs = 14400
180-
audit_log_timeout_incomplete.max_update_per_activation = 1000
180+
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
181181

182182
[default_region_allocation_strategy]
183183
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.

nexus/src/app/background/init.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1200,7 +1200,7 @@ impl BackgroundTasksInitializer {
12001200
config.audit_log_timeout_incomplete.timeout_secs,
12011201
config
12021202
.audit_log_timeout_incomplete
1203-
.max_update_per_activation,
1203+
.max_timed_out_per_activation,
12041204
),
12051205
),
12061206
opctx: opctx.child(BTreeMap::new()),

0 commit comments

Comments
 (0)