Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .config/nextest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ filter = 'binary_id(omicron-nexus::test_all) and test(::schema::)'
threads-required = 4

[[profile.ci.overrides]]
filter = 'binary_id(omicron-nexus::test_all)'
# omicron-nexus depends on omicron-test-utils, so this is a bit redundant,
# but it's future-proofing. omicron-test-utils has conditions where it uses
# SIGTERM to await CockroachDB graceful shutdown - we use a nextest timeout
# so it can wait "as long as the test runner decides is reasonable".
filter = 'binary_id(omicron-nexus::test_all) + rdeps(omicron-test-utils)'
# As of 2023-01-08, the slowest test in test_all takes 196s on a Ryzen 7950X.
# 900s is a good upper limit that adds a comfortable buffer.
slow-timeout = { period = '60s', terminate-after = 15 }
Expand Down
189 changes: 179 additions & 10 deletions test-utils/src/dev/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -733,24 +733,60 @@ impl CockroachInstance {

impl Drop for CockroachInstance {
fn drop(&mut self) {
// TODO-cleanup Ideally at this point we would run self.cleanup() to
// kill the child process, wait for it to exit, and then clean up the
// temporary directory. However, we don't have an executor here with
// which to run async/await code. We could create one here, but it's
// not clear how safe or sketchy that would be. Instead, we expect that
// the caller has done the cleanup already. This won't always happen,
// particularly for ungraceful failures.
// NOTE: Ideally, we'd share this logic with self.cleanup() to tear down
// the child process. However, there are a couple distinctions from that
// pathway we must consider here:
//
// 1. drop is not asynchronous, so we don't have the benefits of using
// our tokio executor to run async/await code.
// 2. If self.cleanup() has not been invoked, we want to gracefully
// preserve the database for post-mortem debugging. This is often
// the case for e.g. a test which has failed before invoking cleanup,
// and which is bailing out. We can also take this pathway when the
// calling code has forgotten to call "cleanup", and is leaking the
// database, but that's a bug - and choosing to flush and terminate
// via SIGTERM wouldn't be wrong in that case either.
if self.child_process.is_some() || self.temp_dir.is_some() {
eprintln!(
"WARN: dropped CockroachInstance without cleaning it up first \
(there may still be a child process running and a \
temporary directory leaked)"
);

// Still, make a best effort.
#[allow(unused_must_use)]
// Send SIGTERM so CockroachDB can flush its WAL to disk, making the
// leaked temp directory useful for post-mortem debugging.
//
// Background: test CRDB instances might set
// `kv.raft_log.disable_synchronization_unsafe = true`
// which skips per-commit fdatasync for performance. Data is still
// written to the WAL but may only exist in Pebble's user-space
// write buffer. SIGTERM triggers Pebble's DB.Close() which flushes
// and fsyncs the WAL before exiting.
// See: https://github.com/oxidecomputer/omicron/issues/10085
if let Some(child_process) = self.child_process.as_mut() {
child_process.start_kill();
let pid = self.pid as libc::pid_t;
unsafe { libc::kill(pid, libc::SIGTERM) };

// Poll the child_process until it exits.
//
// Note that nextest has a timeout for tests
// with the filter 'rdeps(omicron-test-utils)'.
//
// As a consequence, we can loop "as long as the test runner
// lets us" waiting for CockroachDB to gracefully terminate. In
// reality, this is typically on the order of ~one second after
// a test failure.
loop {
match child_process.try_wait() {
Ok(None) => {} // still running
Ok(Some(_)) | Err(_) => {
// Terminated successfully or already reaped (e.g.
// ECHILD).
break;
}
}
std::thread::sleep(std::time::Duration::from_millis(50));
}
}
#[allow(unused_must_use)]
if let Some(temp_dir) = self.temp_dir.take() {
Expand Down Expand Up @@ -1903,6 +1939,139 @@ mod test {
assert_eq!(addr.port(), 12345);
}

// Test that data written to CockroachDB survives when the instance is
// dropped without calling cleanup(). This exercises the Drop impl path
// that fires during test failures (when cleanup is unreachable due to
// a panic) or when cleanup is accidentally omitted.
//
// With `disable_synchronization_unsafe = true`, committed data may
// only exist in Pebble's user-space write buffer (not yet write()-ed
// to the WAL file on disk). A graceful shutdown (SIGTERM) triggers
// Pebble's DB.Close() which flushes and fsyncs the WAL, making data
// durable. SIGKILL destroys the process immediately, losing any
// buffered WAL data.
//
// See: https://github.com/oxidecomputer/omicron/issues/10085
#[tokio::test]
async fn test_data_survives_drop_without_cleanup() {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test reliably fails without the changes to drop.

(If you want to verify this: check out this PR, undo the changes to drop, it'll fail. This was a pre-requisite before I started my implementation -- I wanted to reliably hit the issue @davepacheco saw, and have confidence the SIGTERM approach would fix it)

let store_dir = tempdir().expect("failed to create temp dir");
let data_dir = store_dir.path().join("data");

// Start CRDB with a persistent store directory
let starter = new_builder()
.store_dir(&data_dir)
.build()
.expect("failed to build starter");
let database = starter.start().await.expect("failed to start CRDB");

// Enable the unsafe sync setting (as setup_database does)
database
.disable_synchronization()
.await
.expect("failed to disable sync");

// Phase 1: Write enough data to overflow Pebble's internal WAL
// write buffer, ensuring it gets write()-ed to the kernel and
// is durable on disk.
let client = database.connect().await.expect("failed to connect");
client
.batch_execute(
"CREATE DATABASE test_db; \
USE test_db; \
CREATE TABLE test_data (id INT PRIMARY KEY, payload STRING); \
INSERT INTO test_data (id, payload) \
SELECT i, repeat('x', 1024) \
FROM generate_series(1, 5000) AS g(i);",
)
.await
.expect("insert failed");

// Phase 2: Perform many small mutations that may sit in
// Pebble's user-space write buffer. With
// disable_synchronization_unsafe, Sync() is a no-op, so the
// buffer is only flushed when full. We issue many batch UPDATEs
// (each updating 100 rows) so the last few WAL entries are
// likely still buffered when we shut down.
for batch_start in (1..=5000).step_by(100) {
let batch_end = std::cmp::min(batch_start + 99, 5000);
client
.batch_execute(&format!(
"UPDATE test_db.test_data \
SET payload = 'updated' \
WHERE id BETWEEN {batch_start} AND {batch_end};",
))
.await
.expect("update failed");
}

// One final write that we'll check for specifically — this
// is the most likely to be lost since it's the last WAL entry.
client
.batch_execute(
"INSERT INTO test_db.test_data (id, payload) \
VALUES (99999, 'canary');",
)
.await
.expect("canary insert failed");

// Drop without cleanup — this triggers the Drop impl.
// Save the temp dir path so we can clean it up after
// verification (the Drop impl intentionally leaks it).
let leaked_temp_dir = database.temp_dir().to_owned();
drop(client);
drop(database);

// Restart CRDB on the same store directory.
let starter2 = new_builder()
.store_dir(&data_dir)
.build()
.expect("failed to build second starter");
let mut database2 =
starter2.start().await.expect("failed to restart CRDB");

let client2 = database2.connect().await.expect("failed to reconnect");

// Check that the canary row survived.
let rows = client2
.query(
"SELECT payload FROM test_db.test_data WHERE id = 99999",
&[],
)
.await
.expect("canary query failed");
assert_eq!(
rows.len(),
1,
"canary row (id=99999) did not survive shutdown"
);
let payload: String = rows[0].get(0);
assert_eq!(payload, "canary", "canary row has wrong payload");

// Check that updates survived — sample a few rows.
let rows = client2
.query(
"SELECT count(*) FROM test_db.test_data \
WHERE payload = 'updated'",
&[],
)
.await
.expect("update check query failed");
let updated_count: i64 = rows[0].get(0);
assert_eq!(
updated_count, 5000,
"updates did not survive shutdown: \
expected 5000 updated rows, got {updated_count}"
);

client2.cleanup().await.expect("client2 cleanup failed");
database2.cleanup().await.expect("database2 cleanup failed");

// Clean up the temp dir leaked by the first instance's Drop.
fs::remove_dir_all(&leaked_temp_dir)
.await
.expect("failed to clean up leaked temp dir");
}

// Tests the way `process_exited()` checks and interprets the exit status
// for abnormal process termination (by a signal).
#[tokio::test]
Expand Down
Loading