Skip to content

Commit a54b7de

Browse files
authored
phd: collect core when killing non-booting guest (#1079)
This may or may not prove useful in practice. If we're lucky, something got funky in device emulation and we can see a stuck thread. On the other hand, if we're unlucky, the guest is stuck in a loop and all we see is that one vCPU was running while everything else was idle.
1 parent d863f81 commit a54b7de

2 files changed

Lines changed: 47 additions & 2 deletions

File tree

phd-tests/framework/src/test_vm/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,15 @@ impl TestVm {
887887
.instrument(info_span!("wait_to_boot"));
888888

889889
match timeout(timeout_duration, boot).await {
890-
Err(_) => anyhow::bail!("timed out while waiting to boot"),
890+
Err(_) => {
891+
error!(
892+
"Guest did not boot after {}ms! Collecting core..",
893+
timeout_duration.as_millis()
894+
);
895+
let proc = self.server.as_ref().unwrap();
896+
proc.core();
897+
anyhow::bail!("timed out while waiting to boot")
898+
}
891899
Ok(inner) => {
892900
inner.context("executing guest login sequence")?;
893901
}

phd-tests/framework/src/test_vm/server.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ use std::{
88
fmt::Debug,
99
net::{SocketAddr, SocketAddrV4},
1010
os::unix::process::CommandExt,
11+
time::SystemTime,
1112
};
1213

1314
use anyhow::Result;
1415
use camino::{Utf8Path, Utf8PathBuf};
15-
use tracing::{debug, info};
16+
use tracing::{debug, info, warn};
1617

1718
use crate::log_config::LogConfig;
1819

@@ -44,6 +45,7 @@ pub struct ServerProcessParameters<'a> {
4445
pub struct PropolisServer {
4546
/// Handle to the spawned server child process. `kill` takes this out,
/// leaving `None` once the process has been killed.
server: Option<std::process::Child>,
4647
/// Address recorded when the server was spawned; handed back by the
/// accessor on this type.
address: SocketAddrV4,
48+
/// Output directory captured at spawn time; `core` writes the core file
/// it collects into this directory.
output_dir: Utf8PathBuf,
4749
}
4850

4951
impl PropolisServer {
@@ -117,6 +119,9 @@ impl PropolisServer {
117119
let server = PropolisServer {
118120
server: Some(server_cmd.spawn()?),
119121
address: server_addr,
122+
// Stash the same output directory in case the framework has to
123+
// write any files on behalf of the test run.
124+
output_dir: output_dir.to_owned(),
120125
};
121126

122127
info!(
@@ -130,6 +135,38 @@ impl PropolisServer {
130135
self.address
131136
}
132137

138+
/// Collect a core of this server process, placing it in the same output
139+
/// directory as other artifacts of this test.
140+
pub(super) fn core(&self) {
141+
let Some(server_proc) = self.server.as_ref() else {
142+
warn!("Tried to produce a core without a propolis-server?");
143+
return;
144+
};
145+
146+
let core_name = format!(
147+
"core-{}",
148+
SystemTime::now()
149+
.duration_since(SystemTime::UNIX_EPOCH)
150+
.expect("Time is gone, the song is over")
151+
.as_millis()
152+
);
153+
let core_path = self.output_dir.join(core_name);
154+
155+
std::process::Command::new("pfexec")
156+
.args([
157+
"gcore".as_ref(),
158+
"-o".as_ref(),
159+
core_path.as_os_str(),
160+
server_proc.id().to_string().as_ref(),
161+
])
162+
.spawn()
163+
.expect("can try to gcore a process")
164+
.wait()
165+
.expect("can gcore a propolis-server we spawned");
166+
167+
warn!("core written to {}", core_path);
168+
}
169+
133170
/// Kills this server process if it hasn't been killed already.
134171
pub(super) fn kill(&mut self) {
135172
let Some(mut server) = self.server.take() else {

0 commit comments

Comments
 (0)