Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
460bfa6
feat: cloud engine firewall
pierugo-dfinity Jun 16, 2026
7a8ba86
test: system test
pierugo-dfinity Jun 16, 2026
a70d671
feat: run ic-boundary next to the replica
pierugo-dfinity Jun 16, 2026
9a2a31d
refactor: make Upgrade agnostic to processes running
pierugo-dfinity Jun 16, 2026
932808e
feat: add ic-gateway
pierugo-dfinity Jun 17, 2026
de65d14
feat: run ic-gateway only for now
pierugo-dfinity Jun 17, 2026
01b9362
refactor: minor refactor
pierugo-dfinity Jun 17, 2026
2af2935
docs: remove gov-team change
pierugo-dfinity Jun 17, 2026
d6ee4b5
refactor: deduplicate/generalize code
pierugo-dfinity Jun 17, 2026
e630926
feat: do not attempt to stop when not running. Log + Metrics when sto…
pierugo-dfinity Jun 17, 2026
e50142e
feat: propagate orchestrator's changed metric's name
pierugo-dfinity Jun 17, 2026
11138a7
re-trigger CI
pierugo-dfinity Jun 17, 2026
b73a95b
Automatically fixing code for linting and formatting issues
Jun 17, 2026
8a5a43e
style: check metric of all orchestrator processes
pierugo-dfinity Jun 17, 2026
fa1dd85
feat: support checking metrics prefixes
pierugo-dfinity Jun 17, 2026
3adc572
feat: ic-gateway metrics
pierugo-dfinity Jun 17, 2026
852e891
feat: stop ic-gateway first
pierugo-dfinity Jun 17, 2026
9295ec5
fix: fix image size estimates
pierugo-dfinity Jun 18, 2026
78de3f9
fix: spawn child as leader of new process group
pierugo-dfinity Jun 18, 2026
20e0162
docs: add TODO
pierugo-dfinity Jun 18, 2026
dd9deff
feat: firewall changes in separate PR
pierugo-dfinity Jun 18, 2026
b0a493c
feat: gate launching ic-gateway behind flag
pierugo-dfinity Jun 18, 2026
93a758e
docs
pierugo-dfinity Jun 18, 2026
aefebff
docs
pierugo-dfinity Jun 19, 2026
7ba43d7
style: inline args.ic_binary_directory
pierugo-dfinity Jun 22, 2026
603cfb5
docs
pierugo-dfinity Jun 22, 2026
afc74aa
style: rename Fake struct
pierugo-dfinity Jun 22, 2026
e12eccc
test: add IcBoundaryManager unit tests
pierugo-dfinity Jun 22, 2026
9ab62e6
docs
pierugo-dfinity Jun 22, 2026
45aeab2
feat: gate stopping
pierugo-dfinity Jun 22, 2026
5b993db
docs: update `ic_binary_directory` docs
pierugo-dfinity Jun 23, 2026
55e2587
feat: update current domain name only on successes
pierugo-dfinity Jun 23, 2026
a22c0de
feat: assert prefix-freeness
pierugo-dfinity Jun 23, 2026
8f3a740
fix: fix outdated log+docs
pierugo-dfinity Jun 23, 2026
d360dac
fix: period
pierugo-dfinity Jun 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion Cargo.Bazel.json.lock
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"checksum": "7f96e5c133c203870fc997d52e080f72044bd53f1530c5332ba2230232e22d68",
"checksum": "ff7f44505ebc5f13c6c3d5c53d2dffc80669e43460f22816aff45ab32ec31178",
"crates": {
"abnf 0.12.0": {
"name": "abnf",
Expand Down Expand Up @@ -37763,6 +37763,18 @@
]
}
}
},
{
"Binary": {
"crate_name": "ic-gateway",
"crate_root": "src/main.rs",
"srcs": {
"allow_empty": true,
"include": [
"**/*.rs"
]
}
}
}
],
"library_target_name": "ic_gateway",
Expand Down Expand Up @@ -100978,6 +100990,7 @@
},
"binary_crates": [
"canbench 0.4.1",
"ic-gateway 0.2.0",
"ic-wasm 0.9.11",
"metrics-proxy 0.1.0"
],
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions bazel/rust.MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,10 @@ crate.annotation(
crate = "metrics-proxy",
gen_binaries = ["metrics-proxy"],
)
crate.annotation(
crate = "ic-gateway",
gen_binaries = ["ic-gateway"],
)
crate.splicing_config(
resolver_version = "2",
)
Expand Down
1 change: 1 addition & 0 deletions ic-os/components/guestos.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def component_files(mode):
Label("guestos/remote-attestation-server.service"): "/etc/systemd/system/remote-attestation-server.service",
Label("guestos/generate-ic-config/generate-ic-config.service"): "/etc/systemd/system/generate-ic-config.service",
Label("guestos/share/ic-boundary.env"): "/opt/ic/share/ic-boundary.env",
Label("guestos/share/ic-gateway.env"): "/opt/ic/share/ic-gateway.env",
Label("guestos/share/nns_public_key.pem"): "/opt/ic/share/nns_public_key.pem",

# init
Expand Down
2 changes: 1 addition & 1 deletion ic-os/components/guestos/ic-replica.service
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ User=ic-replica

Environment=RUST_BACKTRACE=1
Environment=RUST_MIN_STACK=8192000
ExecStart=/opt/ic/bin/orchestrator --replica-binary-dir /var/lib/ic/data/images --cup-dir /var/lib/ic/data/cups --replica-config-file /run/ic-node/config/ic.json5 --enable-provisional-registration --ic-binary-directory /opt/ic/bin --orchestrator-data-directory /var/lib/ic/data/orchestrator --version-file /opt/ic/share/version.txt
ExecStart=/opt/ic/bin/orchestrator --replica-binary-dir /var/lib/ic/data/images --cup-dir /var/lib/ic/data/cups --replica-config-file /run/ic-node/config/ic.json5 --ic-boundary-env-file /opt/ic/share/ic-boundary.env --ic-gateway-env-file /opt/ic/share/ic-gateway.env --enable-provisional-registration --ic-binary-directory /opt/ic/bin --orchestrator-data-directory /var/lib/ic/data/orchestrator --version-file /opt/ic/share/version.txt
LimitNOFILE=16777216
Restart=always
RestartSec=10
Expand Down
5 changes: 5 additions & 0 deletions ic-os/components/guestos/share/ic-gateway.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
LISTEN_PLAIN=[::]:80
LISTEN_INSECURE_SERVE_HTTP_ONLY=true
METRICS_LISTEN=[::]:9314
IC_URL=http://127.0.0.1:8080
DOMAIN=gateway.icp
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def image_deps(mode, malicious = False):
"//publish/binaries:orchestrator": "/opt/ic/bin/orchestrator:0755", # Replica process manager, required by the IC protocol (upgrades, node addition, etc).
("//publish/malicious:replica" if malicious else "//publish/binaries:replica"): "/opt/ic/bin/replica:0755", # Main protocol binary, required by the IC protocol. Installs the malicious replica iff set only in test builds.
"//publish/binaries:ic-boundary": "/opt/ic/bin/ic-boundary:0755", # API boundary node binary, required by the IC protocol. The same GuestOS is used both for the replica and API boundary nodes.
"//rs/ic_os/release:ic-gateway": "/opt/ic/bin/ic-gateway:0755", # IC-gateway binary, required by cloud engine nodes, which run it as a sidecar to the replica.
"//publish/binaries:ic-consensus-pool-util": "/opt/ic/bin/ic-consensus-pool-util:0755", # May be used during recoveries to export/import consensus pool artifacts.
"//publish/binaries:ic-recovery": "/opt/ic/bin/ic-recovery:0755", # Required for performing subnet recoveries on the node directly.
"//publish/binaries:state-tool": "/opt/ic/bin/state-tool:0755", # May be used during recoveries for calculating the state hash and inspecting the state more generally.
Expand Down
6 changes: 3 additions & 3 deletions ic-os/guestos/envs/prod/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ icos_images = icos_build(
file_size_check(
name = "disk_img_size_check",
file = icos_images.disk_image,
max_file_size = 450 * 1000 * 1000, # 419 MB on 2025-03-21
max_file_size = 475 * 1000 * 1000, # 453 MB on 2026-06-18
)

file_size_check(
name = "update_img_size_check",
file = icos_images.update_image,
max_file_size = 450 * 1000 * 1000, # 416 MB on 2025-03-21
max_file_size = 475 * 1000 * 1000, # 451 MB on 2026-06-18
)

file_size_check(
name = "update_img_test_size_check",
file = icos_images.update_image_test,
max_file_size = 450 * 1000 * 1000, # 417 MB on 2025-06-26
max_file_size = 475 * 1000 * 1000, # 451 MB on 2026-06-18
)

# Export checksums & build artifacts
Expand Down
6 changes: 3 additions & 3 deletions ic-os/guestos/envs/recovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ icos_images = icos_build(
file_size_check(
name = "disk_img_size_check",
file = icos_images.disk_image,
max_file_size = 450 * 1000 * 1000, # 419 MB on 2025-06-26
max_file_size = 475 * 1000 * 1000, # 453 MB on 2026-06-18
tags = [
"manual",
"no-cache",
Expand All @@ -31,7 +31,7 @@ file_size_check(
file_size_check(
name = "update_img_size_check",
file = icos_images.update_image,
max_file_size = 450 * 1000 * 1000, # 417 MB on 2025-06-26
max_file_size = 475 * 1000 * 1000, # 451 MB on 2026-06-18
tags = [
"manual",
"no-cache",
Expand All @@ -41,7 +41,7 @@ file_size_check(
file_size_check(
name = "update_img_test_size_check",
file = icos_images.update_image_test,
max_file_size = 450 * 1000 * 1000, # 417 MB on 2025-06-26
max_file_size = 475 * 1000 * 1000, # 451 MB on 2026-06-18
tags = [
"manual",
"no-cache",
Expand Down
1 change: 1 addition & 0 deletions rs/ic_os/release/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ OBJECTS = {
"metrics-proxy": "@crate_index//:metrics-proxy__metrics-proxy",
"nss_icos": "//rs/ic_os/networking/nss_icos",
"custom_metrics": "//rs/ic_os/metrics/custom_metrics:custom_metrics_bin",
"ic-gateway": "@crate_index//:ic-gateway__ic-gateway",
}

[release_strip_binary(
Expand Down
12 changes: 10 additions & 2 deletions rs/orchestrator/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,19 @@ pub struct OrchestratorArgs {
#[clap(long)]
pub(crate) replica_config_file: PathBuf,

/// The path to the IC boundary environment file
#[clap(long)]
pub(crate) ic_boundary_env_file: PathBuf,

/// The path to the IC gateway environment file
#[clap(long)]
pub(crate) ic_gateway_env_file: PathBuf,

/// The path to the Replica binary location containing the following in case
/// of guest OS deployment: version.txt, manageboot.sh, replica,
/// of guest OS deployment: replica, ic-boundary, ic-gateway, manageboot.sh,
/// install-upgrade.sh
#[clap(long)]
pub(crate) ic_binary_directory: Option<PathBuf>,
pub(crate) ic_binary_directory: PathBuf,

/// If not set, the default listen addr (0.0.0.0:[`PROMETHEUS_HTTP_PORT`])
/// will be used to export metrics.
Expand Down
179 changes: 22 additions & 157 deletions rs/orchestrator/src/boundary_node.rs
Original file line number Diff line number Diff line change
@@ -1,80 +1,35 @@
use crate::{
error::{OrchestratorError, OrchestratorResult},
metrics::OrchestratorMetrics,
process_manager::{Process, ProcessManager, ProcessManagerImpl},
error::OrchestratorError,
process_manager::Process,
processes::{IcBoundaryManager, IcBoundaryProcess},
registry_helper::RegistryHelper,
};
use ic_config::crypto::CryptoConfig;
use ic_logger::{ReplicaLogger, info, warn};
use ic_logger::{ReplicaLogger, warn};
use ic_types::{NodeId, ReplicaVersion};
use std::{
collections::HashMap,
ffi::OsString,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};

/// Launch description of a single `ic-boundary` process: which binary to run,
/// with which arguments and environment, and on behalf of which replica version.
struct BoundaryNodeProcess {
    /// Replica version this process is started for.
    version: ReplicaVersion,
    /// Path of the executable to launch.
    binary: PathBuf,
    /// Command-line arguments handed to the executable.
    args: Vec<OsString>,
    /// Environment variables (name -> value) for the process.
    env: HashMap<OsString, OsString>,
}

/// Exposes the stored launch description through the `Process` trait.
///
/// Every accessor simply hands out the corresponding field; nothing is
/// computed or validated here.
impl Process for BoundaryNodeProcess {
    /// Name identifying this kind of process.
    const NAME: &'static str = "Boundary Node";

    /// Boundary node processes are versioned by the replica version.
    type Version = ReplicaVersion;

    /// Borrows the version this process was created with.
    fn get_version(&self) -> &Self::Version {
        &self.version
    }

    /// Borrows the path of the executable.
    fn get_binary(&self) -> &Path {
        self.binary.as_path()
    }

    /// Borrows the command-line arguments.
    fn get_args(&self) -> &[OsString] {
        self.args.as_slice()
    }

    /// Returns an owned copy of the environment map (the trait signature
    /// requires a value, hence the clone).
    fn get_env(&self) -> HashMap<OsString, OsString> {
        self.env.clone()
    }
}
use std::sync::Arc;

pub(crate) struct BoundaryNodeManager {
registry: Arc<RegistryHelper>,
_metrics: Arc<OrchestratorMetrics>,
process: Arc<Mutex<dyn ProcessManager<BoundaryNodeProcess>>>,
ic_binary_dir: PathBuf,
crypto_config: CryptoConfig,
process_manager: IcBoundaryManager,
version: ReplicaVersion,
logger: ReplicaLogger,
node_id: NodeId,
domain_name: Option<String>,
logger: ReplicaLogger,
}

impl BoundaryNodeManager {
pub(crate) fn new(
registry: Arc<RegistryHelper>,
metrics: Arc<OrchestratorMetrics>,
process_manager: IcBoundaryManager,
version: ReplicaVersion,
node_id: NodeId,
ic_binary_dir: PathBuf,
crypto_config: CryptoConfig,
logger: ReplicaLogger,
) -> Self {
Self {
registry,
_metrics: metrics,
process: Arc::new(Mutex::new(ProcessManagerImpl::new(logger.clone()))),
ic_binary_dir,
crypto_config,
process_manager,
version,
logger,
node_id,
domain_name: None,
}
}

Expand All @@ -95,119 +50,29 @@ impl BoundaryNodeManager {
// NOTE: We could also shutdown the boundary node here. However, it makes sense to continue
// serving requests while the orchestrator is downloading the new image in most cases.
} else {
match self.registry.get_node_domain_name(registry_version) {
Ok(Some(domain_name)) => {
let domain_name = Some(domain_name);

// stop ic-boundary when the domain name changes and start it again.
if domain_name != self.domain_name {
if let Err(err) = self.ensure_boundary_node_stopped() {
warn!(self.logger, "Failed to stop Boundary Node: {}", err);
}
self.domain_name = domain_name;
}

// make sure the boundary node is running
if let Err(err) = self.ensure_boundary_node_running(&self.version) {
warn!(self.logger, "Failed to start Boundary Node: {}", err);
}
}
// BN should not be active when the node doesn't have a domain name
Ok(None) => {
warn!(
self.logger,
"There is no domain associated with the node, while this is a requirement for the API boundary node. Shutting ic-boundary down."
);
if let Err(err) = self.ensure_boundary_node_stopped() {
warn!(self.logger, "Failed to stop Boundary Node: {}", err);
}
self.domain_name = None;
}
// Failing to read the registry
Err(err) => warn!(
self.logger,
"Failed to fetch Boundary Node domain name: {}", err
),
}
self.process_manager
.ensure_ic_boundary_running_and_restarted_on_domain_change(
self.version.clone(),
registry_version,
);
}
}
// BN should not be active
Err(OrchestratorError::ApiBoundaryNodeMissingError(_, _)) => {
if let Err(err) = self.ensure_boundary_node_stopped() {
warn!(self.logger, "Failed to stop Boundary Node: {}", err);
if let Err(err) = self.process_manager.stop() {
warn!(
self.logger,
"Failed to stop {}: {}",
IcBoundaryProcess::NAME,
err
);
}
}
// Failing to read the registry
Err(err) => warn!(
self.logger,
"Failed to fetch Boundary Node version: {}", err
"Failed to fetch API Boundary Node version: {}", err
),
}
}

/// Start the current boundary node process
fn ensure_boundary_node_running(&self, version: &ReplicaVersion) -> OrchestratorResult<()> {
let mut process = self.process.lock().unwrap();

if process.is_running() {
return Ok(());
}
info!(self.logger, "Starting new boundary node process");

let binary = self.ic_binary_dir.join("ic-boundary");

let domain_name = self
.domain_name
.as_ref()
.ok_or_else(|| OrchestratorError::DomainNameMissingError(self.node_id))?;

let env = match env_file_reader::read_file("/opt/ic/share/ic-boundary.env") {
Ok(env) => env
.into_iter()
.map(|(k, v)| (OsString::from(k), OsString::from(v)))
.collect(),
Err(e) => {
return Err(OrchestratorError::IoError(
"unable to read ic-boundary environment variables".to_string(),
e,
));
}
};

let args = vec![
format!("--tls-hostname={}", domain_name).into(),
format!(
"--crypto-config={}",
serde_json::to_string(&self.crypto_config)
.map_err(OrchestratorError::SerializeCryptoConfigError)?
)
.into(),
];

process
.start(BoundaryNodeProcess {
version: version.clone(),
binary,
args,
env,
})
.map_err(|e| {
OrchestratorError::IoError(
"Error when attempting to start new boundary node".into(),
e,
)
})
}

/// Makes sure no boundary node process is running.
///
/// Does nothing and returns `Ok(())` when the process manager reports that
/// nothing is running; otherwise asks it to stop the process.
///
/// # Errors
///
/// Returns `OrchestratorError::IoError` if stopping the process fails.
///
/// # Panics
///
/// Panics if the process-manager mutex is poisoned.
fn ensure_boundary_node_stopped(&self) -> OrchestratorResult<()> {
    let mut manager = self.process.lock().unwrap();

    // Nothing to stop: succeed without touching the process manager further.
    if !manager.is_running() {
        return Ok(());
    }

    manager.stop().map_err(|e| {
        OrchestratorError::IoError("Error when attempting to stop boundary node".into(), e)
    })
}
}
Loading
Loading