diff --git a/packages/pocket-ic/tests/tests.rs b/packages/pocket-ic/tests/tests.rs index cbd6e494fedb..0663d6fd3bea 100644 --- a/packages/pocket-ic/tests/tests.rs +++ b/packages/pocket-ic/tests/tests.rs @@ -10,6 +10,7 @@ use ic_management_canister_types::{ use ic_transport_types::Envelope; use ic_transport_types::EnvelopeContent::{Call, ReadState}; use ic_utils::interfaces::ManagementCanister; +use pocket_ic::SubnetMetrics; use pocket_ic::{ DefaultEffectiveCanisterIdError, ErrorCode, IngressStatusResult, PocketIc, PocketIcBuilder, PocketIcState, RejectCode, StartServerParams, Time, @@ -1649,26 +1650,36 @@ fn subnet_metrics() { deploy_counter_canister(&pic); - let metrics = pic.get_subnet_metrics(app_subnet).unwrap(); + fn get_subnet_metrics(pic: &PocketIc, subnet_id: Principal) -> SubnetMetrics { + // Advance 10 rounds, to ensure that `canister_state_bytes` (only recomputed + // every 10 rounds) is updated. + for _ in 0..10 { + pic.tick(); + } + pic.get_subnet_metrics(subnet_id).unwrap() + } + + pic.tick(); + let metrics = get_subnet_metrics(&pic, app_subnet); assert_eq!(metrics.num_canisters, 1); assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17)); let canister_id = deploy_counter_canister(&pic); - let metrics = pic.get_subnet_metrics(app_subnet).unwrap(); + let metrics = get_subnet_metrics(&pic, app_subnet); assert_eq!(metrics.num_canisters, 2); assert!((1 << 17) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 18)); pic.uninstall_canister(canister_id, None).unwrap(); pic.stop_canister(canister_id, None).unwrap(); - let metrics = pic.get_subnet_metrics(app_subnet).unwrap(); + let metrics = get_subnet_metrics(&pic, app_subnet); assert_eq!(metrics.num_canisters, 2); assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17)); pic.delete_canister(canister_id, None).unwrap(); - let metrics = pic.get_subnet_metrics(app_subnet).unwrap(); + let metrics = get_subnet_metrics(&pic, app_subnet); assert_eq!(metrics.num_canisters, 1); assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17)); } diff --git a/rs/messaging/src/message_routing.rs b/rs/messaging/src/message_routing.rs index fabcb1120f69..e514788f9a15 100644 --- a/rs/messaging/src/message_routing.rs +++ b/rs/messaging/src/message_routing.rs @@ -16,7 +16,7 @@ use ic_interfaces_state_manager::{CertificationScope, StateManager}; use ic_limits::{SMALL_APP_SUBNET_MAX_SIZE, SYSTEM_SUBNET_STREAM_MSG_LIMIT}; use ic_logger::{ReplicaLogger, debug, fatal, info, warn}; use ic_metrics::MetricsRegistry; -use ic_metrics::buckets::{add_bucket, decimal_buckets, decimal_buckets_with_zero}; +use ic_metrics::buckets::{add_bucket, decimal_buckets}; use ic_protobuf::proxy::{ProxyDecodeError, try_from_option_field}; use ic_protobuf::registry::subnet::v1::CanisterCyclesCostSchedule as CanisterCyclesCostScheduleProto; use ic_query_stats::QueryStatsAggregatorMetrics; @@ -34,6 +34,7 @@ use ic_registry_resource_limits::ResourceLimits; use ic_registry_subnet_features::{ChainKeyConfig, SubnetFeatures}; use ic_registry_subnet_type::SubnetType; use ic_replicated_state::metadata_state::ApiBoundaryNodeEntry; +use ic_replicated_state::metrics::ReplicatedStateMetrics; use ic_replicated_state::{ DroppedMessageMetrics, FullTopology, NetworkTopology, ReplicatedState, SubnetTopology, }; @@ -44,8 +45,8 @@ use ic_types::registry::RegistryClientError; use ic_types::state_manager::StateManagerError; use ic_types::xnet::{StreamHeader, StreamIndex}; use ic_types::{ - ExecutionRound, Height, NodeId, NumBytes, PrincipalId, PrincipalIdBlobParseError, - RegistryVersion, SubnetId, Time, + ExecutionRound, Height, NodeId, PrincipalId, PrincipalIdBlobParseError, RegistryVersion, + SubnetId, Time, }; use ic_types_cycles::CanisterCyclesCostSchedule; use ic_utils_thread::JoinOnDrop; @@ -110,11 +111,6 @@ const BLOCKS_NOT_PROPOSED_BY_BLOCKMAKER_TOTAL: &str = "mr_blocks_not_proposed_by const METRIC_NEXT_CHECKPOINT_HEIGHT: &str = "mr_next_checkpoint_height"; const METRIC_REMOTE_CERTIFIED_HEIGHTS: &str = "mr_remote_certified_heights"; -const METRIC_WASM_CUSTOM_SECTIONS_MEMORY_USAGE_BYTES: &str = - "mr_wasm_custom_sections_memory_usage_bytes"; -const METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES: &str = "mr_canister_history_memory_usage_bytes"; -const METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES: &str = "mr_canister_history_total_num_changes"; - const METRIC_SUBNET_INFO: &str = "mr_subnet_info"; const METRIC_SUBNET_SIZE: &str = "mr_subnet_size"; const METRIC_MAX_CANISTERS: &str = "mr_subnet_max_canisters"; @@ -323,24 +319,6 @@ pub(crate) struct MessageRoutingMetrics { /// Number of blocks not proposed by blockmaker ID. pub(crate) blocks_not_proposed_by_blockmaker_total: IntCounterVec, - /// The memory footprint of all the canisters on this subnet. Note that this - /// counter is from the perspective of the canisters and does not account - /// for the extra copies of the state that the protocol has to store for - /// correct operations. - canisters_memory_usage_bytes: IntGauge, - /// The memory footprint of Wasm custom sections of all canisters on this - /// subnet. Note that the value is from the perspective of the canisters - /// and does not account for the extra copies of the state that the protocol - /// has to store for correct operations. - wasm_custom_sections_memory_usage_bytes: IntGauge, - /// The memory footprint of canister history of all canisters on this - /// subnet. Note that the value is from the perspective of the canisters - /// and does not account for the extra copies of the state that the protocol - /// has to store for correct operations. - canister_history_memory_usage_bytes: IntGauge, - /// The total number of changes in canister history per canister on this subnet. - canister_history_total_num_changes: Histogram, - subnet_info: IntGaugeVec, subnet_size: IntGauge, max_canisters: IntGauge, @@ -464,24 +442,6 @@ impl MessageRoutingMetrics { "Failures to propose a block (when the node was block maker rank R but the subnet accepted the block from the block maker with rank S > R).", &["blockmaker_id"], ), - canisters_memory_usage_bytes: metrics_registry.int_gauge( - "canister_memory_usage_bytes", - "Total memory footprint of all canisters on this subnet.", - ), - wasm_custom_sections_memory_usage_bytes: metrics_registry.int_gauge( - METRIC_WASM_CUSTOM_SECTIONS_MEMORY_USAGE_BYTES, - "Total memory footprint of Wasm custom sections of all canisters on this subnet.", - ), - canister_history_memory_usage_bytes: metrics_registry.int_gauge( - METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES, - "Total memory footprint of canister history of all canisters on this subnet.", - ), - canister_history_total_num_changes: metrics_registry.histogram( - METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES, - "Total number of changes in canister history per canister on this subnet.", - // 0, 1, 2, 5, …, 1000, 2000, 5000 - decimal_buckets_with_zero(0, 3), - ), subnet_info: metrics_registry.int_gauge_vec( METRIC_SUBNET_INFO, @@ -749,46 +709,6 @@ impl BatchProcessorImpl { .observe(since.elapsed().as_secs_f64()); } - /// Observes metrics related to memory used by canisters. It includes: - /// * total memory used - /// * memory used by Wasm Custom Sections - /// * memory used by canister history - /// - /// Returns the total memory usage of the canisters of this subnet. - fn observe_canisters_memory_usage(&self, state: &ReplicatedState) -> NumBytes { - let mut total_memory_usage = NumBytes::new(0); - let mut wasm_custom_sections_memory_usage = NumBytes::new(0); - let mut canister_history_memory_usage = NumBytes::new(0); - for canister in state.canisters_iter() { - // Export the total canister memory usage; execution and wasm custom section - // memory are included in `memory_usage()`; message memory is added separately. - total_memory_usage += canister.memory_usage() + canister.message_memory_usage().total(); - wasm_custom_sections_memory_usage += canister - .execution_state - .as_ref() - .map(|es| es.metadata.memory_usage()) - .unwrap_or_default(); - canister_history_memory_usage += canister.canister_history_memory_usage(); - self.metrics.canister_history_total_num_changes.observe( - canister - .system_state - .get_canister_history() - .get_total_num_changes() as f64, - ); - } - self.metrics - .canisters_memory_usage_bytes - .set(total_memory_usage.get() as i64); - self.metrics - .wasm_custom_sections_memory_usage_bytes - .set(wasm_custom_sections_memory_usage.get() as i64); - self.metrics - .canister_history_memory_usage_bytes - .set(canister_history_memory_usage.get() as i64); - - total_memory_usage - } - /// Reads registry contents required by `BatchProcessorImpl::process_batch()`. // /// # Warning @@ -1489,6 +1409,7 @@ impl BatchProcessor for BatchProcessorImpl BatchProcessor for BatchProcessorImpl::new(); + let mut wasm_custom_sections_memory_usage = NumBytes::new(0); + let mut canister_history_memory_usage = NumBytes::new(0); + let mut total_canister_balance = Cycles::zero(); let mut total_canister_reserved_balance = Cycles::zero(); @@ -357,7 +382,7 @@ impl ReplicatedStateMetrics { let mut total_canister_snapshots_count = 0; let canister_id_ranges = state.routing_table().ranges(own_subnet_id); - state.canisters_iter().for_each(|canister| { + for canister in state.canisters_iter() { match canister.system_state.get_status() { CanisterStatus::Running { .. } => num_running_canisters += 1, CanisterStatus::Stopping { stop_contexts, .. } => { @@ -424,6 +449,13 @@ impl ReplicatedStateMetrics { canisters_not_in_routing_table += 1; } + wasm_custom_sections_memory_usage += canister + .execution_state + .as_ref() + .map(|es| es.metadata.memory_usage()) + .unwrap_or_default(); + canister_history_memory_usage += canister.canister_history_memory_usage(); + total_canister_balance += canister.system_state.balance(); total_canister_reserved_balance += canister.system_state.reserved_balance(); @@ -452,7 +484,7 @@ impl ReplicatedStateMetrics { total_canister_snapshots_memory_taken += canister.canister_snapshots.memory_taken(); total_canister_snapshots_count += canister.canister_snapshots.len(); - }); + } self.old_open_call_contexts .with_label_values(&[OLD_CALL_CONTEXT_LABEL_ONE_DAY]) @@ -568,6 +600,13 @@ impl ReplicatedStateMetrics { self.stop_canister_calls_without_call_id .set(num_stop_canister_calls_without_call_id as i64); + self.canisters_memory_usage_bytes + .set(Self::total_canister_memory_usage(state).get() as i64); + self.wasm_custom_sections_memory_usage_bytes + .set(wasm_custom_sections_memory_usage.get() as i64); + self.canister_history_memory_usage_bytes + .set(canister_history_memory_usage.get() as i64); + self.total_canister_balance .set(total_canister_balance.get() as f64); @@ -619,6 +658,23 @@ impl ReplicatedStateMetrics { if let Some(retention) = retention { self.canister_log_retention.observe(retention.as_secs_f64()); } + + self.canister_history_total_num_changes.observe( + canister + .system_state + .get_canister_history() + .get_total_num_changes() as f64, + ); + } + + /// Returns the total memory usage of all canisters. Execution and wasm custom section + /// memory are included in `memory_usage()`, message memory is added separately. + pub fn total_canister_memory_usage(state: &ReplicatedState) -> NumBytes { + let mut total_memory_usage = NumBytes::new(0); + for canister in state.canisters_iter() { + total_memory_usage += canister.memory_usage() + canister.message_memory_usage().total(); + } + total_memory_usage } }