Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions packages/pocket-ic/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use ic_management_canister_types::{
use ic_transport_types::Envelope;
use ic_transport_types::EnvelopeContent::{Call, ReadState};
use ic_utils::interfaces::ManagementCanister;
use pocket_ic::SubnetMetrics;
use pocket_ic::{
DefaultEffectiveCanisterIdError, ErrorCode, IngressStatusResult, PocketIc, PocketIcBuilder,
PocketIcState, RejectCode, StartServerParams, Time,
Expand Down Expand Up @@ -1649,26 +1650,36 @@ fn subnet_metrics() {

deploy_counter_canister(&pic);

let metrics = pic.get_subnet_metrics(app_subnet).unwrap();
/// Runs ten rounds on `pic` and then fetches the metrics of `subnet_id`.
///
/// Panics if the result of `PocketIc::get_subnet_metrics` cannot be unwrapped.
fn get_subnet_metrics(pic: &PocketIc, subnet_id: Principal) -> SubnetMetrics {
    // `canister_state_bytes` is only recomputed every 10 rounds, so execute 10
    // rounds before reading the metrics to be sure the value is up to date.
    (0..10).for_each(|_| pic.tick());
    pic.get_subnet_metrics(subnet_id).unwrap()
}

pic.tick();
let metrics = get_subnet_metrics(&pic, app_subnet);
assert_eq!(metrics.num_canisters, 1);
assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17));

let canister_id = deploy_counter_canister(&pic);

let metrics = pic.get_subnet_metrics(app_subnet).unwrap();
let metrics = get_subnet_metrics(&pic, app_subnet);
assert_eq!(metrics.num_canisters, 2);
assert!((1 << 17) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 18));

pic.uninstall_canister(canister_id, None).unwrap();
pic.stop_canister(canister_id, None).unwrap();

let metrics = pic.get_subnet_metrics(app_subnet).unwrap();
let metrics = get_subnet_metrics(&pic, app_subnet);
assert_eq!(metrics.num_canisters, 2);
assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17));

pic.delete_canister(canister_id, None).unwrap();

let metrics = pic.get_subnet_metrics(app_subnet).unwrap();
let metrics = get_subnet_metrics(&pic, app_subnet);
assert_eq!(metrics.num_canisters, 1);
assert!((1 << 16) < metrics.canister_state_bytes && metrics.canister_state_bytes < (1 << 17));
}
Expand Down
106 changes: 15 additions & 91 deletions rs/messaging/src/message_routing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use ic_interfaces_state_manager::{CertificationScope, StateManager};
use ic_limits::{SMALL_APP_SUBNET_MAX_SIZE, SYSTEM_SUBNET_STREAM_MSG_LIMIT};
use ic_logger::{ReplicaLogger, debug, fatal, info, warn};
use ic_metrics::MetricsRegistry;
use ic_metrics::buckets::{add_bucket, decimal_buckets, decimal_buckets_with_zero};
use ic_metrics::buckets::{add_bucket, decimal_buckets};
use ic_protobuf::proxy::{ProxyDecodeError, try_from_option_field};
use ic_protobuf::registry::subnet::v1::CanisterCyclesCostSchedule as CanisterCyclesCostScheduleProto;
use ic_query_stats::QueryStatsAggregatorMetrics;
Expand All @@ -34,6 +34,7 @@ use ic_registry_resource_limits::ResourceLimits;
use ic_registry_subnet_features::{ChainKeyConfig, SubnetFeatures};
use ic_registry_subnet_type::SubnetType;
use ic_replicated_state::metadata_state::ApiBoundaryNodeEntry;
use ic_replicated_state::metrics::ReplicatedStateMetrics;
use ic_replicated_state::{
DroppedMessageMetrics, FullTopology, NetworkTopology, ReplicatedState, SubnetTopology,
};
Expand All @@ -44,8 +45,8 @@ use ic_types::registry::RegistryClientError;
use ic_types::state_manager::StateManagerError;
use ic_types::xnet::{StreamHeader, StreamIndex};
use ic_types::{
ExecutionRound, Height, NodeId, NumBytes, PrincipalId, PrincipalIdBlobParseError,
RegistryVersion, SubnetId, Time,
ExecutionRound, Height, NodeId, PrincipalId, PrincipalIdBlobParseError, RegistryVersion,
SubnetId, Time,
};
use ic_types_cycles::CanisterCyclesCostSchedule;
use ic_utils_thread::JoinOnDrop;
Expand Down Expand Up @@ -110,11 +111,6 @@ const BLOCKS_NOT_PROPOSED_BY_BLOCKMAKER_TOTAL: &str = "mr_blocks_not_proposed_by
const METRIC_NEXT_CHECKPOINT_HEIGHT: &str = "mr_next_checkpoint_height";
const METRIC_REMOTE_CERTIFIED_HEIGHTS: &str = "mr_remote_certified_heights";

const METRIC_WASM_CUSTOM_SECTIONS_MEMORY_USAGE_BYTES: &str =
"mr_wasm_custom_sections_memory_usage_bytes";
const METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES: &str = "mr_canister_history_memory_usage_bytes";
const METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES: &str = "mr_canister_history_total_num_changes";

const METRIC_SUBNET_INFO: &str = "mr_subnet_info";
const METRIC_SUBNET_SIZE: &str = "mr_subnet_size";
const METRIC_MAX_CANISTERS: &str = "mr_subnet_max_canisters";
Expand Down Expand Up @@ -323,24 +319,6 @@ pub(crate) struct MessageRoutingMetrics {
/// Number of blocks not proposed by blockmaker ID.
pub(crate) blocks_not_proposed_by_blockmaker_total: IntCounterVec,

/// The memory footprint of all the canisters on this subnet. Note that this
/// counter is from the perspective of the canisters and does not account
/// for the extra copies of the state that the protocol has to store for
/// correct operations.
canisters_memory_usage_bytes: IntGauge,
/// The memory footprint of Wasm custom sections of all canisters on this
/// subnet. Note that the value is from the perspective of the canisters
/// and does not account for the extra copies of the state that the protocol
/// has to store for correct operations.
wasm_custom_sections_memory_usage_bytes: IntGauge,
/// The memory footprint of canister history of all canisters on this
/// subnet. Note that the value is from the perspective of the canisters
/// and does not account for the extra copies of the state that the protocol
/// has to store for correct operations.
canister_history_memory_usage_bytes: IntGauge,
/// The total number of changes in canister history per canister on this subnet.
canister_history_total_num_changes: Histogram,

subnet_info: IntGaugeVec,
subnet_size: IntGauge,
max_canisters: IntGauge,
Expand Down Expand Up @@ -464,24 +442,6 @@ impl MessageRoutingMetrics {
"Failures to propose a block (when the node was block maker rank R but the subnet accepted the block from the block maker with rank S > R).",
&["blockmaker_id"],
),
canisters_memory_usage_bytes: metrics_registry.int_gauge(
"canister_memory_usage_bytes",
"Total memory footprint of all canisters on this subnet.",
),
wasm_custom_sections_memory_usage_bytes: metrics_registry.int_gauge(
METRIC_WASM_CUSTOM_SECTIONS_MEMORY_USAGE_BYTES,
"Total memory footprint of Wasm custom sections of all canisters on this subnet.",
),
canister_history_memory_usage_bytes: metrics_registry.int_gauge(
METRIC_CANISTER_HISTORY_MEMORY_USAGE_BYTES,
"Total memory footprint of canister history of all canisters on this subnet.",
),
canister_history_total_num_changes: metrics_registry.histogram(
METRIC_CANISTER_HISTORY_TOTAL_NUM_CHANGES,
"Total number of changes in canister history per canister on this subnet.",
// 0, 1, 2, 5, …, 1000, 2000, 5000
decimal_buckets_with_zero(0, 3),
),

subnet_info: metrics_registry.int_gauge_vec(
METRIC_SUBNET_INFO,
Expand Down Expand Up @@ -749,46 +709,6 @@ impl<RegistryClient_: RegistryClient> BatchProcessorImpl<RegistryClient_> {
.observe(since.elapsed().as_secs_f64());
}

/// Exports the canister memory gauges and the canister history histogram.
///
/// Iterates over all canisters of `state` and, for each of them:
/// * adds its memory usage plus its total message memory usage to the
///   overall total;
/// * adds the memory usage of its execution state metadata (Wasm custom
///   sections) to the custom sections total, or the default value if the
///   canister has no execution state;
/// * adds its canister history memory usage to the history total;
/// * observes the total number of changes in its canister history.
///
/// The three totals are then written to their respective gauges.
///
/// Returns the overall total memory usage across the canisters of `state`.
fn observe_canisters_memory_usage(&self, state: &ReplicatedState) -> NumBytes {
    let metrics = &self.metrics;

    let mut total = NumBytes::new(0);
    let mut custom_sections = NumBytes::new(0);
    let mut history = NumBytes::new(0);

    for canister in state.canisters_iter() {
        // `memory_usage()` already covers execution and Wasm custom section
        // memory; message memory is accounted for separately.
        total += canister.memory_usage() + canister.message_memory_usage().total();

        custom_sections += match canister.execution_state.as_ref() {
            Some(execution_state) => execution_state.metadata.memory_usage(),
            None => Default::default(),
        };

        history += canister.canister_history_memory_usage();

        let num_changes = canister
            .system_state
            .get_canister_history()
            .get_total_num_changes();
        metrics
            .canister_history_total_num_changes
            .observe(num_changes as f64);
    }

    metrics.canisters_memory_usage_bytes.set(total.get() as i64);
    metrics
        .wasm_custom_sections_memory_usage_bytes
        .set(custom_sections.get() as i64);
    metrics
        .canister_history_memory_usage_bytes
        .set(history.get() as i64);

    total
}

/// Reads registry contents required by `BatchProcessorImpl::process_batch()`.
///
/// # Warning
Expand Down Expand Up @@ -1489,6 +1409,7 @@ impl<RegistryClient_: RegistryClient> BatchProcessor for BatchProcessorImpl<Regi

let batch_summary = batch.batch_summary.clone();

let batch_number = batch.batch_number;
let mut state_after_round = self.state_machine.execute_round(
state,
network_topology,
Expand All @@ -1508,13 +1429,16 @@ impl<RegistryClient_: RegistryClient> BatchProcessor for BatchProcessorImpl<Regi
state_after_round.metadata.subnet_metrics.num_canisters =
state_after_round.canister_states().len() as u64;

// TODO(DSM-103): Consider either doing this every N rounds; or doing it just
// before (and only when actually) hashing the state.
let total_memory_usage = self.observe_canisters_memory_usage(&state_after_round);
state_after_round
.metadata
.subnet_metrics
.canister_state_bytes = total_memory_usage;
// Calculating the total memory usage across all canisters is expensive and
// we do not need it to be perfectly accurate. Only do it every 10 rounds.
if batch_number.get().is_multiple_of(10) {
let total_memory_usage =
ReplicatedStateMetrics::total_canister_memory_usage(&state_after_round);
state_after_round
.metadata
.subnet_metrics
.canister_state_bytes = total_memory_usage;
}

#[cfg(feature = "malicious_code")]
if let Some(delay) = self.malicious_flags.delay_execution(_process_batch_start) {
Expand Down
44 changes: 23 additions & 21 deletions rs/messaging/src/message_routing/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@ use ic_interfaces_registry::RegistryValue;
use ic_interfaces_state_manager::StateReader;
use ic_interfaces_state_manager_mocks::MockStateManager;
use ic_management_canister_types_private::{EcdsaCurve, EcdsaKeyId, MasterPublicKeyId};
use ic_protobuf::registry::api_boundary_node::v1::ApiBoundaryNodeRecord;
use ic_protobuf::registry::crypto::v1::{ChainKeyEnabledSubnetList, PublicKey as PublicKeyProto};
use ic_protobuf::registry::node::v1::{IPv4InterfaceConfig, NodeRecord};
use ic_protobuf::registry::subnet::v1::SubnetRecord as SubnetRecordProto;
use ic_protobuf::registry::{
api_boundary_node::v1::ApiBoundaryNodeRecord, node::v1::IPv4InterfaceConfig,
node::v1::NodeRecord,
};
use ic_registry_client_fake::FakeRegistryClient;
use ic_registry_keys::{make_canister_ranges_key, make_chain_key_enabled_subnet_list_key};
use ic_registry_local_registry::LocalRegistry;
Expand All @@ -32,19 +30,15 @@ use ic_test_utilities_logger::with_test_replica_logger;
use ic_test_utilities_metrics::{fetch_int_counter_vec, fetch_int_gauge_vec, metric_vec};
use ic_test_utilities_registry::{SubnetRecordBuilder, get_mainnet_delta_00_6d_c1};
use ic_test_utilities_state::CanisterStateBuilder;
use ic_test_utilities_types::{
batch::BatchBuilder,
ids::{canister_test_id, node_test_id, subnet_test_id, user_test_id},
};
use ic_types::batch::BlockmakerMetrics;
use ic_test_utilities_types::batch::BatchBuilder;
use ic_test_utilities_types::ids::{canister_test_id, node_test_id, subnet_test_id, user_test_id};
use ic_types::batch::{Batch, BatchMessages, BlockmakerMetrics};
use ic_types::crypto::AlgorithmId;
use ic_types::crypto::threshold_sig::ni_dkg::{NiDkgTag, NiDkgTranscript};
use ic_types::time::Time;
use ic_types::xnet::{StreamIndexedQueue, StreamSlice};
use ic_types::{CanisterId, ExecutionRound, ReplicaVersion};
use ic_types::{
NodeId, PrincipalId, Randomness,
batch::{Batch, BatchMessages},
crypto::AlgorithmId,
crypto::threshold_sig::ni_dkg::{NiDkgTag, NiDkgTranscript},
time::Time,
CanisterId, ExecutionRound, NodeId, NumBytes, PrincipalId, Randomness, ReplicaVersion,
};
use maplit::{btreemap, btreeset};
use std::{fmt::Debug, str::FromStr, sync::Arc, time::Duration};
Expand Down Expand Up @@ -2185,12 +2179,20 @@ fn process_batch_updates_subnet_metrics() {
// Reading from the registry must succeed for fully specified records.
let (batch_processor, _metrics, state_manager, _registry_settings) =
make_batch_processor(fixture.registry.clone(), log);
let (height, mut state) = state_manager.take_tip();
state.metadata.own_subnet_id = own_subnet_id;
state_manager.commit_and_certify(state, CertificationScope::Metadata, None);

// Advance to just before the next multiple of 10 height. `canister_state_bytes`
// is only updated on rounds that are a multiple of 10.
loop {
let (height, mut state) = state_manager.take_tip();
state.metadata.own_subnet_id = own_subnet_id;
state_manager.commit_and_certify(state, CertificationScope::Metadata, None);
if (height.get() + 2).is_multiple_of(10) {
break;
}
}

batch_processor.process_batch(Batch {
batch_number: height.increment().increment(),
batch_number: state_manager.tip_height().increment(),
batch_summary: None,
content: BatchContent::Data {
batch_messages: BatchMessages::default(),
Expand All @@ -2206,13 +2208,13 @@ fn process_batch_updates_subnet_metrics() {
});

let latest_state = state_manager.get_latest_state().take();
let canister_state = latest_state
let canister_state_bytes = latest_state
.canisters_iter()
.map(|canister| canister.memory_usage())
.sum();
assert_eq!(
latest_state.metadata.subnet_metrics.canister_state_bytes,
canister_state
canister_state_bytes
);
});
}
Expand Down
Loading
Loading