diff --git a/spartan/.gitignore b/spartan/.gitignore index 7b008b4aee86..89e1bef864a7 100644 --- a/spartan/.gitignore +++ b/spartan/.gitignore @@ -18,4 +18,5 @@ environments/* !environments/next-scenario.env !environments/ignition-fisherman.env !environments/tps-scenario.env +!environments/10-tps-scenario.env *.tfvars diff --git a/spartan/aztec-node/templates/_pod-template.yaml b/spartan/aztec-node/templates/_pod-template.yaml index 56593cb39aba..0af0ec6ac5fb 100644 --- a/spartan/aztec-node/templates/_pod-template.yaml +++ b/spartan/aztec-node/templates/_pod-template.yaml @@ -82,7 +82,11 @@ spec: export P2P_IP=$(hostname -i) {{- end }} + {{- if .Values.node.enableInspector }} + start_cmd=("node" "--inspect=0.0.0.0:9229" "/usr/src/yarn-project/aztec/dest/bin/index.js" "start" {{ join " " .Values.node.startCmd }}) + {{- else }} start_cmd=("node" "/usr/src/yarn-project/aztec/dest/bin/index.js" "start" {{ join " " .Values.node.startCmd }}) + {{- end }} source /scripts/setup-otel-resource.sh {{- if .Values.node.preStartScript }} diff --git a/spartan/environments/10-tps-scenario.env b/spartan/environments/10-tps-scenario.env new file mode 100644 index 000000000000..beb2cf0d0934 --- /dev/null +++ b/spartan/environments/10-tps-scenario.env @@ -0,0 +1,73 @@ +NAMESPACE=${NAMESPACE:-10-tps} +CLUSTER=aztec-gke-private +GCP_REGION=us-west1-a +DESTROY_NAMESPACE=true +DESTROY_ETH_DEVNET=true +CREATE_ETH_DEVNET=${CREATE_ETH_DEVNET:-true} +AZTEC_EPOCH_DURATION=32 +AZTEC_SLOT_DURATION=72 +AZTEC_PROOF_SUBMISSION_WINDOW=64 +ETHEREUM_CHAIN_ID=1337 +LABS_INFRA_MNEMONIC="test test test test test test test test test test test junk" +FUNDING_PRIVATE_KEY="0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" +# CREATE_CHAOS_MESH=true + +CREATE_ROLLUP_CONTRACTS=true +REDEPLOY_ROLLUP_CONTRACTS=true +VERIFY_CONTRACTS=false +DESTROY_AZTEC_INFRA=true + +AZTEC_LAG_IN_EPOCHS=1 + +OTEL_COLLECTOR_ENDPOINT=REPLACE_WITH_GCP_SECRET + +VALIDATOR_REPLICAS=12 +VALIDATORS_PER_NODE=4 
+PUBLISHERS_PER_VALIDATOR_KEY=2 +VALIDATOR_PUBLISHER_MNEMONIC_START_INDEX=5000 + +REAL_VERIFIER=false + +RPC_REPLICAS=12 +RPC_INGRESS_ENABLED=false + +FULL_NODE_REPLICAS=500 +FULL_NODE_RESOURCE_PROFILE="2-core-spot" + +PUBLISHERS_PER_PROVER=2 +PROVER_PUBLISHER_MNEMONIC_START_INDEX=8000 +PROVER_REPLICAS=128 +PROVER_RESOURCE_PROFILE="hi-tps" +PROVER_AGENT_POLL_INTERVAL_MS=10000 + +RUN_TESTS=false + +P2P_MAX_TX_POOL_SIZE=1000000000 +PROVER_TEST_DELAY_TYPE=fixed + +AZTEC_SLASHING_ROUND_SIZE_IN_EPOCHS=1 +AZTEC_SLASHING_QUORUM=20 +AZTEC_SLASHING_EXECUTION_DELAY_IN_ROUNDS=0 +AZTEC_SLASHING_OFFSET_IN_ROUNDS=1 +AZTEC_LOCAL_EJECTION_THRESHOLD=90000000000000000000 + +SEQ_MAX_TX_PER_BLOCK=720 +SEQ_MIN_TX_PER_BLOCK=0 + +# Override L1 tx utils bump percentages for scenario tests +VALIDATOR_L1_PRIORITY_FEE_BUMP_PERCENTAGE=0 +VALIDATOR_L1_PRIORITY_FEE_RETRY_BUMP_PERCENTAGE=0 +PROVER_L1_PRIORITY_FEE_BUMP_PERCENTAGE=0 +PROVER_L1_PRIORITY_FEE_RETRY_BUMP_PERCENTAGE=0 + +# Enable latency measurement for p2p messages +DEBUG_P2P_INSTRUMENT_MESSAGES=true + +# Inject artificial delay of proof verification for all nodes +PROVER_TEST_VERIFICATION_DELAY_MS=250 + +# Reduce the amount of metrics produced by prover agents and full nodes +PROVER_AGENT_INCLUDE_METRICS="aztec.circuit" +FULL_NODE_INCLUDE_METRICS="aztec.p2p.gossip.agg_" +LOG_LEVEL=info + diff --git a/spartan/terraform/deploy-aztec-infra/values/prover-resources-hi-tps.yaml b/spartan/terraform/deploy-aztec-infra/values/prover-resources-hi-tps.yaml index e6ea1b799440..0bf20ecda56e 100644 --- a/spartan/terraform/deploy-aztec-infra/values/prover-resources-hi-tps.yaml +++ b/spartan/terraform/deploy-aztec-infra/values/prover-resources-hi-tps.yaml @@ -1,15 +1,19 @@ node: hostNetwork: true node: + enableInspector: true + nodeJsOptions: + - "--max-old-space-size=65536" resources: requests: - cpu: "3" - memory: "10Gi" + cpu: "6" + memory: "54Gi" nodeSelector: local-ssd: "false" node-type: "network" - cores: "4" + cores: "8" + hi-mem: "true" 
affinity: podAntiAffinity: @@ -41,7 +45,8 @@ broker: nodeSelector: local-ssd: "false" node-type: "network" - cores: "4" + cores: "8" + hi-mem: "true" persistence: enabled: true @@ -53,13 +58,16 @@ broker: accessModes: [ReadWriteOnce] resources: requests: - storage: 8Gi + storage: 256Gi node: + enableInspector: true + nodeJsOptions: + - "--max-old-space-size=65536" resources: requests: - # should land on a 4-core node - cpu: "3" - memory: "10Gi" + # should land on an 8-core node + cpu: "5" + memory: "54Gi" agent: replicaCount: 4 diff --git a/spartan/terraform/gke-cluster/cluster/main.tf b/spartan/terraform/gke-cluster/cluster/main.tf index 9051cc601d51..27ae5dff354f 100644 --- a/spartan/terraform/gke-cluster/cluster/main.tf +++ b/spartan/terraform/gke-cluster/cluster/main.tf @@ -154,6 +154,44 @@ resource "google_container_node_pool" "aztec_nodes-4core" { } } +# Create 8 core node pool no ssd with hi-mem +resource "google_container_node_pool" "aztec_nodes-8core-hi-mem" { + name = "${var.cluster_name}-8core-hi-mem" + location = var.zone + cluster = var.cluster_name + version = var.node_version + # Enable autoscaling + autoscaling { + min_node_count = 0 + max_node_count = 16 + } + + # Node configuration + node_config { + machine_type = "n2-highmem-8" + + service_account = var.service_account + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + + labels = { + env = "production" + local-ssd = "false" + node-type = "network" + cores = "8" + hi-mem = "true" + } + tags = ["aztec-gke-node", "aztec"] + } + + # Management configuration + management { + auto_repair = true + auto_upgrade = false + } +} + # Create spot instance node pool with autoscaling resource "google_container_node_pool" "spot_nodes_32core" { name = "${var.cluster_name}-32core-spot" diff --git a/yarn-project/end-to-end/src/spartan/n_tps.test.ts b/yarn-project/end-to-end/src/spartan/n_tps.test.ts index f6d1605965cf..6eb034c08be6 100644 --- 
a/yarn-project/end-to-end/src/spartan/n_tps.test.ts +++ b/yarn-project/end-to-end/src/spartan/n_tps.test.ts @@ -1,9 +1,11 @@ import type { SentTx } from '@aztec/aztec.js/contracts'; import { SponsoredFeePaymentMethod } from '@aztec/aztec.js/fee'; import { type AztecNode, createAztecNodeClient } from '@aztec/aztec.js/node'; +import { BlockNumber } from '@aztec/foundation/branded-types'; import { Fr } from '@aztec/foundation/curves/bn254'; import { createLogger } from '@aztec/foundation/log'; import { SerialQueue } from '@aztec/foundation/queue'; +import { retryUntil } from '@aztec/foundation/retry'; import { sleep } from '@aztec/foundation/sleep'; import { BenchmarkingContract } from '@aztec/noir-test-contracts.js/Benchmarking'; import { Tx } from '@aztec/stdlib/tx'; @@ -50,7 +52,7 @@ const maxTps = Math.max(...tpsTargets); const CHAOS_MESH_NAME = 'network-shaping'; describe('sustained N TPS test', () => { - jest.setTimeout(60 * 60 * 1000 * 3); // 3 hours + jest.setTimeout(60 * 60 * 1000 * 10); // 10 hours const logger = createLogger(`e2e:spartan-test:sustained-tps`); const TEST_DURATION_SECONDS = parseInt(process.env.TEST_DURATION_SECONDS || '600', 10); @@ -102,6 +104,20 @@ describe('sustained N TPS test', () => { aztecNode = createAztecNodeClient(rpcUrl); metrics = new TxInclusionMetrics(aztecNode); + await retryUntil( + async () => { + const blockNumber = await aztecNode.getBlockNumber(); + if (blockNumber > BlockNumber(1)) { + return true; + } + logger.info('Waiting for the first block to mine...'); + return false; + }, + 'get block number', + 60 * 60 * 3, // wait up to 3 hours + 60, + ); + for (let i = 0; i < NUM_WALLETS; i++) { logger.info(`Creating wallet and pxe for wallet ${i + 1}/${NUM_WALLETS}`); const { wallet, cleanup } = await createWalletAndAztecNodeClient(rpcUrl, config.REAL_VERIFIER, logger);