Skip to content

Commit dc93b57

Browse files
committed
Dashboards beginning and some metrics updates
1 parent e22925e commit dc93b57

11 files changed

Lines changed: 2696 additions & 9 deletions

File tree

config-peer2.yaml

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# ======================================================================================
2+
# LOGGER RELATED CONFIGURATIONS
3+
# ======================================================================================
4+
logger:
5+
enabled: true
6+
environment: development
7+
level: debug
8+
9+
# ======================================================================================
10+
# MDBX RELATED CONFIGURATIONS
11+
# ======================================================================================
12+
mdbx:
13+
enabled: true
14+
nodes:
15+
- name: fdb
16+
path: /tmp/
17+
maxReaders: 4096
18+
maxSize: 1024 # Maximum database size (1 TB)
19+
minSize: 1 # Minimum database size (1 GB)
20+
growthStep: 4096 # Growth step size (4 KB)
21+
filePermissions: 0600 # File permissions for the database
22+
23+
# ======================================================================================
24+
# NODE RELATED CONFIGURATION
25+
# ======================================================================================
26+
node:
27+
# In the blockchain world there are full nodes, light nodes, sequencers, validators, etc.
28+
# This is practically the same concept as RBAC (Role-Based Access Control), and
29+
# that is how node types are defined. We reuse that system here so as not to pollute
30+
# the system with multiple node and role types and make things even more confusing.
31+
roles:
32+
- node
33+
34+
35+
# ======================================================================================
36+
# IDENTITY MANAGER RELATED CONFIGURATION
37+
# ======================================================================================
38+
# Identity manager is responsible for generating P2P, BLS, and other keys.
39+
identity:
40+
enabled: true # Enable or disable the identity manager.
41+
basePath: /Users/nevio/.fdb/keystore # Path where keys are stored.
42+
43+
# ======================================================================================
44+
# P2P NETWORKING RELATED CONFIGURATION
45+
# ======================================================================================
46+
networking:
47+
peerId: "12D3KooWJwvXTmBGQ5MeqVBwnU18WgUdAnJtZwuLKN2wasUVoiq3"
48+
listenAddrs: # Changed from single port to multiple listen addresses
49+
- "/ip4/0.0.0.0/tcp/8433" # Listen on all TCP IPv4 interfaces
50+
- "/ip6/::/tcp/8433" # Listen on all TCP IPv6 interfaces
51+
- "/ip4/0.0.0.0/udp/8433" # Listen on all UDP IPv4 interfaces
52+
- "/ip6/::/udp/8433" # Listen on all UDP IPv6 interfaces
53+
protocolId: "/fdb/1.0.0" # Unique protocol identifier
54+
bootstrapPeers:
55+
- /ip4/127.0.0.1/tcp/8432/p2p/12D3KooWK2kJYjJZs23UCv9KqxNxgtu19xgipQsKbrXDsfsLestN # Connect to the first node
56+
bootstrapNode: false # This is a regular node, not a bootstrap node
57+
mdns: true # Enable or disable Multicast DNS (mDNS) discovery.
58+
enableRelay: true # Enable relay connections if needed.
59+
interface_name: "en0" # Network interface name for eBPF (if applicable)
60+
61+
# ======================================================================================
62+
# TRANSPORTS RELATED CONFIGURATIONS
63+
# ======================================================================================
64+
transports:
65+
- type: dummy
66+
enabled: false
67+
config:
68+
ipv4: 127.0.0.1
69+
port: 4434
70+
71+
- type: quic
72+
enabled: false
73+
config:
74+
ipv4: 127.0.0.1
75+
port: 4433
76+
tls:
77+
insecure: true
78+
key: ./data/certs/key.pem
79+
cert: ./data/certs/cert.pem
80+
81+
- type: uds
82+
enabled: false
83+
config:
84+
socket: "/tmp/fdb.sock"
85+
86+
- type: tcp
87+
enabled: true
88+
config:
89+
ipv4: 127.0.0.1
90+
port: 5012
91+
tls:
92+
insecure: true
93+
key: ./data/certs/key.pem
94+
cert: ./data/certs/cert.pem
95+
96+
- type: udp
97+
enabled: false
98+
config:
99+
ipv4: 127.0.0.1
100+
port: 5022
101+
dtls:
102+
insecure: true
103+
key: ./data/certs/key.pem
104+
cert: ./data/certs/cert.pem
105+
106+
# ======================================================================================
107+
# RPC RELATED CONFIGURATIONS
108+
# ======================================================================================
109+
rpc:
110+
poolMaxSize: 10_000
111+
transport:
112+
enabled: true
113+
type: "tcp"
114+
ipv4: 0.0.0.0
115+
port: 8846
116+
117+
# ======================================================================================
118+
# PROFILING (PPROF) CONFIGURATION
119+
# ======================================================================================
120+
pprof:
121+
- name: fdb
122+
enabled: true
123+
addr: "0.0.0.0:6061"
124+
125+
# ======================================================================================
126+
# OBSERVABILITY CONFIGURATION (Metrics and Tracing)
127+
# ======================================================================================
128+
# Observability helps in monitoring system health, performance, and errors.
129+
# It includes metrics (Prometheus) and tracing (OpenTelemetry).
130+
observability:
131+
metrics:
132+
enable: true # Enable or disable metrics collection.
133+
endpoint: "localhost:4317" # Endpoint for metrics collector.
134+
exportInterval: 15s # Interval to export metrics data.
135+
useTLS: false # Set this to false to disable TLS.
136+
137+
tracing:
138+
enable: true # Enable or disable tracing.
139+
endpoint: "localhost:4317" # Endpoint for tracing collector.
140+
headers: {} # Additional headers for tracing requests.
141+
sampler: "always_on" # Options: always_on, probability.
142+
samplingRate: 0.1 # Sampling rate for probabilistic sampling.
143+
useTLS: false # Set this to false to disable TLS.

config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ rpc:
122122
pprof:
123123
- name: fdb
124124
enabled: true
125-
endpoint: "0.0.0.0:6060"
125+
addr: "0.0.0.0:6060"
126126

127127
# ======================================================================================
128128
# OBSERVABILITY CONFIGURATION (Metrics and Tracing)

db/metrics.go

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
package db
2+
3+
import (
	"context"
	"fmt"
	"time"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/metric"
)
10+
11+
// BatchWriterMetrics holds all metrics instruments for the BatchWriter
12+
type BatchWriterMetrics struct {
13+
// Counters
14+
BatchesProcessedTotal metric.Int64Counter
15+
RecordsProcessedTotal metric.Int64Counter
16+
ChannelOverflowsTotal metric.Int64Counter
17+
NonBlockingFallbacksTotal metric.Int64Counter
18+
19+
// Histograms
20+
BatchSizeHistogram metric.Int64Histogram
21+
FlushDurationMs metric.Float64Histogram
22+
WorkerQueueWaitTimeMs metric.Float64Histogram
23+
24+
// Gauges
25+
QueueDepth metric.Int64UpDownCounter
26+
WorkerBufferSizes []metric.Int64UpDownCounter
27+
ActiveWorkers metric.Int64UpDownCounter
28+
}
29+
30+
// InitializeBatchWriterMetrics creates and registers all BatchWriter metrics
31+
func InitializeBatchWriterMetrics(ctx context.Context, meter metric.Meter) (*BatchWriterMetrics, error) {
32+
m := &BatchWriterMetrics{}
33+
var err error
34+
35+
// Initialize counters
36+
m.BatchesProcessedTotal, err = meter.Int64Counter(
37+
"fdb_batch_writer_batches_processed_total",
38+
metric.WithDescription("Total number of batches processed by BatchWriter"),
39+
)
40+
if err != nil {
41+
return nil, err
42+
}
43+
44+
m.RecordsProcessedTotal, err = meter.Int64Counter(
45+
"fdb_batch_writer_records_processed_total",
46+
metric.WithDescription("Total number of records processed by BatchWriter"),
47+
)
48+
if err != nil {
49+
return nil, err
50+
}
51+
52+
m.ChannelOverflowsTotal, err = meter.Int64Counter(
53+
"fdb_batch_writer_channel_overflows_total",
54+
metric.WithDescription("Number of times worker channels reached capacity"),
55+
)
56+
if err != nil {
57+
return nil, err
58+
}
59+
60+
m.NonBlockingFallbacksTotal, err = meter.Int64Counter(
61+
"fdb_batch_writer_non_blocking_fallbacks_total",
62+
metric.WithDescription("Number of times non-blocking fallback was used"),
63+
)
64+
if err != nil {
65+
return nil, err
66+
}
67+
68+
// Initialize histograms
69+
m.BatchSizeHistogram, err = meter.Int64Histogram(
70+
"fdb_batch_writer_batch_size",
71+
metric.WithDescription("Distribution of batch sizes processed by BatchWriter"),
72+
)
73+
if err != nil {
74+
return nil, err
75+
}
76+
77+
m.FlushDurationMs, err = meter.Float64Histogram(
78+
"fdb_batch_writer_flush_duration_milliseconds",
79+
metric.WithDescription("Time taken to flush batches to disk in milliseconds"),
80+
)
81+
if err != nil {
82+
return nil, err
83+
}
84+
85+
m.WorkerQueueWaitTimeMs, err = meter.Float64Histogram(
86+
"fdb_batch_writer_queue_wait_milliseconds",
87+
metric.WithDescription("Time requests spend waiting in worker queues in milliseconds"),
88+
)
89+
if err != nil {
90+
return nil, err
91+
}
92+
93+
// Initialize gauges
94+
m.QueueDepth, err = meter.Int64UpDownCounter(
95+
"fdb_batch_writer_queue_depth",
96+
metric.WithDescription("Current depth of all BatchWriter queues combined"),
97+
)
98+
if err != nil {
99+
return nil, err
100+
}
101+
102+
m.ActiveWorkers, err = meter.Int64UpDownCounter(
103+
"fdb_batch_writer_active_workers",
104+
metric.WithDescription("Number of currently active worker goroutines"),
105+
)
106+
if err != nil {
107+
return nil, err
108+
}
109+
110+
return m, nil
111+
}
112+
113+
// RecordBatchProcessed records metrics for a processed batch
114+
func (m *BatchWriterMetrics) RecordBatchProcessed(ctx context.Context, workerID int, batchSize int, duration time.Duration) {
115+
attrs := attribute.NewSet(attribute.Int("worker_id", workerID))
116+
117+
m.BatchesProcessedTotal.Add(ctx, 1, metric.WithAttributeSet(attrs))
118+
m.RecordsProcessedTotal.Add(ctx, int64(batchSize), metric.WithAttributeSet(attrs))
119+
m.BatchSizeHistogram.Record(ctx, int64(batchSize), metric.WithAttributeSet(attrs))
120+
m.FlushDurationMs.Record(ctx, float64(duration.Milliseconds()), metric.WithAttributeSet(attrs))
121+
}
122+
123+
// RecordWorkerQueueWait records the time a request spends waiting in a worker queue
124+
func (m *BatchWriterMetrics) RecordWorkerQueueWait(ctx context.Context, workerID int, duration time.Duration) {
125+
attrs := attribute.NewSet(attribute.Int("worker_id", workerID))
126+
m.WorkerQueueWaitTimeMs.Record(ctx, float64(duration.Milliseconds()), metric.WithAttributeSet(attrs))
127+
}
128+
129+
// RecordChannelOverflow records a channel overflow event
130+
func (m *BatchWriterMetrics) RecordChannelOverflow(ctx context.Context, workerID int) {
131+
attrs := attribute.NewSet(attribute.Int("worker_id", workerID))
132+
m.ChannelOverflowsTotal.Add(ctx, 1, metric.WithAttributeSet(attrs))
133+
}
134+
135+
// RecordNonBlockingFallback records when the non-blocking fallback is used
136+
func (m *BatchWriterMetrics) RecordNonBlockingFallback(ctx context.Context, workerID int) {
137+
attrs := attribute.NewSet(attribute.Int("worker_id", workerID))
138+
m.NonBlockingFallbacksTotal.Add(ctx, 1, metric.WithAttributeSet(attrs))
139+
}
140+
141+
// UpdateQueueDepth updates the queue depth counter
142+
func (m *BatchWriterMetrics) UpdateQueueDepth(ctx context.Context, delta int64) {
143+
m.QueueDepth.Add(ctx, delta)
144+
}
145+
146+
// UpdateWorkerBuffer updates the buffer size for a specific worker
147+
func (m *BatchWriterMetrics) UpdateWorkerBuffer(ctx context.Context, workerID int, size int64) {
148+
if workerID < len(m.WorkerBufferSizes) {
149+
attrs := attribute.NewSet(attribute.Int("worker_id", workerID))
150+
m.WorkerBufferSizes[workerID].Add(ctx, size, metric.WithAttributeSet(attrs))
151+
}
152+
}
153+
154+
// InitializeWorkerBufferMetrics creates separate buffer size metrics for each worker
155+
func (m *BatchWriterMetrics) InitializeWorkerBufferMetrics(ctx context.Context, meter metric.Meter, workerCount int) error {
156+
m.WorkerBufferSizes = make([]metric.Int64UpDownCounter, workerCount)
157+
158+
for i := 0; i < workerCount; i++ {
159+
var err error
160+
m.WorkerBufferSizes[i], err = meter.Int64UpDownCounter(
161+
"fdb_batch_writer_worker_buffer_size",
162+
metric.WithDescription("Current size of worker buffer"),
163+
)
164+
if err != nil {
165+
return err
166+
}
167+
}
168+
169+
return nil
170+
}

0 commit comments

Comments
 (0)