Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions book/api/metrics-generated.md
Original file line number Diff line number Diff line change
Expand Up @@ -1223,5 +1223,6 @@
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;seen</span> | counter | Number of hard forks we've seen (block ids with multiple candidate bank hashes) |
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;pruned</span> | counter | Number of hard forks (candidate bank hashes) we've pruned |
| <span class="metrics-name">tower_&#8203;hard_&#8203;forks_&#8203;active</span> | gauge | Currently active hard forks |
| <span class="metrics-name">tower_&#8203;slot_&#8203;ignored</span> | counter | Number of times we ignored a slot likely due to minority fork publish |

</div>
1 change: 1 addition & 0 deletions src/disco/metrics/generated/fd_metrics_tower.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ const fd_metrics_meta_t FD_METRICS_TOWER[FD_METRICS_TOWER_TOTAL] = {
DECLARE_METRIC( TOWER_HARD_FORKS_SEEN, COUNTER ),
DECLARE_METRIC( TOWER_HARD_FORKS_PRUNED, COUNTER ),
DECLARE_METRIC( TOWER_HARD_FORKS_ACTIVE, GAUGE ),
DECLARE_METRIC( TOWER_SLOT_IGNORED, COUNTER ),
};
8 changes: 7 additions & 1 deletion src/disco/metrics/generated/fd_metrics_tower.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,13 @@
#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_DESC "Currently active hard forks"
#define FD_METRICS_GAUGE_TOWER_HARD_FORKS_ACTIVE_CVT (FD_METRICS_CONVERTER_NONE)

#define FD_METRICS_TOWER_TOTAL (14UL)
#define FD_METRICS_COUNTER_TOWER_SLOT_IGNORED_OFF (30UL)
#define FD_METRICS_COUNTER_TOWER_SLOT_IGNORED_NAME "tower_slot_ignored"
#define FD_METRICS_COUNTER_TOWER_SLOT_IGNORED_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_TOWER_SLOT_IGNORED_DESC "Number of times we ignored a slot likely due to minority fork publish"
#define FD_METRICS_COUNTER_TOWER_SLOT_IGNORED_CVT (FD_METRICS_CONVERTER_NONE)

#define FD_METRICS_TOWER_TOTAL (15UL)
extern const fd_metrics_meta_t FD_METRICS_TOWER[FD_METRICS_TOWER_TOTAL];

#endif /* HEADER_fd_src_disco_metrics_generated_fd_metrics_tower_h */
2 changes: 2 additions & 0 deletions src/disco/metrics/metrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,8 @@ metric introduced.
<counter name="HardForksSeen" summary="Number of hard forks we've seen (block ids with multiple candidate bank hashes)" />
<counter name="HardForksPruned" summary="Number of hard forks (candidate bank hashes) we've pruned" />
<gauge name="HardForksActive" summary="Currently active hard forks" />

<counter name="SlotIgnored" summary="Number of times we ignored a slot likely due to minority fork publish" />
</tile>

<tile name="gui">
Expand Down
14 changes: 13 additions & 1 deletion src/discof/replay/fd_replay_tile.c
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,19 @@ returnable_frag( fd_replay_tile_t * ctx,
case FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC: break;
case FD_TOWER_SLOT_CONFIRMED_ROOTED: break;
}
};
}
else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_IGNORED ) ) {
fd_tower_slot_ignored_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
fd_tower_slot_done_t ignored = {
.replay_slot = msg->slot,
.replay_bank_idx = msg->bank_idx,
.vote_slot = ULONG_MAX,
.reset_slot = ctx->reset_slot, /* Use most recent reset slot */
.reset_block_id = ctx->reset_block_id,
.root_slot = ULONG_MAX
};
process_tower_slot_done( ctx, stem, &ignored, seq );
}
break;
}
case IN_KIND_SHRED: {
Expand Down
32 changes: 32 additions & 0 deletions src/discof/tower/fd_tower_tile.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ typedef struct {
ulong threshold_fail;
ulong propagated_fail;

ulong slot_ignored;

fd_hfork_metrics_t hard_forks;
} metrics;
} ctx_t;
Expand Down Expand Up @@ -185,6 +187,7 @@ metrics_write( ctx_t * ctx ) {
FD_MCNT_SET( TOWER, THRESHOLD_FAIL, ctx->metrics.threshold_fail );
FD_MCNT_SET( TOWER, PROPAGATED_FAIL, ctx->metrics.propagated_fail );

FD_MCNT_SET( TOWER, SLOT_IGNORED, ctx->metrics.slot_ignored );
FD_MCNT_SET( TOWER, HARD_FORKS_SEEN, ctx->metrics.hard_forks.seen );
FD_MCNT_SET( TOWER, HARD_FORKS_PRUNED, ctx->metrics.hard_forks.pruned );

Expand Down Expand Up @@ -471,6 +474,14 @@ replay_slot_completed( ctx_t * ctx,
if( FD_UNLIKELY( fd_forks_query( ctx->forks, slot_completed->slot ) ) ) {
FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.uc, block_id );
FD_LOG_WARNING(( "tower ignoring replay of equivocating slot %lu %s", slot_completed->slot, block_id ));

/* Still need to return a message to replay so the refcnt on the bank is decremented. */
fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
msg->slot = slot_completed->slot;
msg->bank_idx = slot_completed->bank_idx;

fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
return;
}

Expand Down Expand Up @@ -512,6 +523,27 @@ replay_slot_completed( ctx_t * ctx,
fd_hash_t const * parent_block_id = &slot_completed->parent_block_id;
if( FD_UNLIKELY( slot_completed->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id;
if( FD_UNLIKELY( slot_completed->slot ==ctx->init_slot ) ) parent_block_id = NULL;

if( FD_UNLIKELY( parent_block_id && !fd_ghost_query( ctx->ghost, parent_block_id ) ) ) {
/* Rare occurrence where replay executes a block down a minority fork
that we have pruned. Due to a race in reading frags, replay may
believe the minority fork exists and is still executable, and
executes the block and delivers it to tower. Tower should ignore
this block as its parent no longer exists. */
FD_BASE58_ENCODE_32_BYTES( parent_block_id->uc, parent_block_id_cstr );
FD_LOG_WARNING(( "replay likely lagging tower publish, executed slot %lu is missing parent block id %s, excluding from ghost", slot_completed->slot, parent_block_id_cstr ));
ctx->metrics.slot_ignored++;

/* Still need to return a message to replay so the refcnt on the bank is decremented. */
fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
msg->slot = slot_completed->slot;
msg->bank_idx = slot_completed->bank_idx;

fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
return;
}

fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_completed->block_id, parent_block_id, slot_completed->slot );
ghost_blk->total_stake = total_stake;

Expand Down
8 changes: 8 additions & 0 deletions src/discof/tower/fd_tower_tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

/* Signature values carried on frags published by the tower tile.  The
consumer compares the frag sig against these to decide which payload
type the frag holds. */
#define FD_TOWER_SIG_SLOT_DONE (0)
#define FD_TOWER_SIG_SLOT_CONFIRMED (1)
#define FD_TOWER_SIG_SLOT_IGNORED (2) /* payload is fd_tower_slot_ignored_t */

/* In response to finishing replay of a slot, the tower tile will
produce both a block to vote for and block to reset to, and
Expand Down Expand Up @@ -119,9 +120,16 @@ struct fd_tower_slot_confirmed {
};
typedef struct fd_tower_slot_confirmed fd_tower_slot_confirmed_t;

/* fd_tower_slot_ignored is the payload published with sig
FD_TOWER_SIG_SLOT_IGNORED when tower declines to process a slot
that replay reported as completed.  It identifies the slot and bank
so the receiver can still account for the replayed bank. */
struct fd_tower_slot_ignored {
ulong slot; /* slot number of the ignored replay (copied from the slot-completed message) */
ulong bank_idx; /* bank index of the ignored replay (copied from the slot-completed message) */
};
typedef struct fd_tower_slot_ignored fd_tower_slot_ignored_t;

/* fd_tower_msg overlays the tower message payload types, one member
per FD_TOWER_SIG_* kind.  NOTE(review): presumably used to size or
view a buffer that may hold any tower message -- confirm against
users of this type. */
union fd_tower_msg {
fd_tower_slot_done_t slot_done; /* FD_TOWER_SIG_SLOT_DONE */
fd_tower_slot_confirmed_t slot_confirmed; /* FD_TOWER_SIG_SLOT_CONFIRMED */
fd_tower_slot_ignored_t slot_ignored; /* FD_TOWER_SIG_SLOT_IGNORED */
};
typedef union fd_tower_msg fd_tower_msg_t;

Expand Down
Loading