Skip to content

Commit fcc6db2

Browse files
committed
gui: fix repair slot, repair request
1 parent 9a344b1 commit fcc6db2

File tree

3 files changed

+137
-101
lines changed

3 files changed

+137
-101
lines changed

src/disco/gui/fd_gui.c

Lines changed: 128 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,109 @@ fd_gui_run_boot_progress( fd_gui_t * gui, long now ) {
832832
}
833833
}
834834

835+
static inline int
836+
fd_gui_ephemeral_slots_contains( fd_gui_ephemeral_slot_t * slots, ulong slots_sz, ulong slot ) {
837+
for( ulong i=0UL; i<slots_sz; i++ ) {
838+
if( FD_UNLIKELY( slots[ i ].slot==ULONG_MAX ) ) break;
839+
if( FD_UNLIKELY( slots[ i ].slot==slot ) ) return 1;
840+
}
841+
return 0;
842+
}
843+
844+
#define SORT_NAME fd_gui_ephemeral_slot_sort
845+
#define SORT_KEY_T fd_gui_ephemeral_slot_t
846+
#define SORT_BEFORE(a,b) fd_int_if( (a).slot==ULONG_MAX, 0, fd_int_if( (b).slot==ULONG_MAX, 1, fd_int_if( (a).slot==(b).slot, (a).timestamp_arrival_nanos>(b).timestamp_arrival_nanos, (a).slot>(b).slot ) ) )
847+
#include "../../util/tmpl/fd_sort.c"
848+
849+
static inline void
850+
fd_gui_try_insert_ephemeral_slot( fd_gui_ephemeral_slot_t * slots, ulong slots_sz, ulong slot, long now ) {
851+
int already_present = 0;
852+
for( ulong i=0UL; i<slots_sz; i++ ) {
853+
/* evict any slots older than 4.8 seconds */
854+
if( FD_UNLIKELY( slots[ i ].slot!=ULONG_MAX && now-slots[ i ].timestamp_arrival_nanos>4800000000L ) ) {
855+
slots[ i ].slot = ULONG_MAX;
856+
continue;
857+
}
858+
859+
/* if we've already seen this slot, just update the timestamp */
860+
if( FD_UNLIKELY( slots[ i ].slot==slot ) ) {
861+
slots[ i ].timestamp_arrival_nanos = now;
862+
already_present = 1;
863+
}
864+
}
865+
if( FD_LIKELY( already_present ) ) return;
866+
867+
/* Insert the new slot number, evicting a smaller slot if necessary */
868+
slots[ slots_sz ].timestamp_arrival_nanos = now;
869+
slots[ slots_sz ].slot = slot;
870+
fd_gui_ephemeral_slot_sort_insert( slots, slots_sz+1UL );
871+
}
872+
873+
static inline void
874+
fd_gui_try_insert_catch_up_slot( ulong * slots, ulong capacity, ulong * slots_sz, ulong slot ) {
875+
/* catch up history is run-length encoded */
876+
int inserted = 0;
877+
for( ulong i=0UL; i<*slots_sz; i++ ) {
878+
if( FD_UNLIKELY( i%2UL==1UL && slots[ i ]==slot-1UL ) ) {
879+
slots[ i ]++;
880+
inserted = 1;
881+
break;
882+
} else if( FD_UNLIKELY( i%2UL==0UL && slots[ i ]==slot+1UL ) ) {
883+
slots[ i ]--;
884+
inserted = 1;
885+
break;
886+
}
887+
}
888+
if( FD_LIKELY( !inserted ) ) {
889+
slots[ (*slots_sz)++ ] = slot;
890+
slots[ (*slots_sz)++ ] = slot;
891+
}
892+
893+
/* coalesce intervals that touch */
894+
ulong removed = 0UL;
895+
for( ulong i=1UL; i<(*slots_sz)-1UL; i+=2 ) {
896+
if( FD_UNLIKELY( slots[ i ]==slots[ i+1UL ] ) ) {
897+
slots[ i ] = ULONG_MAX;
898+
slots[ i+1UL ] = ULONG_MAX;
899+
removed += 2;
900+
}
901+
}
902+
903+
if( FD_UNLIKELY( (*slots_sz)>removed+capacity-2UL ) ) {
904+
/* We are at capacity, start coalescing earlier intervals. */
905+
slots[ 1 ] = ULONG_MAX;
906+
slots[ 2 ] = ULONG_MAX;
907+
}
908+
909+
fd_sort_up_ulong_insert( slots, (*slots_sz) );
910+
(*slots_sz) -= removed;
911+
}
912+
913+
void
914+
fd_gui_handle_repair_slot( fd_gui_t * gui, ulong slot, long now ) {
915+
int was_sent = fd_gui_ephemeral_slots_contains( gui->summary.slots_max_repair, FD_GUI_REPAIR_SLOT_HISTORY_SZ, slot );
916+
fd_gui_try_insert_ephemeral_slot( gui->summary.slots_max_repair, FD_GUI_REPAIR_SLOT_HISTORY_SZ, slot, now );
917+
918+
if( FD_UNLIKELY( !was_sent && slot!=gui->summary.slot_repair ) ) {
919+
gui->summary.slot_repair = slot;
920+
921+
fd_gui_printf_repair_slot( gui );
922+
fd_http_server_ws_broadcast( gui->http );
923+
924+
if( FD_UNLIKELY( gui->summary.slot_caught_up==ULONG_MAX ) ) fd_gui_try_insert_catch_up_slot( gui->summary.catch_up_repair, FD_GUI_REPAIR_CATCH_UP_HISTORY_SZ, &gui->summary.catch_up_repair_sz, slot );
925+
}
926+
}
927+
928+
void
929+
fd_gui_handle_repair_request( fd_gui_t * gui, ulong slot, ulong shred_idx, long now ) {
930+
fd_gui_slot_staged_shred_event_t * recv_event = &gui->shreds.staged[ gui->shreds.staged_tail % FD_GUI_SHREDS_STAGING_SZ ];
931+
gui->shreds.staged_tail++;
932+
recv_event->timestamp = now;
933+
recv_event->shred_idx = (ushort)shred_idx;
934+
recv_event->slot = slot;
935+
recv_event->event = FD_GUI_SLOT_SHRED_REPAIR_REQUEST;
936+
}
937+
835938
int
836939
fd_gui_poll( fd_gui_t * gui, long now ) {
837940
if( FD_LIKELY( now>gui->next_sample_400millis ) ) {
@@ -874,6 +977,18 @@ fd_gui_poll( fd_gui_t * gui, long now ) {
874977
gui->shreds.staged_next_broadcast = gui->shreds.staged_tail;
875978
}
876979

980+
/* We get the repair slot from the sampled metric after catching up
981+
and from incoming shred data before catchup. This makes the
982+
catchup progress bar look complete while also keeping the
983+
overview slots vis correct. TODO: do this properly using frags
984+
sent over a link */
985+
if( FD_LIKELY( gui->summary.slot_caught_up!=ULONG_MAX ) ) {
986+
fd_topo_tile_t * repair = &gui->topo->tiles[ fd_topo_find_tile( gui->topo, "repair", 0UL ) ];
987+
volatile ulong const * repair_metrics = fd_metrics_tile( repair->metrics );
988+
ulong slot = repair_metrics[ MIDX( COUNTER, REPAIR, REPAIRED_SLOTS ) ];
989+
fd_gui_handle_repair_slot( gui, slot, now );
990+
}
991+
877992
gui->next_sample_50millis += 50L*1000L*1000L;
878993
return 1;
879994
}
@@ -1713,84 +1828,6 @@ fd_gui_handle_slot_end( fd_gui_t * gui,
17131828
fd_gui_tile_stats_snap( gui, slot->waterfall_end, slot->tile_stats_end, now );
17141829
}
17151830

1716-
#define SORT_NAME fd_gui_ephemeral_slot_sort
1717-
#define SORT_KEY_T fd_gui_ephemeral_slot_t
1718-
#define SORT_BEFORE(a,b) fd_int_if( (a).slot==ULONG_MAX, 0, fd_int_if( (b).slot==ULONG_MAX, 1, fd_int_if( (a).slot==(b).slot, (a).timestamp_arrival_nanos>(b).timestamp_arrival_nanos, (a).slot>(b).slot ) ) )
1719-
#include "../../util/tmpl/fd_sort.c"
1720-
1721-
static inline void
1722-
fd_gui_try_insert_ephemeral_slot( fd_gui_ephemeral_slot_t * slots, ulong slots_sz, ulong slot, long now ) {
1723-
int already_present = 0;
1724-
for( ulong i=0UL; i<slots_sz; i++ ) {
1725-
/* evict any slots older than 4.8 seconds */
1726-
if( FD_UNLIKELY( slots[ i ].slot!=ULONG_MAX && now-slots[ i ].timestamp_arrival_nanos>4800000000L ) ) {
1727-
slots[ i ].slot = ULONG_MAX;
1728-
continue;
1729-
}
1730-
1731-
/* if we've already seen this slot, just update the timestamp */
1732-
if( FD_UNLIKELY( slots[ i ].slot==slot ) ) {
1733-
slots[ i ].timestamp_arrival_nanos = now;
1734-
already_present = 1;
1735-
}
1736-
}
1737-
if( FD_LIKELY( already_present ) ) return;
1738-
1739-
/* Insert the new slot number, evicting a smaller slot if necessary */
1740-
slots[ slots_sz ].timestamp_arrival_nanos = now;
1741-
slots[ slots_sz ].slot = slot;
1742-
fd_gui_ephemeral_slot_sort_insert( slots, slots_sz+1UL );
1743-
}
1744-
1745-
static inline void
1746-
fd_gui_try_insert_catch_up_slot( ulong * slots, ulong capacity, ulong * slots_sz, ulong slot ) {
1747-
/* catch up history is run-length encoded */
1748-
int inserted = 0;
1749-
for( ulong i=0UL; i<*slots_sz; i++ ) {
1750-
if( FD_UNLIKELY( i%2UL==1UL && slots[ i ]==slot-1UL ) ) {
1751-
slots[ i ]++;
1752-
inserted = 1;
1753-
break;
1754-
} else if( FD_UNLIKELY( i%2UL==0UL && slots[ i ]==slot+1UL ) ) {
1755-
slots[ i ]--;
1756-
inserted = 1;
1757-
break;
1758-
}
1759-
}
1760-
if( FD_LIKELY( !inserted ) ) {
1761-
slots[ (*slots_sz)++ ] = slot;
1762-
slots[ (*slots_sz)++ ] = slot;
1763-
}
1764-
1765-
/* coalesce intervals that touch */
1766-
ulong removed = 0UL;
1767-
for( ulong i=1UL; i<(*slots_sz)-1UL; i+=2 ) {
1768-
if( FD_UNLIKELY( slots[ i ]==slots[ i+1UL ] ) ) {
1769-
slots[ i ] = ULONG_MAX;
1770-
slots[ i+1UL ] = ULONG_MAX;
1771-
removed += 2;
1772-
}
1773-
}
1774-
1775-
if( FD_UNLIKELY( (*slots_sz)>removed+capacity-2UL ) ) {
1776-
/* We are at capacity, start coalescing earlier intervals. */
1777-
slots[ 1 ] = ULONG_MAX;
1778-
slots[ 2 ] = ULONG_MAX;
1779-
}
1780-
1781-
fd_sort_up_ulong_insert( slots, (*slots_sz) );
1782-
(*slots_sz) -= removed;
1783-
}
1784-
1785-
static inline int
1786-
fd_gui_ephemeral_slots_contains( fd_gui_ephemeral_slot_t * slots, ulong slots_sz, ulong slot ) {
1787-
for( ulong i=0UL; i<slots_sz; i++ ) {
1788-
if( FD_UNLIKELY( slots[ i ].slot==ULONG_MAX ) ) break;
1789-
if( FD_UNLIKELY( slots[ i ].slot==slot ) ) return 1;
1790-
}
1791-
return 0;
1792-
}
1793-
17941831
void
17951832
fd_gui_handle_shred( fd_gui_t * gui,
17961833
ulong slot,
@@ -1800,6 +1837,11 @@ fd_gui_handle_shred( fd_gui_t * gui,
18001837
int was_sent = fd_gui_ephemeral_slots_contains( gui->summary.slots_max_turbine, FD_GUI_TURBINE_SLOT_HISTORY_SZ, slot );
18011838
if( FD_LIKELY( is_turbine ) ) fd_gui_try_insert_ephemeral_slot( gui->summary.slots_max_turbine, FD_GUI_TURBINE_SLOT_HISTORY_SZ, slot, tsorig );
18021839

1840+
/* If we haven't caught up yet, update repair slot using received
1841+
shreds. This is not technically correct, but close enough and will
1842+
make the progress bar look correct. */
1843+
if( FD_UNLIKELY( !is_turbine && gui->summary.slot_caught_up==ULONG_MAX ) ) fd_gui_handle_repair_slot( gui, slot, tsorig );
1844+
18031845
if( FD_UNLIKELY( !was_sent && is_turbine && slot!=gui->summary.slot_turbine ) ) {
18041846
gui->summary.slot_turbine = slot;
18051847

@@ -1865,21 +1907,6 @@ fd_gui_handle_exec_txn_done( fd_gui_t * gui,
18651907
}
18661908
}
18671909

1868-
void
1869-
fd_gui_handle_repair_slot( fd_gui_t * gui, ulong slot, long now ) {
1870-
int was_sent = fd_gui_ephemeral_slots_contains( gui->summary.slots_max_repair, FD_GUI_REPAIR_SLOT_HISTORY_SZ, slot );
1871-
fd_gui_try_insert_ephemeral_slot( gui->summary.slots_max_repair, FD_GUI_REPAIR_SLOT_HISTORY_SZ, slot, now );
1872-
1873-
if( FD_UNLIKELY( !was_sent && slot!=gui->summary.slot_repair ) ) {
1874-
gui->summary.slot_repair = slot;
1875-
1876-
fd_gui_printf_repair_slot( gui );
1877-
fd_http_server_ws_broadcast( gui->http );
1878-
1879-
if( FD_UNLIKELY( gui->summary.slot_caught_up==ULONG_MAX ) ) fd_gui_try_insert_catch_up_slot( gui->summary.catch_up_repair, FD_GUI_REPAIR_CATCH_UP_HISTORY_SZ, &gui->summary.catch_up_repair_sz, slot );
1880-
}
1881-
}
1882-
18831910
static void
18841911
fd_gui_handle_reset_slot_legacy( fd_gui_t * gui,
18851912
ulong * msg,
@@ -2787,6 +2814,14 @@ fd_gui_handle_replay_update( fd_gui_t * gui,
27872814
slot_complete_event->timestamp = slot_completed->completed_time;
27882815
slot_complete_event->shred_idx = USHORT_MAX;
27892816
slot_complete_event->slot = slot->slot;
2817+
2818+
/* addresses racy behavior if we just sample at 400ms */
2819+
if( FD_LIKELY( gui->summary.slot_caught_up!=ULONG_MAX ) ) {
2820+
fd_topo_tile_t * repair = &gui->topo->tiles[ fd_topo_find_tile( gui->topo, "repair", 0UL ) ];
2821+
volatile ulong const * repair_metrics = fd_metrics_tile( repair->metrics );
2822+
ulong slot = repair_metrics[ MIDX( COUNTER, REPAIR, REPAIRED_SLOTS ) ];
2823+
fd_gui_handle_repair_slot( gui, slot, now );
2824+
}
27902825
}
27912826

27922827
void

src/disco/gui/fd_gui.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,9 @@ fd_gui_handle_exec_txn_done( fd_gui_t * gui,
862862
void
863863
fd_gui_handle_repair_slot( fd_gui_t * gui, ulong slot, long now );
864864

865+
void
866+
fd_gui_handle_repair_request( fd_gui_t * gui, ulong slot, ulong shred_idx, long now );
867+
865868
void
866869
fd_gui_handle_snapshot_update( fd_gui_t * gui,
867870
fd_snapct_update_t const * msg );

src/disco/gui/fd_gui_tile.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ typedef struct {
124124
union {
125125
struct {
126126
ulong slot;
127+
ulong shred_idx;
127128
} repair_net;
128129

129130
uchar net_gossvf[ FD_NET_MTU ];
@@ -292,13 +293,10 @@ during_frag( fd_gui_ctx_t * ctx,
292293
ulong payload_sz;
293294
if( FD_LIKELY( fd_ip4_udp_hdr_strip( src, sz, &payload, &payload_sz, NULL, NULL, NULL ) ) ) {
294295
fd_repair_msg_t const * msg = (fd_repair_msg_t const *)payload;
295-
switch ( msg->kind ) {
296-
case FD_REPAIR_KIND_PING:
297-
case FD_REPAIR_KIND_PONG:
298-
case FD_REPAIR_KIND_ORPHAN: break;
299-
case FD_REPAIR_KIND_SHRED: { if( FD_UNLIKELY( msg->shred.slot==0 ) ) { break; } ctx->parsed.repair_net.slot = msg->shred.slot; break; }
300-
case FD_REPAIR_KIND_HIGHEST_SHRED: { if( FD_UNLIKELY( msg->highest_shred.slot==0 ) ) { break; } ctx->parsed.repair_net.slot = msg->highest_shred.slot; break; }
301-
default: FD_LOG_ERR(( "unexpected repair msg kind %u", msg->kind ));
296+
if( FD_LIKELY( msg->kind==FD_REPAIR_KIND_SHRED ) ) {
297+
if( FD_UNLIKELY( msg->shred.slot==0 ) ) break;
298+
ctx->parsed.repair_net.slot = msg->shred.slot;
299+
ctx->parsed.repair_net.shred_idx = msg->shred.shred_idx;
302300
}
303301
}
304302
break;
@@ -488,7 +486,7 @@ after_frag( fd_gui_ctx_t * ctx,
488486
case IN_KIND_REPAIR_NET: {
489487
if( FD_UNLIKELY( ctx->parsed.repair_net.slot==ULONG_MAX ) ) break;
490488
long tsorig_ns = ctx->ref_wallclock + (long)((double)(fd_frag_meta_ts_decomp( tsorig, fd_tickcount() ) - ctx->ref_tickcount) / ctx->tick_per_ns);
491-
fd_gui_handle_repair_slot( ctx->gui, ctx->parsed.repair_net.slot, tsorig_ns );
489+
fd_gui_handle_repair_request( ctx->gui, ctx->parsed.repair_net.slot, ctx->parsed.repair_net.shred_idx, tsorig_ns );
492490
break;
493491
}
494492
case IN_KIND_NET_GOSSVF: {

0 commit comments

Comments
 (0)