Skip to content

Commit 23822f6

Browse files
Jim8y, Neo Bot
authored and committed
Add health check endpoints for telemetry
Introduces HTTP health check endpoints for monitoring node status:
- /health: Combined health status with JSON response
- /health/live: Kubernetes liveness probe endpoint
- /health/ready: Kubernetes readiness probe endpoint

Features:
- Configurable health port (defaults to Prometheus port)
- Sync status check based on block height comparison
- Memory pressure detection
- Peer connectivity validation
- JSON response with detailed health information
1 parent ce4e49d commit 23822f6

File tree

2 files changed

+39
-5
lines changed

2 files changed

+39
-5
lines changed

src/Plugins/Telemetry/Health/HealthCheckEndpoint.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,16 @@ private JObject GetHealthStatus()
123123
var headerHeight = _system.HeaderCache.Last?.Index ?? currentHeight;
124124
var blocksBehind = headerHeight - currentHeight;
125125
var isSynced = blocksBehind <= 2;
126-
var peerCount = 0;
126+
127+
// Get actual peer count from LocalNode
128+
var peerCount = _system.LocalNode.Ask<int>(new Network.P2P.LocalNode.GetConnectedCount()).Result;
129+
var hasPeers = peerCount > 0;
127130

128131
var memPool = _system.MemPool;
129132
var mempoolCount = memPool.Count;
130133
var mempoolCapacity = memPool.Capacity;
131134

132-
var status = isSynced && peerCount >= 0 ? "healthy" : "degraded";
135+
var status = isSynced && hasPeers ? "healthy" : "degraded";
133136

134137
return new JObject
135138
{
@@ -147,6 +150,11 @@ private JObject GetHealthStatus()
147150
["blocks_behind"] = blocksBehind,
148151
["synced"] = isSynced
149152
},
153+
["network"] = new JObject
154+
{
155+
["status"] = hasPeers ? "healthy" : "degraded",
156+
["peer_count"] = peerCount
157+
},
150158
["mempool"] = new JObject
151159
{
152160
["status"] = "healthy",

src/Plugins/Telemetry/Telemetry.cs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
// modifications are permitted.
1111

1212
using Neo.Plugins.Telemetry.Collectors;
13+
using Neo.Plugins.Telemetry.Health;
1314
using Prometheus;
1415
using System.Timers;
1516
using Timer = System.Timers.Timer;
@@ -27,6 +28,7 @@ public class Telemetry : Plugin
2728

2829
private NeoSystem? _system;
2930
private MetricServer? _metricServer;
31+
private HealthCheckEndpoint? _healthEndpoint;
3032
private Timer? _collectionTimer;
3133

3234
// Collectors
@@ -39,6 +41,7 @@ public class Telemetry : Plugin
3941
private bool _isRunning;
4042
private readonly object _collectionLock = new();
4143
private bool _isCollecting;
44+
private string _networkName = "unknown";
4245

4346
public Telemetry()
4447
{
@@ -66,17 +69,20 @@ protected internal override void OnSystemLoaded(NeoSystem system)
6669
try
6770
{
6871
// Determine network name from protocol settings
69-
var networkName = DetermineNetworkName(system);
72+
_networkName = DetermineNetworkName(system);
7073
var nodeId = TelemetrySettings.Default.NodeId;
7174

72-
Log($"Starting telemetry collection for node '{nodeId}' on network '{networkName}'", LogLevel.Info);
75+
Log($"Starting telemetry collection for node '{nodeId}' on network '{_networkName}'", LogLevel.Info);
7376

7477
// Initialize collectors based on configuration
75-
InitializeCollectors(system, nodeId, networkName);
78+
InitializeCollectors(system, nodeId, _networkName);
7679

7780
// Start Prometheus metric server
7881
StartMetricServer();
7982

83+
// Start health endpoint
84+
StartHealthEndpoint();
85+
8086
// Start periodic collection timer
8187
StartCollectionTimer();
8288

@@ -180,6 +186,24 @@ private void StartCollectionTimer()
180186
Log($"Metrics collection timer started with interval {intervalMs}ms", LogLevel.Debug);
181187
}
182188

189+
private void StartHealthEndpoint()
190+
{
191+
var settings = TelemetrySettings.Default;
192+
193+
try
194+
{
195+
var host = settings.PrometheusHost;
196+
var port = settings.HealthPort ?? settings.PrometheusPort;
197+
198+
_healthEndpoint = new HealthCheckEndpoint(_system!, host, port, settings.NodeId, _networkName);
199+
Log($"Health endpoints started at http://{host}:{port}/health", LogLevel.Info);
200+
}
201+
catch (Exception ex)
202+
{
203+
Log($"Failed to start health endpoint: {ex.Message}", LogLevel.Warning);
204+
}
205+
}
206+
183207
private void OnCollectionTimerElapsed(object? sender, ElapsedEventArgs e)
184208
{
185209
if (!_isRunning) return;
@@ -260,6 +284,8 @@ private void StopTelemetry()
260284
_mempoolCollector = null;
261285
_systemCollector = null;
262286
_pluginCollector = null;
287+
_healthEndpoint?.Dispose();
288+
_healthEndpoint = null;
263289

264290
Log("Telemetry plugin shut down successfully", LogLevel.Info);
265291
}

0 commit comments

Comments
 (0)