@@ -25,10 +25,8 @@ service ControlService {
2525 // encryption, and other critical cluster parameters.
2626 rpc Configure (ConfigureRequest ) returns (ConfigureReply );
2727
28- // Retrieves the current read version (transaction version) of the database.
29- rpc GetReadVersion (GetReadVersionRequest ) returns (GetReadVersionReply );
30-
3128 // Retrieves comprehensive cluster status including health, performance, and configuration.
29+ // Docs: https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/mr-status.rst
3230 rpc GetStatus (GetStatusRequest ) returns (GetStatusReply );
3331
3432 // Retrieves the list of all worker processes in the cluster.
@@ -45,6 +43,15 @@ service ControlService {
4543
4644 // Forcefully terminates worker processes. Use with caution - prefer Exclude for graceful removal.
4745 rpc Kill (KillRequest ) returns (KillReply );
46+
47+ // Manages maintenance mode for zones. Maintenance mode prevents data distribution from moving
48+ // data away from processes in the specified zone. A zone that is under maintenance will not
49+ // have data moved away from it even if processes in that zone fail. In particular, this means
50+ // the cluster will not attempt to heal the replication factor as a result of failures in the
51+ // maintenance zone. This is useful when the amount of time that the processes in a fault domain
52+ // are expected to be absent is reasonably short and you don’t want to move data to and from the
53+ // affected processes.
54+ rpc Maintenance (MaintenanceRequest ) returns (MaintenanceReply );
4855}
4956
5057//------ Messages -------
@@ -57,7 +64,7 @@ message Worker {
5764 // These fields help FoundationDB make intelligent placement decisions for
5865 // data replication and fault tolerance.
5966 message Locality {
60- // Unique identifier for this process
67+ // Unique identifier for this process
6168 optional string process_id = 1 ;
6269
6370 // Zone identifier - typically represents a failure domain (e.g., rack, availability zone)
@@ -93,7 +100,7 @@ message GetCoordinatorsRequest {}
93100
// Reply carrying the coordinators currently configured for the cluster.
message GetCoordinatorsReply {
  // Coordinator addresses; each entry is formatted as "ip:port" or
  // "host:port".
  repeated string coordinators = 1;
}
99106
@@ -104,16 +111,13 @@ message ChangeCoordinatorsRequest {
104111 // Human-readable description for the cluster (e.g., cluster name)
105112 optional string cluster_description = 1 ;
106113
107- // If true, disables the configuration database
108- optional bool disable_config_db = 2 ;
109-
110114 // If true, automatically selects coordinators based on the current cluster topology.
111115 // When false, uses the addresses specified in new_coordinator_addresses.
112- optional bool automatic_coordinators = 3 ;
116+ optional bool automatic_coordinators = 2 ;
113117
114118 // List of addresses to use as new coordinators (when automatic_coordinators is false).
115- // Each address should be in the format "ip :port".
116- repeated string new_coordinator_addresses = 4 ;
119+ // Each address should be in the format "ip:port" or "host:port".
120+ repeated string new_coordinator_addresses = 3 ;
117121}
118122
119123// Response to a coordinator change operation.
@@ -215,27 +219,23 @@ message ConfigureRequest {
// Selects how (or whether) data is encrypted at rest.
enum EncryptionAtRestMode {
  UNSET_ENCRYPTION = 0;     // No change to encryption mode
  DISABLED_ENCRYPTION = 1;  // Disable encryption at rest
  CLUSTER_AWARE = 3;        // Cluster-wide encryption

  // Value 2 was DOMAIN_AWARE (domain-aware, tenant-based encryption), which
  // has been removed. The number and name stay reserved so they are never
  // reassigned: a peer built against the earlier schema that sends 2 must
  // not have it reinterpreted as a different mode. For the same reason
  // CLUSTER_AWARE keeps its original number 3 instead of being renumbered.
  reserved 2;
  reserved "DOMAIN_AWARE";
}
221224 // Encryption at rest mode for the database
222225 optional EncryptionAtRestMode encryption_at_rest_mode = 13 ;
223226
224227 // Other database features
225228
226- // If true, enables blob granules for storing large values
227- optional bool blob_granules_enabled = 14 ;
228-
229229 // List of addresses to exclude during recruitment (format: "ip" or "ip:port")
230- repeated string exclude_addresses = 15 ;
230+ repeated string exclude_addresses = 14 ;
231231
232232 // Number of testing storage servers to maintain
233- optional int32 tss_count = 16 ;
233+ optional int32 tss_count = 15 ;
234234
235235 // Control flags
236236
237237 // If true, skip safety checks (dangerous - use with caution)
238- optional bool force = 17 ;
238+ optional bool force = 16 ;
239239}
240240
241241// Response to a database configuration request.
@@ -274,17 +274,6 @@ message ConfigureReply {
274274 optional string message = 2 ;
275275}
276276
277- // Request to get the current read version of the database.
278- // The read version is a monotonically increasing transaction version number
279- // used for snapshot isolation.
280- message GetReadVersionRequest {}
281-
282- // Response containing the current read version.
283- message GetReadVersionReply {
284- // Current read version (transaction version number)
285- optional int64 version = 1 ;
286- }
287-
// Request message for the GetStatus RPC, which reports on the cluster's
// health and state. It currently carries no fields.
message GetStatusRequest {}
@@ -318,7 +307,7 @@ message IncludeRequest {
318307 // List of localities to include (format: "locality_key:locality_value")
319308 repeated string localities = 3 ;
320309
321- // If true, include workers that were marked as failed
310+ // If true, only include workers that were marked as failed
322311 optional bool failed = 4 ;
323312}
324313
@@ -335,7 +324,8 @@ message ExcludeRequest {
335324 // If true, exclude all workers (rarely used, requires force flag)
336325 optional bool all = 1 ;
337326
338- // If true, mark workers as failed (more aggressive than normal exclude)
327+ // If true, mark workers as failed. This flag will drop all the data for the
328+ // specified workers and could cause data loss.
339329 optional bool failed = 2 ;
340330
341331 // If true, don't wait for data migration to complete before returning
@@ -387,7 +377,6 @@ message ExcludeStatusReply {
387377
388378// Request to kill (terminate) worker processes.
389379// This is a forceful operation that immediately stops processes.
390- // Use with caution - prefer exclude for graceful removal.
391380message KillRequest {
392381 // If true, kill all workers (requires extreme caution)
393382 optional bool all = 1 ;
@@ -398,3 +387,45 @@ message KillRequest {
398387
// Reply message for the Kill RPC. It currently carries no fields.
message KillReply {}
390+
// Request to manage maintenance mode for zones.
// Maintenance mode prevents data distribution from moving data away from the
// specified zone, allowing safe maintenance operations (e.g., hardware
// upgrades). Only one zone can be in maintenance mode at a time.
message MaintenanceRequest {
  // Operation type for maintenance.
  enum Operation {
    GET = 0;    // Get current maintenance status
    SET = 1;    // Set maintenance mode for a zone
    CLEAR = 2;  // Clear maintenance mode
  }

  // The operation to perform.
  // Declared `optional`, matching the other singular fields of this schema,
  // so the field has explicit presence: because GET is the zero value, an
  // unlabeled field could not tell "operation not set" apart from an
  // explicit GET. The wire encoding is unchanged by the label.
  optional Operation operation = 1;

  // Zone ID to place in maintenance mode (required for SET operation).
  optional string zone_id = 2;

  // Duration in seconds for maintenance mode (required for SET operation).
  optional double duration_seconds = 3;
}
412+
// Reply message for a maintenance operation.
message MaintenanceReply {
  // Outcome of the requested operation.
  // NOTE(review): value 1 is not assigned here; if it belonged to a removed
  // result code it should be marked `reserved` — confirm.
  enum Result {
    SUCCESS = 0;             // Operation succeeded
    INVALID_PARAMETERS = 2;  // Invalid parameters for the operation
  }

  // Result code for the operation.
  optional Result result = 1;

  // Zone ID that is currently (or now) in maintenance, if there is one.
  optional string zone_id = 2;

  // Seconds left in the current maintenance window, if one is active.
  // NOTE(review): this is an integer while MaintenanceRequest.duration_seconds
  // is a double — confirm the difference is intended.
  optional int64 remaining_seconds = 3;

  // Human-readable message with additional details.
  optional string message = 4;
}
0 commit comments