@@ -25,10 +25,8 @@ service ControlService {
2525 // encryption, and other critical cluster parameters.
2626 rpc Configure (ConfigureRequest ) returns (ConfigureReply );
2727
28- // Retrieves the current read version (transaction version) of the database.
29- rpc GetReadVersion (GetReadVersionRequest ) returns (GetReadVersionReply );
30-
3128 // Retrieves comprehensive cluster status including health, performance, and configuration.
29+ // Docs: https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/mr-status.rst
3230 rpc GetStatus (GetStatusRequest ) returns (GetStatusReply );
3331
3432 // Retrieves the list of all worker processes in the cluster.
@@ -45,6 +43,15 @@ service ControlService {
4543
4644 // Forcefully terminates worker processes. Use with caution - prefer Exclude for graceful removal.
4745 rpc Kill (KillRequest ) returns (KillReply );
46+
47+ // Manages maintenance mode for zones. Maintenance mode prevents data distribution from moving
48+ // data away from processes in the specified zone. A zone that is under maintenance will not
49+ // have data moved away from it even if processes in that zone fail. In particular, this means
50+ // the cluster will not attempt to heal the replication factor as a result of failures in the
51+ // maintenance zone. This is useful when the amount of time that the processes in a fault domain
52+ // are expected to be absent is reasonably short and you don’t want to move data to and from the
53+ // affected processes.
54+ rpc Maintenance (MaintenanceRequest ) returns (MaintenanceReply );
4855}
4956
5057//------ Messages -------
@@ -57,7 +64,7 @@ message Worker {
5764 // These fields help FoundationDB make intelligent placement decisions for
5865 // data replication and fault tolerance.
5966 message Locality {
60- // Unique identifier for this process
67+ // Unique identifier for this process
6168 optional string process_id = 1 ;
6269
6370 // Zone identifier - typically represents a failure domain (e.g., rack, availability zone)
@@ -93,7 +100,7 @@ message GetCoordinatorsRequest {}
93100
94101// Response containing the current cluster coordinators.
95102message GetCoordinatorsReply {
96- // List of coordinator addresses in the format "ip:port"
103+ // Addresses of the current coordinators, each in the format "ip:port" or "host:port"
97104 repeated string coordinators = 1 ;
98105}
99106
@@ -104,16 +111,13 @@ message ChangeCoordinatorsRequest {
104111 // Human-readable description for the cluster (e.g., cluster name)
105112 optional string cluster_description = 1 ;
106113
107- // If true, disables the configuration database
108- optional bool disable_config_db = 2 ;
109-
110114 // If true, automatically selects coordinators based on the current cluster topology.
111115 // When false, uses the addresses specified in new_coordinator_addresses.
112- optional bool automatic_coordinators = 3 ;
116+ optional bool automatic_coordinators = 2 ;
113117
114118 // List of addresses to use as new coordinators (when automatic_coordinators is false).
115- // Each address should be in the format "ip :port".
116- repeated string new_coordinator_addresses = 4 ;
119+ // Each address should be in the format "ip:port" or "host:port".
120+ repeated string new_coordinator_addresses = 3 ;
117121}
118122
119123// Response to a coordinator change operation.
@@ -150,26 +154,33 @@ message ConfigureRequest {
150154 // Redundancy mode determines how many copies of data are maintained
151155 // and what failure scenarios the cluster can survive
152156 enum RedundancyMode {
153- UNSET_REDUNDANCY = 0 ; // No change to redundancy mode
154- SINGLE = 1 ; // One copy, not fault tolerant (for testing only)
155- DOUBLE = 2 ; // Two copies, survives one failure
156- TRIPLE = 3 ; // Three copies, survives two failures
157- THREE_DATA_HALL = 4 ; // Three data hall configuration for geographic redundancy
158- THREE_DATACENTER = 5 ; // Three datacenter configuration for maximum availability
157+ UNSET_REDUNDANCY = 0 ; // No change to redundancy mode
158+ SINGLE = 1 ; // One copy, not fault tolerant (for testing only)
159+ DOUBLE = 2 ; // Two copies, survives one failure
160+ TRIPLE = 3 ; // Three copies, survives two failures
161+ THREE_DATA_HALL = 4 ; // Three data hall configuration, survives failure of one
162+ // complete data hall and one additional machine in another
163+ // data hall
164+ THREE_DATA_HALL_FALLBACK = 6 ; // Similar to THREE_DATA_HALL, differing only in that data is
165+ // stored on two instead of three replicas. This configuration
166+ // is useful to unblock data distribution when a data hall
167+ // becomes temporarily unavailable. Takes the next free value (6).
168+ THREE_DATACENTER = 5 ; // Three datacenter configuration for maximum availability (value 5 kept, not renumbered)
159169 }
160170 // Desired redundancy mode for the database
161171 optional RedundancyMode redundancy_mode = 3 ;
162172
163173 // Storage engine determines the underlying storage technology and performance characteristics
164174 enum StorageEngine {
165- UNSET_STORAGE = 0 ; // No change to storage engine
166- SSD = 1 ; // B-Tree optimized for SSDs (default)
167- SSD_1 = 2 ; // ssd-1 variant
168- SSD_2 = 3 ; // ssd-2 variant (newer redwood engine)
169- MEMORY = 4 ; // In-memory storage (for testing or caching)
170- MEMORY_1 = 5 ; // memory-1 variant
171- MEMORY_2 = 6 ; // memory-2 variant
172- MEMORY_RADIXTREE = 7 ; // memory-radixtree variant
175+ NONE = 0 ; // No change to storage engine; the only name allowed to use value 0
176+ SSD_BTREE_V1 = 1 ; // SSD B-tree engine, version 1
177+ SSD_BTREE_V2 = 2 ; // SSD B-tree engine, version 2
178+ SSD = 3 ; // Same engine as SSD_BTREE_V2 (separate wire value, not an enum alias)
179+ SSD_REDWOOD_V1 = 4 ; // SSD Redwood engine, version 1
180+ SSD_ROCKSDB_V1 = 5 ; // SSD RocksDB engine, version 1
181+ SSD_SHARDED_ROCKSDB = 6 ; // SSD sharded RocksDB engine
182+ MEMORY = 7 ; // In-memory engine
183+ MEMORY_RADIXTREE = 8 ; // In-memory radix-tree engine
172184 }
174185 // Desired storage engine for the database
175186 optional StorageEngine storage_engine = 4 ;
@@ -215,27 +226,23 @@ message ConfigureRequest {
215226 enum EncryptionAtRestMode {
216227 UNSET_ENCRYPTION = 0 ; // No change to encryption mode
217228 DISABLED_ENCRYPTION = 1 ; // Disable encryption at rest
218- DOMAIN_AWARE = 2 ; // Domain-aware encryption (tenant-based)
219- CLUSTER_AWARE = 3 ; // Cluster-wide encryption
229+ CLUSTER_AWARE = 3 ; // Cluster-wide encryption (2 was DOMAIN_AWARE; leave it unused)
220230 }
221231 // Encryption at rest mode for the database
222232 optional EncryptionAtRestMode encryption_at_rest_mode = 13 ;
223233
224234 // Other database features
225235
226- // If true, enables blob granules for storing large values
227- optional bool blob_granules_enabled = 14 ;
228-
229236 // List of addresses to exclude during recruitment (format: "ip" or "ip:port")
230- repeated string exclude_addresses = 15 ;
237+ repeated string exclude_addresses = 14 ;
231238
232239 // Number of testing storage servers to maintain
233- optional int32 tss_count = 16 ;
240+ optional int32 tss_count = 15 ;
234241
235242 // Control flags
236243
237244 // If true, skip safety checks (dangerous - use with caution)
238- optional bool force = 17 ;
245+ optional bool force = 16 ;
239246}
240247
241248// Response to a database configuration request.
@@ -274,17 +281,6 @@ message ConfigureReply {
274281 optional string message = 2 ;
275282}
276283
277- // Request to get the current read version of the database.
278- // The read version is a monotonically increasing transaction version number
279- // used for snapshot isolation.
280- message GetReadVersionRequest {}
281-
282- // Response containing the current read version.
283- message GetReadVersionReply {
284- // Current read version (transaction version number)
285- optional int64 version = 1 ;
286- }
287-
288284// Request to retrieve the cluster status.
289285// The request carries no parameters; the reply describes the cluster's health and state.
290286message GetStatusRequest {}
@@ -318,7 +314,7 @@ message IncludeRequest {
318314 // List of localities to include (format: "locality_key:locality_value")
319315 repeated string localities = 3 ;
320316
321- // If true, include workers that were marked as failed
317+ // If true, only include workers that were marked as failed
322318 optional bool failed = 4 ;
323319}
324320
@@ -335,7 +331,8 @@ message ExcludeRequest {
335331 // If true, exclude all workers (rarely used, requires force flag)
336332 optional bool all = 1 ;
337333
338- // If true, mark workers as failed (more aggressive than normal exclude)
334+ // If true, mark workers as failed. This flag will drop all the data for the
335+ // specified workers and could cause data loss.
339336 optional bool failed = 2 ;
340337
341338 // If true, don't wait for data migration to complete before returning
@@ -387,7 +384,6 @@ message ExcludeStatusReply {
387384
388385// Request to kill (terminate) worker processes.
389386// This is a forceful operation that immediately stops processes.
390- // Use with caution - prefer exclude for graceful removal.
391387message KillRequest {
392388 // If true, kill all workers (requires extreme caution)
393389 optional bool all = 1 ;
@@ -398,3 +394,45 @@ message KillRequest {
398394
399395// Response to a kill operation. Currently carries no fields.
400396message KillReply {}
397+
398+ // Request to manage maintenance mode for zones.
399+ // Maintenance mode prevents data distribution from moving data away from the
400+ // specified zone, allowing safe maintenance operations (e.g., hardware upgrades).
401+ // Only one zone can be in maintenance mode at a time.
402+ message MaintenanceRequest {
403+ // Operation type for maintenance
404+ enum Operation {
405+ GET = 0 ; // Get current maintenance status
406+ SET = 1 ; // Set maintenance mode for a zone
407+ CLEAR = 2 ; // Clear maintenance mode
408+ }
409+
410+ // The operation to perform; optional so an omitted value is distinguishable from GET
411+ optional Operation operation = 1 ;
412+
413+ // Zone ID to place in maintenance mode (required for SET operation)
414+ optional string zone_id = 2 ;
415+
416+ // Duration in seconds for maintenance mode (required for SET operation)
417+ optional double duration_seconds = 3 ;
418+ }
419+
420+ // Response to a maintenance operation.
421+ message MaintenanceReply {
422+ enum Result { // Outcome of the maintenance operation
423+ SUCCESS = 0 ; // Operation succeeded
424+ INVALID_PARAMETERS = 2 ; // Invalid parameters for the operation. NOTE(review): value 1 is skipped - confirm intentional
425+ }
426+
427+ // Result code
428+ optional Result result = 1 ;
429+
430+ // Zone ID currently in maintenance mode (if any)
431+ optional string zone_id = 2 ;
432+
433+ // Remaining seconds for the current maintenance (if active)
434+ optional int64 remaining_seconds = 3 ;
435+
436+ // Human-readable message with additional details
437+ optional string message = 4 ;
438+ }
0 commit comments