Skip to content

Commit 0816849

Browse files
committed
Address review comments
1 parent a0af9bd commit 0816849

File tree

1 file changed

+85
-47
lines changed

1 file changed

+85
-47
lines changed

fdbctl/protos/control_service.proto

Lines changed: 85 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@ service ControlService {
2525
// encryption, and other critical cluster parameters.
2626
rpc Configure(ConfigureRequest) returns (ConfigureReply);
2727

28-
// Retrieves the current read version (transaction version) of the database.
29-
rpc GetReadVersion(GetReadVersionRequest) returns (GetReadVersionReply);
30-
3128
// Retrieves comprehensive cluster status including health, performance, and configuration.
29+
// Docs: https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/mr-status.rst
3230
rpc GetStatus(GetStatusRequest) returns (GetStatusReply);
3331

3432
// Retrieves the list of all worker processes in the cluster.
@@ -45,6 +43,15 @@ service ControlService {
4543

4644
// Forcefully terminates worker processes. Use with caution - prefer Exclude for graceful removal.
4745
rpc Kill(KillRequest) returns (KillReply);
46+
47+
// Manages maintenance mode for zones. Maintenance mode prevents data distribution from moving
48+
// data away from processes in the specified zone. A zone that is under maintenance will not
49+
// have data moved away from it even if processes in that zone fail. In particular, this means
50+
// the cluster will not attempt to heal the replication factor as a result of failures in the
51+
// maintenance zone. This is useful when the amount of time that the processes in a fault domain
52+
// are expected to be absent is reasonably short and you don't want to move data to and from the
53+
// affected processes.
54+
rpc Maintenance(MaintenanceRequest) returns (MaintenanceReply);
4855
}
4956

5057
//------ Messages -------
@@ -57,7 +64,7 @@ message Worker {
5764
// These fields help FoundationDB make intelligent placement decisions for
5865
// data replication and fault tolerance.
5966
message Locality {
60-
// Unique identifier for this process
67+
// Unique identifier for this process
6168
optional string process_id = 1;
6269

6370
// Zone identifier - typically represents a failure domain (e.g., rack, availability zone)
@@ -93,7 +100,7 @@ message GetCoordinatorsRequest {}
93100

94101
// Response containing the current cluster coordinators.
95102
message GetCoordinatorsReply {
96-
// List of coordinator addresses in the format "ip:port"
103+
// List of coordinator addresses in the format "ip:port" or "host:port"
97104
repeated string coordinators = 1;
98105
}
99106

@@ -104,16 +111,13 @@ message ChangeCoordinatorsRequest {
104111
// Human-readable description for the cluster (e.g., cluster name)
105112
optional string cluster_description = 1;
106113

107-
// If true, disables the configuration database
108-
optional bool disable_config_db = 2;
109-
110114
// If true, automatically selects coordinators based on the current cluster topology.
111115
// When false, uses the addresses specified in new_coordinator_addresses.
112-
optional bool automatic_coordinators = 3;
116+
optional bool automatic_coordinators = 2;
113117

114118
// List of addresses to use as new coordinators (when automatic_coordinators is false).
115-
// Each address should be in the format "ip:port".
116-
repeated string new_coordinator_addresses = 4;
119+
// Each address should be in the format "ip:port" or "host:port".
120+
repeated string new_coordinator_addresses = 3;
117121
}
118122

119123
// Response to a coordinator change operation.
@@ -150,26 +154,33 @@ message ConfigureRequest {
150154
// Redundancy mode determines how many copies of data are maintained
151155
// and what failure scenarios the cluster can survive
152156
enum RedundancyMode {
153-
UNSET_REDUNDANCY = 0; // No change to redundancy mode
154-
SINGLE = 1; // One copy, not fault tolerant (for testing only)
155-
DOUBLE = 2; // Two copies, survives one failure
156-
TRIPLE = 3; // Three copies, survives two failures
157-
THREE_DATA_HALL = 4; // Three data hall configuration for geographic redundancy
158-
THREE_DATACENTER = 5; // Three datacenter configuration for maximum availability
157+
UNSET_REDUNDANCY = 0; // No change to redundancy mode
158+
SINGLE = 1; // One copy, not fault tolerant (for testing only)
159+
DOUBLE = 2; // Two copies, survives one failure
160+
TRIPLE = 3; // Three copies, survives two failures
161+
THREE_DATA_HALL = 4;          // Three data hall configuration, survives failure of one
162+
// complete data hall and one additional machine in another
163+
// data hall
164+
THREE_DATA_HALL_FALLBACK = 5; // Similar to three_data_hall, differing only in that data is
165+
// stored on two instead of three replicas. This configuration
166+
// is useful to unblock data distribution when a data hall
167+
// becomes temporarily unavailable
168+
THREE_DATACENTER = 6; // Three datacenter configuration for maximum availability
159169
}
160170
// Desired redundancy mode for the database
161171
optional RedundancyMode redundancy_mode = 3;
162172

163173
// Storage engine determines the underlying storage technology and performance characteristics
164174
enum StorageEngine {
  // SSD is a second name for SSD_BTREE_V2, so the two share one wire value.
  // protoc rejects two names with the same number unless aliasing is enabled.
  // NOTE(review): if SSD is meant to be a distinct wire value instead, give it
  // its own unused number and drop this option.
  option allow_alias = true;

  NONE = 0;                 // No change to storage engine
  SSD_BTREE_V1 = 1;         // SSD B-tree engine, version 1
  SSD_BTREE_V2 = 2;         // SSD B-tree engine, version 2
  SSD = 2;                  // Same as SSD_BTREE_V2 (alias)
  SSD_REDWOOD_V1 = 3;       // SSD Redwood engine, version 1
  SSD_ROCKSDB_V1 = 4;       // SSD RocksDB engine, version 1
  SSD_SHARDED_ROCKSDB = 5;  // SSD sharded RocksDB engine
  MEMORY = 6;               // In-memory storage engine
  MEMORY_RADIXTREE = 7;     // In-memory radix-tree storage engine
}
174185
// Desired storage engine for the database
175186
optional StorageEngine storage_engine = 4;
@@ -215,27 +226,23 @@ message ConfigureRequest {
215226
enum EncryptionAtRestMode {
216227
UNSET_ENCRYPTION = 0; // No change to encryption mode
217228
DISABLED_ENCRYPTION = 1; // Disable encryption at rest
218-
DOMAIN_AWARE = 2; // Domain-aware encryption (tenant-based)
219-
CLUSTER_AWARE = 3; // Cluster-wide encryption
229+
CLUSTER_AWARE = 2; // Cluster-wide encryption
220230
}
221231
// Encryption at rest mode for the database
222232
optional EncryptionAtRestMode encryption_at_rest_mode = 13;
223233

224234
// Other database features
225235

226-
// If true, enables blob granules for storing large values
227-
optional bool blob_granules_enabled = 14;
228-
229236
// List of addresses to exclude during recruitment (format: "ip" or "ip:port")
230-
repeated string exclude_addresses = 15;
237+
repeated string exclude_addresses = 14;
231238

232239
// Number of testing storage servers to maintain
233-
optional int32 tss_count = 16;
240+
optional int32 tss_count = 15;
234241

235242
// Control flags
236243

237244
// If true, skip safety checks (dangerous - use with caution)
238-
optional bool force = 17;
245+
optional bool force = 16;
239246
}
240247

241248
// Response to a database configuration request.
@@ -274,17 +281,6 @@ message ConfigureReply {
274281
optional string message = 2;
275282
}
276283

277-
// Request to get the current read version of the database.
278-
// The read version is a monotonically increasing transaction version number
279-
// used for snapshot isolation.
280-
message GetReadVersionRequest {}
281-
282-
// Response containing the current read version.
283-
message GetReadVersionReply {
284-
// Current read version (transaction version number)
285-
optional int64 version = 1;
286-
}
287-
288284
// Request to retrieve the cluster status.
289285
// This provides comprehensive information about the cluster's health and state.
290286
message GetStatusRequest {}
@@ -318,7 +314,7 @@ message IncludeRequest {
318314
// List of localities to include (format: "locality_key:locality_value")
319315
repeated string localities = 3;
320316

321-
// If true, include workers that were marked as failed
317+
// If true, only include workers that were marked as failed
322318
optional bool failed = 4;
323319
}
324320

@@ -335,7 +331,8 @@ message ExcludeRequest {
335331
// If true, exclude all workers (rarely used, requires force flag)
336332
optional bool all = 1;
337333

338-
// If true, mark workers as failed (more aggressive than normal exclude)
334+
// If true, mark workers as failed. This flag will drop all the data for the
335+
// specified workers and could cause data loss.
339336
optional bool failed = 2;
340337

341338
// If true, don't wait for data migration to complete before returning
@@ -387,7 +384,6 @@ message ExcludeStatusReply {
387384

388385
// Request to kill (terminate) worker processes.
389386
// This is a forceful operation that immediately stops processes.
390-
// Use with caution - prefer exclude for graceful removal.
391387
message KillRequest {
392388
// If true, kill all workers (requires extreme caution)
393389
optional bool all = 1;
@@ -398,3 +394,45 @@ message KillRequest {
398394

399395
// Response to a kill operation.
400396
message KillReply {}
397+
398+
// Request to manage maintenance mode for zones.
399+
// Maintenance mode prevents data distribution from moving data away from the
400+
// specified zone, allowing safe maintenance operations (e.g., hardware upgrades).
401+
// Only one zone can be in maintenance mode at a time.
402+
message MaintenanceRequest {
403+
// Selects which maintenance action this request performs.
404+
enum Operation {
405+
GET = 0;    // Get current maintenance status
406+
SET = 1;    // Set maintenance mode for a zone
407+
CLEAR = 2;  // Clear maintenance mode
408+
}
409+
410+
// The operation to perform.
// NOTE(review): unlike the other scalar fields in this message, this field is
// declared without `optional`. Assuming proto3 syntax, an omitted value cannot
// be told apart from GET (0), so a request with no operation reads as GET.
// Confirm that defaulting to GET is intended.
411+
Operation operation = 1;
412+
413+
// Zone ID to place in maintenance mode (required for SET operation)
414+
optional string zone_id = 2;
415+
416+
// Duration in seconds for maintenance mode (required for SET operation).
// NOTE(review): this is a double, while MaintenanceReply.remaining_seconds is
// an int64. Confirm that fractional seconds are meant to be accepted here.
417+
optional double duration_seconds = 3;
418+
}
419+
420+
// Response to a maintenance operation.
421+
message MaintenanceReply {
422+
// Outcome of the maintenance operation.
// NOTE(review): value 1 is not assigned. If the gap is deliberate, consider
// documenting it with `reserved 1;` — confirm.
enum Result {
423+
SUCCESS = 0;             // Operation succeeded
424+
INVALID_PARAMETERS = 2;  // Invalid parameters for the operation
425+
}
426+
427+
// Result code for the requested operation
428+
optional Result result = 1;
429+
430+
// Current or active zone ID in maintenance (if any)
431+
optional string zone_id = 2;
432+
433+
// Remaining seconds for the current maintenance (if active).
// NOTE(review): integer seconds here, whereas
// MaintenanceRequest.duration_seconds is a double.
434+
optional int64 remaining_seconds = 3;
435+
436+
// Human-readable message with additional details
437+
optional string message = 4;
438+
}

0 commit comments

Comments
 (0)