diff --git a/Sources/Containerization/LinuxContainer.swift b/Sources/Containerization/LinuxContainer.swift index 1738549b..aad39051 100644 --- a/Sources/Containerization/LinuxContainer.swift +++ b/Sources/Containerization/LinuxContainer.swift @@ -62,6 +62,9 @@ public final class LinuxContainer: Container, Sendable { public var virtualization: Bool = false /// Optional destination for serial boot logs. public var bootLog: BootLog? + /// EXPERIMENTAL: Path in the root filesystem for the virtual + /// machine where the OCI runtime used to spawn the container lives. + public var ociRuntimePath: String? public init() {} @@ -77,7 +80,8 @@ public final class LinuxContainer: Container, Sendable { dns: DNS? = nil, hosts: Hosts? = nil, virtualization: Bool = false, - bootLog: BootLog? = nil + bootLog: BootLog? = nil, + ociRuntimePath: String? = nil ) { self.process = process self.cpus = cpus @@ -91,6 +95,7 @@ public final class LinuxContainer: Container, Sendable { self.hosts = hosts self.virtualization = virtualization self.bootLog = bootLog + self.ociRuntimePath = ociRuntimePath } } @@ -317,19 +322,42 @@ public final class LinuxContainer: Container, Sendable { ) ) + spec.linux?.namespaces = [ + LinuxNamespace(type: .cgroup), + LinuxNamespace(type: .ipc), + LinuxNamespace(type: .mount), + LinuxNamespace(type: .pid), + LinuxNamespace(type: .uts), + ] + return spec } + /// The default set of mounts for a LinuxContainer. 
public static func defaultMounts() -> [Mount] { let defaultOptions = ["nosuid", "noexec", "nodev"] return [ - .any(type: "proc", source: "proc", destination: "/proc", options: defaultOptions), + .any(type: "proc", source: "proc", destination: "/proc"), .any(type: "sysfs", source: "sysfs", destination: "/sys", options: defaultOptions), .any(type: "devtmpfs", source: "none", destination: "/dev", options: ["nosuid", "mode=755"]), .any(type: "mqueue", source: "mqueue", destination: "/dev/mqueue", options: defaultOptions), .any(type: "tmpfs", source: "tmpfs", destination: "/dev/shm", options: defaultOptions + ["mode=1777", "size=65536k"]), .any(type: "cgroup2", source: "none", destination: "/sys/fs/cgroup", options: defaultOptions), - .any(type: "devpts", source: "devpts", destination: "/dev/pts", options: ["nosuid", "noexec", "gid=5", "mode=620", "ptmxmode=666"]), + .any(type: "devpts", source: "devpts", destination: "/dev/pts", options: ["nosuid", "noexec", "newinstance", "gid=5", "mode=0620", "ptmxmode=0666"]), + ] + } + + /// A more traditional default set of mounts that OCI runtimes typically employ. 
+ public static func defaultOCIMounts() -> [Mount] { + let defaultOptions = ["nosuid", "noexec", "nodev"] + return [ + .any(type: "proc", source: "proc", destination: "/proc"), + .any(type: "tmpfs", source: "tmpfs", destination: "/dev", options: ["nosuid", "mode=755", "size=65536k"]), + .any(type: "devpts", source: "devpts", destination: "/dev/pts", options: ["nosuid", "noexec", "newinstance", "gid=5", "mode=0620", "ptmxmode=0666"]), + .any(type: "sysfs", source: "sysfs", destination: "/sys", options: defaultOptions), + .any(type: "mqueue", source: "mqueue", destination: "/dev/mqueue", options: defaultOptions), + .any(type: "tmpfs", source: "tmpfs", destination: "/dev/shm", options: defaultOptions + ["mode=1777", "size=65536k"]), + .any(type: "cgroup2", source: "none", destination: "/sys/fs/cgroup", options: defaultOptions), ] } @@ -456,6 +484,7 @@ extension LinuxContainer { containerID: self.id, spec: spec, io: stdio, + ociRuntimePath: self.config.ociRuntimePath, agent: agent, vm: createdState.vm, logger: self.logger @@ -657,6 +686,7 @@ extension LinuxContainer { containerID: self.id, spec: spec, io: stdio, + ociRuntimePath: self.config.ociRuntimePath, agent: agent, vm: startedState.vm, logger: self.logger, @@ -693,6 +723,7 @@ extension LinuxContainer { containerID: self.id, spec: spec, io: stdio, + ociRuntimePath: self.config.ociRuntimePath, agent: agent, vm: state.vm, logger: self.logger, diff --git a/Sources/Containerization/LinuxPod.swift b/Sources/Containerization/LinuxPod.swift index 927f5dea..f3213e83 100644 --- a/Sources/Containerization/LinuxPod.swift +++ b/Sources/Containerization/LinuxPod.swift @@ -406,6 +406,7 @@ extension LinuxPod { containerID: containerID, spec: spec, io: stdio, + ociRuntimePath: nil, agent: agent, vm: createdState.vm, logger: self.logger @@ -613,6 +614,7 @@ extension LinuxPod { containerID: containerID, spec: spec, io: stdio, + ociRuntimePath: nil, agent: agent, vm: createdState.vm, logger: self.logger diff --git 
a/Sources/Containerization/LinuxProcess.swift b/Sources/Containerization/LinuxProcess.swift index 3d62016f..46c90bc3 100644 --- a/Sources/Containerization/LinuxProcess.swift +++ b/Sources/Containerization/LinuxProcess.swift @@ -96,6 +96,7 @@ public final class LinuxProcess: Sendable { private let ioSetup: Stdio private let agent: any VirtualMachineAgent private let vm: any VirtualMachineInstance + private let ociRuntimePath: String? private let logger: Logger? private let onDelete: (@Sendable () async -> Void)? @@ -104,6 +105,7 @@ public final class LinuxProcess: Sendable { containerID: String? = nil, spec: Spec, io: Stdio, + ociRuntimePath: String?, agent: any VirtualMachineAgent, vm: any VirtualMachineInstance, logger: Logger?, @@ -114,6 +116,7 @@ public final class LinuxProcess: Sendable { self.state = Mutex(.init(spec: spec, pid: -1, stdio: StdioHandles())) self.ioSetup = io self.agent = agent + self.ociRuntimePath = ociRuntimePath self.vm = vm self.logger = logger self.onDelete = onDelete @@ -260,6 +263,7 @@ extension LinuxProcess { stdinPort: self.ioSetup.stdin?.port, stdoutPort: self.ioSetup.stdout?.port, stderrPort: self.ioSetup.stderr?.port, + ociRuntimePath: self.ociRuntimePath, configuration: spec, options: nil ) diff --git a/Sources/Containerization/SandboxContext/SandboxContext.pb.swift b/Sources/Containerization/SandboxContext/SandboxContext.pb.swift index f4b63152..9412156d 100644 --- a/Sources/Containerization/SandboxContext/SandboxContext.pb.swift +++ b/Sources/Containerization/SandboxContext/SandboxContext.pb.swift @@ -414,6 +414,15 @@ public struct Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: @unche /// Clears the value of `stderr`. Subsequent reads from it will return its default value. public mutating func clearStderr() {self._stderr = nil} + public var ociRuntimePath: String { + get {return _ociRuntimePath ?? String()} + set {_ociRuntimePath = newValue} + } + /// Returns true if `ociRuntimePath` has been explicitly set. 
+ public var hasOciRuntimePath: Bool {return self._ociRuntimePath != nil} + /// Clears the value of `ociRuntimePath`. Subsequent reads from it will return its default value. + public mutating func clearOciRuntimePath() {self._ociRuntimePath = nil} + public var configuration: Data = Data() public var options: Data { @@ -433,6 +442,7 @@ public struct Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: @unche fileprivate var _stdin: UInt32? = nil fileprivate var _stdout: UInt32? = nil fileprivate var _stderr: UInt32? = nil + fileprivate var _ociRuntimePath: String? = nil fileprivate var _options: Data? = nil } @@ -1853,8 +1863,9 @@ extension Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: SwiftProto 3: .same(proto: "stdin"), 4: .same(proto: "stdout"), 5: .same(proto: "stderr"), - 6: .same(proto: "configuration"), - 7: .same(proto: "options"), + 6: .same(proto: "ociRuntimePath"), + 7: .same(proto: "configuration"), + 8: .same(proto: "options"), ] public mutating func decodeMessage(decoder: inout D) throws { @@ -1868,8 +1879,9 @@ extension Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: SwiftProto case 3: try { try decoder.decodeSingularUInt32Field(value: &self._stdin) }() case 4: try { try decoder.decodeSingularUInt32Field(value: &self._stdout) }() case 5: try { try decoder.decodeSingularUInt32Field(value: &self._stderr) }() - case 6: try { try decoder.decodeSingularBytesField(value: &self.configuration) }() - case 7: try { try decoder.decodeSingularBytesField(value: &self._options) }() + case 6: try { try decoder.decodeSingularStringField(value: &self._ociRuntimePath) }() + case 7: try { try decoder.decodeSingularBytesField(value: &self.configuration) }() + case 8: try { try decoder.decodeSingularBytesField(value: &self._options) }() default: break } } @@ -1895,11 +1907,14 @@ extension Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: SwiftProto try { if let v = self._stderr { try visitor.visitSingularUInt32Field(value: v, 
fieldNumber: 5) } }() + try { if let v = self._ociRuntimePath { + try visitor.visitSingularStringField(value: v, fieldNumber: 6) + } }() if !self.configuration.isEmpty { - try visitor.visitSingularBytesField(value: self.configuration, fieldNumber: 6) + try visitor.visitSingularBytesField(value: self.configuration, fieldNumber: 7) } try { if let v = self._options { - try visitor.visitSingularBytesField(value: v, fieldNumber: 7) + try visitor.visitSingularBytesField(value: v, fieldNumber: 8) } }() try unknownFields.traverse(visitor: &visitor) } @@ -1910,6 +1925,7 @@ extension Com_Apple_Containerization_Sandbox_V3_CreateProcessRequest: SwiftProto if lhs._stdin != rhs._stdin {return false} if lhs._stdout != rhs._stdout {return false} if lhs._stderr != rhs._stderr {return false} + if lhs._ociRuntimePath != rhs._ociRuntimePath {return false} if lhs.configuration != rhs.configuration {return false} if lhs._options != rhs._options {return false} if lhs.unknownFields != rhs.unknownFields {return false} diff --git a/Sources/Containerization/SandboxContext/SandboxContext.proto b/Sources/Containerization/SandboxContext/SandboxContext.proto index ea3b90ed..6405bc1a 100644 --- a/Sources/Containerization/SandboxContext/SandboxContext.proto +++ b/Sources/Containerization/SandboxContext/SandboxContext.proto @@ -148,8 +148,9 @@ message CreateProcessRequest { optional uint32 stdin = 3; optional uint32 stdout = 4; optional uint32 stderr = 5; - bytes configuration = 6; - optional bytes options = 7; + optional string ociRuntimePath = 6; + bytes configuration = 7; + optional bytes options = 8; } message CreateProcessResponse {} diff --git a/Sources/Containerization/VirtualMachineAgent.swift b/Sources/Containerization/VirtualMachineAgent.swift index 80c6da48..a15a344d 100644 --- a/Sources/Containerization/VirtualMachineAgent.swift +++ b/Sources/Containerization/VirtualMachineAgent.swift @@ -52,6 +52,7 @@ public protocol VirtualMachineAgent: Sendable { stdinPort: UInt32?, stdoutPort: 
UInt32?, stderrPort: UInt32?, + ociRuntimePath: String?, configuration: ContainerizationOCI.Spec, options: Data? ) async throws diff --git a/Sources/Containerization/Vminitd.swift b/Sources/Containerization/Vminitd.swift index c142e93f..90e48ea2 100644 --- a/Sources/Containerization/Vminitd.swift +++ b/Sources/Containerization/Vminitd.swift @@ -185,6 +185,7 @@ extension Vminitd: VirtualMachineAgent { stdinPort: UInt32?, stdoutPort: UInt32?, stderrPort: UInt32?, + ociRuntimePath: String?, configuration: ContainerizationOCI.Spec, options: Data? ) async throws { @@ -204,6 +205,9 @@ extension Vminitd: VirtualMachineAgent { if let containerID { $0.containerID = containerID } + if let ociRuntimePath { + $0.ociRuntimePath = ociRuntimePath + } $0.configuration = try enc.encode(configuration) }) } diff --git a/Sources/ContainerizationOCI/Spec.swift b/Sources/ContainerizationOCI/Spec.swift index 3694b1e4..1fa496fb 100644 --- a/Sources/ContainerizationOCI/Spec.swift +++ b/Sources/ContainerizationOCI/Spec.swift @@ -296,8 +296,8 @@ public struct Mount: Codable, Sendable { public var destination: String public var options: [String] - public var uidMappings: [LinuxIDMapping] - public var gidMappings: [LinuxIDMapping] + public var uidMappings: [LinuxIDMapping]? + public var gidMappings: [LinuxIDMapping]? public enum CodingKeys: String, CodingKey { case type @@ -313,8 +313,8 @@ public struct Mount: Codable, Sendable { source: String = "", destination: String, options: [String] = [], - uidMappings: [LinuxIDMapping] = [], - gidMappings: [LinuxIDMapping] = [] + uidMappings: [LinuxIDMapping]? = nil, + gidMappings: [LinuxIDMapping]? = nil ) { self.destination = destination self.type = type @@ -330,8 +330,18 @@ public struct Mount: Codable, Sendable { self.source = try container.decodeIfPresent(String.self, forKey: .source) ?? "" self.destination = try container.decode(String.self, forKey: .destination) self.options = try container.decodeIfPresent([String].self, forKey: .options) ?? 
[] - self.uidMappings = try container.decodeIfPresent([LinuxIDMapping].self, forKey: .uidMappings) ?? [] - self.gidMappings = try container.decodeIfPresent([LinuxIDMapping].self, forKey: .gidMappings) ?? [] + self.uidMappings = try container.decodeIfPresent([LinuxIDMapping].self, forKey: .uidMappings) + self.gidMappings = try container.decodeIfPresent([LinuxIDMapping].self, forKey: .gidMappings) + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(type, forKey: .type) + try container.encode(source, forKey: .source) + try container.encode(destination, forKey: .destination) + try container.encode(options, forKey: .options) + try container.encodeIfPresent(uidMappings, forKey: .uidMappings) + try container.encodeIfPresent(gidMappings, forKey: .gidMappings) } } diff --git a/Sources/ContainerizationOS/Socket/Socket.swift b/Sources/ContainerizationOS/Socket/Socket.swift index 971a37a3..28a8bb49 100644 --- a/Sources/ContainerizationOS/Socket/Socket.swift +++ b/Sources/ContainerizationOS/Socket/Socket.swift @@ -300,7 +300,8 @@ extension Socket { return Socket( fd: clientFD, type: newSocketType, - closeOnDeinit: closeOnDeinit + closeOnDeinit: closeOnDeinit, + connected: true ) } diff --git a/Sources/cctl/RootfsCommand.swift b/Sources/cctl/RootfsCommand.swift index 9356d276..fb4b7943 100644 --- a/Sources/cctl/RootfsCommand.swift +++ b/Sources/cctl/RootfsCommand.swift @@ -55,6 +55,9 @@ extension Application { @Option(name: .long, help: "Path to vminitd") var vminitd: String + @Option(name: .long, help: "Path to OCI runtime") + var ociRuntime: String? + // The path where the intermediate tar archive is created. 
@Argument var tarPath: String @@ -144,6 +147,15 @@ extension Application { entry.size = Int64(data.count) try writer.writeEntry(entry: entry, data: data) + if let ociRuntimePath = self.ociRuntime { + src = URL(fileURLWithPath: ociRuntimePath) + let fileName = src.lastPathComponent + data = try Data(contentsOf: src) + entry.path = "sbin/\(fileName)" + entry.size = Int64(data.count) + try writer.writeEntry(entry: entry, data: data) + } + for addFile in addFiles { let paths = addFile.components(separatedBy: ":") guard paths.count == 2 else { diff --git a/Sources/cctl/RunCommand.swift b/Sources/cctl/RunCommand.swift index 66e3d99d..1670e866 100644 --- a/Sources/cctl/RunCommand.swift +++ b/Sources/cctl/RunCommand.swift @@ -58,6 +58,9 @@ extension Application { @Option(name: .customLong("ns"), help: "Nameserver addresses") var nameservers: [String] = [] + @Option(name: .long, help: "Path to OCI runtime to use for spawning the container") + var ociRuntimePath: String? + @Option( name: [.customLong("kernel"), .customShort("k")], help: "Kernel binary path", completion: .file(), transform: { str in @@ -132,6 +135,10 @@ extension Application { )) } config.hosts = hosts + if let ociRuntimePath { + config.ociRuntimePath = ociRuntimePath + config.mounts = LinuxContainer.defaultOCIMounts() + } } defer { diff --git a/Tests/ContainerizationOCITests/OCISpecTests.swift b/Tests/ContainerizationOCITests/OCISpecTests.swift index 80145349..3507abff 100644 --- a/Tests/ContainerizationOCITests/OCISpecTests.swift +++ b/Tests/ContainerizationOCITests/OCISpecTests.swift @@ -140,7 +140,7 @@ struct OCISpecTests { #expect(decodedSpec.source == "") #expect(decodedSpec.destination == destination) #expect(decodedSpec.options.isEmpty) - #expect(decodedSpec.uidMappings.isEmpty) - #expect(decodedSpec.gidMappings.isEmpty) + #expect(decodedSpec.uidMappings == nil) + #expect(decodedSpec.gidMappings == nil) } } diff --git a/vminitd/Sources/vminitd/ContainerProcess.swift 
b/vminitd/Sources/vminitd/ContainerProcess.swift new file mode 100644 index 00000000..4414a846 --- /dev/null +++ b/vminitd/Sources/vminitd/ContainerProcess.swift @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +import ContainerizationOS +import Foundation + +/// Exit status information for a container process +struct ContainerExitStatus: Sendable { + var exitCode: Int32 + var exitedAt: Date +} + +/// Protocol for managing container processes +/// +/// This protocol abstracts the underlying container runtime implementation, +/// allowing for different backends like vmexec or runc. +protocol ContainerProcess: Sendable { + /// Unique identifier for the container process + var id: String { get } + + /// Process ID of the running container (nil if not started) + var pid: Int32? 
{ get } + + /// Start the container process + /// - Returns: The process ID of the started container + /// - Throws: If the process fails to start + func start() async throws -> Int32 + + /// Wait for the container process to exit + /// - Returns: Exit status information when the process exits + func wait() async -> ContainerExitStatus + + /// Send a signal to the container process + /// - Parameter signal: The signal number to send + /// - Throws: If the signal cannot be sent + func kill(_ signal: Int32) async throws + + /// Resize the terminal for the container process + /// - Parameter size: The new terminal size + /// - Throws: If the terminal cannot be resized or process doesn't have a terminal + func resize(size: Terminal.Size) throws + + /// Close stdin for the container process + /// - Throws: If stdin cannot be closed + func closeStdin() throws + + /// Delete the container process and cleanup resources + /// - Throws: If cleanup fails + func delete() async throws + + /// Set the exit status of the process. + func setExit(_ status: Int32) +} diff --git a/vminitd/Sources/vminitd/ManagedContainer.swift b/vminitd/Sources/vminitd/ManagedContainer.swift index 013e9a93..8d1d91ae 100644 --- a/vminitd/Sources/vminitd/ManagedContainer.swift +++ b/vminitd/Sources/vminitd/ManagedContainer.swift @@ -23,12 +23,13 @@ import Logging actor ManagedContainer { let id: String - let initProcess: ManagedProcess + let initProcess: any ContainerProcess private let cgroupManager: Cgroup2Manager private let log: Logger private let bundle: ContainerizationOCI.Bundle - private var execs: [String: ManagedProcess] = [:] + private let needsCgroupCleanup: Bool + private var execs: [String: any ContainerProcess] = [:] var pid: Int32? { self.initProcess.pid @@ -38,8 +39,9 @@ actor ManagedContainer { id: String, stdio: HostStdio, spec: ContainerizationOCI.Spec, + ociRuntimePath: String? 
= nil, log: Logger - ) throws { + ) async throws { var cgroupsPath: String if let cgPath = spec.linux?.cgroupsPath { cgroupsPath = cgPath @@ -62,15 +64,38 @@ actor ManagedContainer { do { try cgManager.toggleAllAvailableControllers(enable: true) - let initProcess = try ManagedProcess( - id: id, - stdio: stdio, - bundle: bundle, - cgroupManager: cgManager, - owningPid: nil, - log: log - ) - log.info("created managed init process") + let initProcess: any ContainerProcess + + if let runtimePath = ociRuntimePath { + // Use runc runtime + let runc = await ProcessSupervisor.default.getRuncWithReaper( + Runc( + command: runtimePath, + root: "/run/runc" + ) + ) + initProcess = try RuncProcess( + id: id, + stdio: stdio, + bundle: bundle, + runc: runc, + log: log + ) + self.needsCgroupCleanup = false + log.info("created runc init process with runtime: \(runtimePath)") + } else { + // Use vmexec runtime + initProcess = try ManagedProcess( + id: id, + stdio: stdio, + bundle: bundle, + cgroupManager: cgManager, + owningPid: nil, + log: log + ) + self.needsCgroupCleanup = true + log.info("created vmexec init process") + } self.cgroupManager = cgManager self.initProcess = initProcess @@ -122,14 +147,14 @@ extension ManagedContainer { return try await ProcessSupervisor.default.start(process: proc) } - func wait(execID: String) async throws -> ManagedProcess.ExitStatus { + func wait(execID: String) async throws -> ContainerExitStatus { let proc = try self.getExecOrInit(execID: execID) return await proc.wait() } - func kill(execID: String, _ signal: Int32) throws { + func kill(execID: String, _ signal: Int32) async throws { let proc = try self.getExecOrInit(execID: execID) - try proc.kill(signal) + try await proc.kill(signal) } func resize(execID: String, size: Terminal.Size) throws { @@ -152,16 +177,22 @@ extension ManagedContainer { self.execs.removeValue(forKey: id) } - func delete() throws { + func delete() async throws { + // Delete the init process if it's a RuncProcess + try 
await self.initProcess.delete() + + // Delete the bundle and cgroup try self.bundle.delete() - try self.cgroupManager.delete(force: true) + if self.needsCgroupCleanup { + try self.cgroupManager.delete(force: true) + } } func stats() throws -> Cgroup2Stats { try self.cgroupManager.stats() } - func getExecOrInit(execID: String) throws -> ManagedProcess { + func getExecOrInit(execID: String) throws -> any ContainerProcess { if execID == self.id { return self.initProcess } diff --git a/vminitd/Sources/vminitd/ManagedProcess.swift b/vminitd/Sources/vminitd/ManagedProcess.swift index 4df3f9a9..5f68a0af 100644 --- a/vminitd/Sources/vminitd/ManagedProcess.swift +++ b/vminitd/Sources/vminitd/ManagedProcess.swift @@ -24,7 +24,7 @@ import GRPC import Logging import Synchronization -final class ManagedProcess: Sendable { +final class ManagedProcess: ContainerProcess, Sendable { // swiftlint: disable type_name protocol IO { func attach(pid: Int32, fd: Int32) throws @@ -36,19 +36,14 @@ final class ManagedProcess: Sendable { } // swiftlint: enable type_name - struct ExitStatus { - var exitStatus: Int32 - var exitedAt: Date - } - private struct State { init(io: IO) { self.io = io } let io: IO - var waiters: [CheckedContinuation] = [] - var exitStatus: ExitStatus? = nil + var waiters: [CheckedContinuation] = [] + var exitStatus: ContainerExitStatus? = nil var pid: Int32? 
} @@ -154,7 +149,7 @@ final class ManagedProcess: Sendable { } extension ManagedProcess { - func start() throws -> Int32 { + func start() async throws -> Int32 { do { return try self.state.withLock { log.info( @@ -274,7 +269,7 @@ extension ManagedProcess { "status": "\(status)" ]) - let exitStatus = ExitStatus(exitStatus: status, exitedAt: Date.now) + let exitStatus = ContainerExitStatus(exitCode: status, exitedAt: Date.now) state.exitStatus = exitStatus do { @@ -293,7 +288,7 @@ extension ManagedProcess { } /// Wait on the process to exit - func wait() async -> ExitStatus { + func wait() async -> ContainerExitStatus { await withCheckedContinuation { cont in self.state.withLock { if let status = $0.exitStatus { @@ -305,7 +300,7 @@ extension ManagedProcess { } } - func kill(_ signal: Int32) throws { + func kill(_ signal: Int32) async throws { try self.state.withLock { guard let pid = $0.pid else { throw ContainerizationError(.invalidState, message: "process PID is required") @@ -336,4 +331,9 @@ extension ManagedProcess { try $0.io.closeStdin() } } + + func delete() async throws { + // vmexec doesn't require explicit cleanup - the process is cleaned up + // when it exits and IO is closed via setExit() + } } diff --git a/vminitd/Sources/vminitd/ProcessSupervisor.swift b/vminitd/Sources/vminitd/ProcessSupervisor.swift index a0a70929..0b43d4ac 100644 --- a/vminitd/Sources/vminitd/ProcessSupervisor.swift +++ b/vminitd/Sources/vminitd/ProcessSupervisor.swift @@ -24,7 +24,7 @@ actor ProcessSupervisor { private let queue: DispatchQueue // `DispatchSourceSignal` is thread-safe. private nonisolated(unsafe) let source: DispatchSourceSignal - private var processes = [ManagedProcess]() + private var processes = [any ContainerProcess]() private let reaperCommandRunner = ReaperCommandRunner() var log: Logger? 
@@ -65,9 +65,6 @@ actor ProcessSupervisor { let exited = Reaper.reap() self.log?.debug("finished wait4 of \(exited.count) processes") - // Notify runc waiters - // NOTE: Runc/OCI runtimes are not hooked up at the moment so this is - // a nop, but ManagedProcess will be transitioned to this model. for (pid, status) in exited { reaperCommandRunner.notifyExit(pid: pid, status: status) } @@ -98,7 +95,7 @@ actor ProcessSupervisor { } } - func start(process: ManagedProcess) throws -> Int32 { + func start(process: any ContainerProcess) async throws -> Int32 { self.log?.debug("in supervisor lock to start process") defer { self.log?.debug("out of supervisor lock to start process") @@ -106,7 +103,7 @@ actor ProcessSupervisor { do { self.processes.append(process) - return try process.start() + return try await process.start() } catch { self.log?.error("process start failed \(error)", metadata: ["process-id": "\(process.id)"]) throw error diff --git a/vminitd/Sources/vminitd/Runc/ConsoleSocket.swift b/vminitd/Sources/vminitd/Runc/ConsoleSocket.swift index c30520ab..825baaed 100644 --- a/vminitd/Sources/vminitd/Runc/ConsoleSocket.swift +++ b/vminitd/Sources/vminitd/Runc/ConsoleSocket.swift @@ -23,7 +23,6 @@ import Foundation public final class ConsoleSocket: Sendable { private let socket: Socket private let socketPath: String - private let shouldRemove: Bool /// The path to the console socket public var path: String { socketPath } @@ -43,7 +42,6 @@ public final class ConsoleSocket: Sendable { let socketType = try UnixType(path: absPath, unlinkExisting: true) self.socket = try Socket(type: socketType) - self.shouldRemove = false try socket.listen() } @@ -68,23 +66,13 @@ public final class ConsoleSocket: Sendable { public func receiveMaster() throws -> Int32 { let connection = try socket.accept() defer { try? 
connection.close() } - return try connection.receiveFileDescriptor() } /// Close the socket and optionally remove the socket file public func close() throws { try socket.close() - - if shouldRemove { - try? FileManager.default.removeItem(atPath: socketPath) - - let pathURL = URL(fileURLWithPath: socketPath) - let dir = pathURL.deletingLastPathComponent().path - if dir.contains("runc-console-") { - try? FileManager.default.removeItem(atPath: dir) - } - } + try FileManager.default.removeItem(atPath: socketPath) } deinit { diff --git a/vminitd/Sources/vminitd/Runc/Runc.swift b/vminitd/Sources/vminitd/Runc/Runc.swift index 2408e439..ec7e3fbd 100644 --- a/vminitd/Sources/vminitd/Runc/Runc.swift +++ b/vminitd/Sources/vminitd/Runc/Runc.swift @@ -26,6 +26,25 @@ enum LogFormat: String, Sendable { /// Configuration and client for interacting with the runc binary struct Runc: Sendable { + /// IO configuration for runc operations + struct IO: Sendable { + var stdin: FileHandle? + var stdout: FileHandle? + var stderr: FileHandle? + + init( + stdin: FileHandle? = nil, + stdout: FileHandle? = nil, + stderr: FileHandle? = nil + ) { + self.stdin = stdin + self.stdout = stdout + self.stderr = stderr + } + + static let inherit = IO() + } + /// Path to the runc binary var command: String @@ -111,13 +130,17 @@ struct CreateOpts: Sendable { /// Additional file descriptors to pass to the container var extraFiles: [FileHandle] + /// IO configuration for the runc process + var io: Runc.IO + init( pidFile: String? = nil, consoleSocket: String? 
= nil, detach: Bool = false, noPivot: Bool = false, noNewKeyring: Bool = false, - extraFiles: [FileHandle] = [] + extraFiles: [FileHandle] = [], + io: Runc.IO = .inherit ) { self.pidFile = pidFile self.consoleSocket = consoleSocket @@ -125,6 +148,7 @@ struct CreateOpts: Sendable { self.noPivot = noPivot self.noNewKeyring = noNewKeyring self.extraFiles = extraFiles + self.io = io } } @@ -142,16 +166,21 @@ struct ExecOpts: Sendable { /// Path to process.json file var processPath: String? + /// IO configuration for the runc process + var io: Runc.IO + init( pidFile: String? = nil, consoleSocket: String? = nil, detach: Bool = false, - processPath: String? = nil + processPath: String? = nil, + io: Runc.IO = .inherit ) { self.pidFile = pidFile self.consoleSocket = consoleSocket self.detach = detach self.processPath = processPath + self.io = io } } @@ -424,6 +453,9 @@ extension Runc { try await executeVoid( args: args, + stdin: opts.io.stdin, + stdout: opts.io.stdout, + stderr: opts.io.stderr, extraFiles: opts.extraFiles, directory: bundle ) @@ -474,6 +506,9 @@ extension Runc { try await executeVoid( args: args, + stdin: opts.io.stdin, + stdout: opts.io.stdout, + stderr: opts.io.stderr, extraFiles: opts.extraFiles, directory: bundle ) @@ -528,10 +563,7 @@ extension Runc { func exec( id: String, processSpec: String, - opts: ExecOpts = ExecOpts(), - stdin: FileHandle? = nil, - stdout: FileHandle? = nil, - stderr: FileHandle? = nil + opts: ExecOpts = ExecOpts() ) async throws -> Int? 
{ var args = baseArgs() + ["exec"] @@ -555,9 +587,9 @@ extension Runc { try await executeVoid( args: args, - stdin: stdin, - stdout: stdout, - stderr: stderr + stdin: opts.io.stdin, + stdout: opts.io.stdout, + stderr: opts.io.stderr ) // Read PID if pidFile was specified diff --git a/vminitd/Sources/vminitd/RuncProcess.swift b/vminitd/Sources/vminitd/RuncProcess.swift new file mode 100644 index 00000000..3ddff50a --- /dev/null +++ b/vminitd/Sources/vminitd/RuncProcess.swift @@ -0,0 +1,564 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//===----------------------------------------------------------------------===//
+
+#if os(Linux)
+
+import Containerization
+import ContainerizationError
+import ContainerizationOCI
+import ContainerizationOS
+import Foundation
+import Logging
+import Synchronization
+
+/// A container process implementation that uses runc as the OCI runtime.
+///
+/// The process moves through `initial -> creating -> running -> exited`:
+/// `start()` runs `runc create` followed by `runc start`, and `setExit(_:)`
+/// records the exit status and resumes every caller suspended in `wait()`.
+///
+/// NOTE(review): if `start()` throws after the state moved to `.creating`,
+/// the state is never reset, so `delete()` skips `runc delete` even when
+/// `runc create` succeeded. Confirm this is intended.
+final class RuncProcess: ContainerProcess, Sendable {
+    // swiftlint: disable type_name
+    /// Stdio plumbing between the host and the process spawned by runc.
+    protocol IO: Sendable {
+        /// Attaches the console descriptor received over the console socket.
+        func attachConsole(fd: Int32) throws
+        /// Sets up the host-facing side of the stdio streams.
+        func create() throws
+        /// Returns the stdio configuration to hand to runc.
+        func getIO() -> Runc.IO
+        /// Releases whatever was handed to runc once it has been spawned.
+        func closeAfterExec() throws
+        /// Resizes the attached terminal, if any.
+        func resize(size: Terminal.Size) throws
+        /// Tears down all stdio relays.
+        func close() throws
+        /// Tears down only the stdin relay.
+        func closeStdin() throws
+    }
+    // swiftlint: enable type_name
+
+    /// Lifecycle of the runc-managed process.
+    private enum ProcessState {
+        case initial
+        case creating
+        case running(pid: Int32)
+        case exited(ContainerExitStatus)
+    }
+
+    /// Mutable state guarded by `state`.
+    private struct State {
+        var state: ProcessState = .initial
+        /// Callers suspended in `wait()` until the process exits.
+        var waiters: [CheckedContinuation<ContainerExitStatus, Never>] = []
+    }
+
+    let id: String
+
+    private let log: Logger
+    private let runc: Runc
+    private let io: any IO
+    private let state: Mutex<State>
+    private let terminal: Bool
+    private let bundle: ContainerizationOCI.Bundle
+    private let consoleSocket: ConsoleSocket?
+
+    /// The PID reported by `runc create`, or `nil` unless the process is running.
+    var pid: Int32? {
+        self.state.withLock {
+            if case .running(let pid) = $0.state {
+                return pid
+            }
+            return nil
+        }
+    }
+
+    /// Creates the process wrapper and eagerly sets up its stdio.
+    ///
+    /// - Parameters:
+    ///   - id: Identifier passed to runc and attached to every log line.
+    ///   - stdio: Host stdio description; `stdio.terminal` selects terminal I/O.
+    ///   - bundle: OCI bundle handed to `runc create`.
+    ///   - runc: The runc client used to drive the container.
+    ///   - log: Base logger.
+    /// - Throws: Any error from creating the console socket or the I/O.
+    init(
+        id: String,
+        stdio: HostStdio,
+        bundle: ContainerizationOCI.Bundle,
+        runc: Runc,
+        log: Logger
+    ) throws {
+        self.id = id
+        var log = log
+        log[metadataKey: "id"] = "\(id)"
+        self.log = log
+        self.runc = runc
+        self.bundle = bundle
+        self.terminal = stdio.terminal
+
+        let io: any IO
+        var consoleSocket: ConsoleSocket? = nil
+
+        do {
+            if stdio.terminal {
+                log.info("setting up terminal I/O for runc")
+                consoleSocket = try ConsoleSocket.temporary()
+                io = try RuncTerminalIO(
+                    stdio: stdio,
+                    log: log
+                )
+            } else {
+                io = RuncStandardIO(
+                    stdio: stdio,
+                    log: log
+                )
+            }
+
+            log.info("starting I/O for runc")
+            try io.create()
+        } catch {
+            // The console socket is already allocated at this point; release
+            // it so a failed init does not leak it.
+            try? consoleSocket?.close()
+            throw error
+        }
+
+        self.consoleSocket = consoleSocket
+        self.io = io
+        self.state = Mutex(State())
+    }
+
+    /// Creates and starts the container through runc.
+    ///
+    /// - Returns: The PID reported by `runc create`.
+    /// - Throws: `.invalidState` if the process was already started,
+    ///   `.internalError` if runc reports no PID, or any runc / I/O error.
+    func start() async throws -> Int32 {
+        try self.state.withLock {
+            guard case .initial = $0.state else {
+                throw ContainerizationError(
+                    .invalidState,
+                    message: "container already started"
+                )
+            }
+            $0.state = .creating
+        }
+
+        log.info(
+            "starting runc process",
+            metadata: [
+                "id": "\(id)"
+            ])
+
+        let pidFilePath = self.bundle.path.appendingPathComponent("runc-pid").path
+        let runcIO = self.io.getIO()
+
+        let opts: CreateOpts
+        if let consoleSocket {
+            opts = CreateOpts(
+                pidFile: pidFilePath,
+                consoleSocket: consoleSocket.path,
+                io: runcIO
+            )
+        } else {
+            opts = CreateOpts(
+                pidFile: pidFilePath,
+                io: runcIO
+            )
+        }
+
+        guard
+            let pidInt = try await self.runc.create(
+                id: self.id,
+                bundle: self.bundle.path.path,
+                opts: opts
+            )
+        else {
+            throw ContainerizationError(
+                .internalError,
+                message: "runc create did not return a PID"
+            )
+        }
+
+        let pid = Int32(pidInt)
+
+        self.log.info(
+            "container created",
+            metadata: [
+                "pid": "\(pid)"
+            ])
+
+        // Close the pipe ends we gave to runc now that it has inherited them
+        // and attach console if in terminal mode
+        if self.terminal, let consoleSocket = self.consoleSocket {
+            self.log.info("waiting for console FD from runc")
+            let ptyFd = try consoleSocket.receiveMaster()
+
+            self.log.info(
+                "received PTY FD from runc, attaching",
+                metadata: [
+                    "id": "\(self.id)"
+                ])
+
+            try self.io.closeAfterExec()
+            try self.io.attachConsole(fd: ptyFd)
+        } else {
+            try self.io.closeAfterExec()
+        }
+
+        try await self.runc.start(id: self.id)
+
+        self.state.withLock {
+            $0.state = .running(pid: pid)
+        }
+
+        self.log.info(
+            "started runc process",
+            metadata: [
+                "pid": "\(pid)",
+                "id": "\(self.id)",
+            ])
+
+        return pid
+    }
+
+    /// Records the exit status, closes the I/O and resumes all waiters.
+    ///
+    /// - Parameter status: Exit code of the process.
+    func setExit(_ status: Int32) {
+        self.state.withLock {
+            self.log.info(
+                "runc process exit",
+                metadata: [
+                    "status": "\(status)"
+                ])
+
+            let exitStatus = ContainerExitStatus(exitCode: status, exitedAt: Date.now)
+            $0.state = .exited(exitStatus)
+
+            do {
+                try self.io.close()
+            } catch {
+                self.log.error("failed to close I/O for process: \(error)")
+            }
+
+            for waiter in $0.waiters {
+                waiter.resume(returning: exitStatus)
+            }
+
+            self.log.debug("\($0.waiters.count) runc process waiters signaled")
+            $0.waiters.removeAll()
+        }
+    }
+
+    /// Suspends until the process has exited.
+    ///
+    /// - Returns: The recorded exit status; returns immediately if the
+    ///   process has already exited.
+    func wait() async -> ContainerExitStatus {
+        await withCheckedContinuation { cont in
+            self.state.withLock {
+                if case .exited(let exitStatus) = $0.state {
+                    cont.resume(returning: exitStatus)
+                    return
+                }
+                $0.waiters.append(cont)
+            }
+        }
+    }
+
+    /// Sends `signal` to the container through `runc kill`.
+    func kill(_ signal: Int32) async throws {
+        self.log.info("sending signal \(signal) to runc container \(id)")
+        try await self.runc.kill(id: self.id, signal: signal)
+    }
+
+    /// Resizes the terminal; a no-op once the process has exited.
+    func resize(size: Terminal.Size) throws {
+        try self.state.withLock {
+            if case .exited = $0.state {
+                return
+            }
+            try self.io.resize(size: size)
+        }
+    }
+
+    /// Closes the stdin relay of the process.
+    func closeStdin() throws {
+        try self.io.closeStdin()
+    }
+
+    /// Force-deletes the container through runc and releases the console socket.
+    ///
+    /// `runc delete` is skipped while the state is `.initial` or `.creating`.
+    /// The console socket is allocated in `init`, so it is released on every
+    /// path, including the skip path and a failing `runc delete`.
+    ///
+    /// - Throws: Any error from `runc delete`, or from closing the console
+    ///   socket after a successful delete.
+    func delete() async throws {
+        let shouldDelete = self.state.withLock { state -> Bool in
+            switch state.state {
+            case .initial, .creating:
+                return false
+            case .running, .exited:
+                return true
+            }
+        }
+
+        guard shouldDelete else {
+            log.info("container was never created, skipping delete")
+            // This path did not throw before; log a close failure instead of
+            // surfacing a new error to callers.
+            do {
+                try self.consoleSocket?.close()
+            } catch {
+                log.error("failed to close console socket: \(error)")
+            }
+            return
+        }
+
+        log.info("deleting runc container", metadata: ["id": "\(id)"])
+
+        do {
+            try await self.runc.delete(
+                id: self.id,
+                opts: DeleteOpts(force: true)
+            )
+        } catch {
+            // Still release the socket, but report the delete failure.
+            try? self.consoleSocket?.close()
+            throw error
+        }
+
+        try self.consoleSocket?.close()
+    }
+}
+
+// MARK: - RuncTerminalIO
+
+/// Terminal-mode stdio: relays host vsock streams to and from the console
+/// descriptor attached via `attachConsole(fd:)`.
+///
+/// NOTE(review): the vsock sockets are created with `closeOnDeinit: false`.
+/// Presumably `IOPair.close()` closes them; confirm, and confirm what closes
+/// them when `attachConsole(fd:)` is never called.
+final class RuncTerminalIO: RuncProcess.IO & Sendable {
+    private struct State {
+        var stdinSocket: Socket?
+        var stdoutSocket: Socket?
+
+        var stdin: IOPair?
+        var stdout: IOPair?
+        var terminal: Terminal?
+    }
+
+    private let log: Logger?
+    private let hostStdio: HostStdio
+    private let state: Mutex<State>
+
+    init(
+        stdio: HostStdio,
+        log: Logger?
+    ) throws {
+        self.hostStdio = stdio
+        self.log = log
+        self.state = Mutex(State())
+    }
+
+    /// Resizes the attached terminal; does nothing before a console is attached.
+    func resize(size: Terminal.Size) throws {
+        try self.state.withLock {
+            if let terminal = $0.terminal {
+                try terminal.resize(size: size)
+            }
+        }
+    }
+
+    /// Connects the host vsock sockets for stdin and stdout, when configured.
+    func create() throws {
+        try self.state.withLock {
+            if let stdinPort = self.hostStdio.stdin {
+                let type = VsockType(
+                    port: stdinPort,
+                    cid: VsockType.hostCID
+                )
+                let stdinSocket = try Socket(type: type, closeOnDeinit: false)
+                try stdinSocket.connect()
+                $0.stdinSocket = stdinSocket
+            }
+
+            if let stdoutPort = self.hostStdio.stdout {
+                let type = VsockType(
+                    port: stdoutPort,
+                    cid: VsockType.hostCID
+                )
+                let stdoutSocket = try Socket(type: type, closeOnDeinit: false)
+                try stdoutSocket.connect()
+                $0.stdoutSocket = stdoutSocket
+            }
+        }
+    }
+
+    func getIO() -> Runc.IO {
+        // Terminal mode doesn't pass pipes to runc, it uses the console socket
+        .inherit
+    }
+
+    func closeAfterExec() throws {
+        // No pipes to close in terminal mode
+    }
+
+    /// Wraps `fd` in a `Terminal` and starts relaying between it and the
+    /// host sockets connected in `create()`.
+    func attachConsole(fd: Int32) throws {
+        try self.state.withLock {
+            let term = try Terminal(descriptor: fd, setInitState: false)
+            $0.terminal = term
+
+            if let stdinSocket = $0.stdinSocket {
+                let pair = IOPair(
+                    readFrom: stdinSocket,
+                    writeTo: term,
+                    reason: "RuncTerminalIO stdin",
+                    logger: log
+                )
+                try pair.relay(ignoreHup: true)
+                $0.stdin = pair
+            }
+
+            if let stdoutSocket = $0.stdoutSocket {
+                let pair = IOPair(
+                    readFrom: term,
+                    writeTo: stdoutSocket,
+                    reason: "RuncTerminalIO stdout",
+                    logger: log
+                )
+                try pair.relay(ignoreHup: true)
+                $0.stdout = pair
+            }
+        }
+    }
+
+    /// Closes both relays and drops the terminal reference.
+    func close() throws {
+        self.state.withLock {
+            if let stdin = $0.stdin {
+                stdin.close()
+                $0.stdin = nil
+            }
+            if let stdout = $0.stdout {
+                stdout.close()
+                $0.stdout = nil
+            }
+            $0.terminal = nil
+        }
+    }
+
+    /// Closes only the stdin relay.
+    func closeStdin() throws {
+        self.state.withLock {
+            if let stdin = $0.stdin {
+                stdin.close()
+                $0.stdin = nil
+            }
+        }
+    }
+}
+
+// MARK: - RuncStandardIO
+
+/// Pipe-based stdio: each configured stream gets a `Pipe` whose far end is
+/// handed to runc and whose near end is relayed to or from a host vsock socket.
+final class RuncStandardIO: RuncProcess.IO & Sendable {
+    private struct State {
+        var stdin: IOPair?
+        var stdout: IOPair?
+        var stderr: IOPair?
+
+        var stdinPipe: Pipe?
+        var stdoutPipe: Pipe?
+        var stderrPipe: Pipe?
+    }
+
+    private let log: Logger?
+    private let hostStdio: HostStdio
+    private let state: Mutex<State>
+
+    init(
+        stdio: HostStdio,
+        log: Logger?
+    ) {
+        self.hostStdio = stdio
+        self.log = log
+        self.state = Mutex(State())
+    }
+
+    // NOP for non-terminal
+    func attachConsole(fd: Int32) throws {}
+
+    /// Creates a pipe and a connected host socket for every configured
+    /// stream and starts relaying between them.
+    func create() throws {
+        try self.state.withLock {
+            if let stdinPort = self.hostStdio.stdin {
+                let inPipe = Pipe()
+                $0.stdinPipe = inPipe
+
+                let type = VsockType(
+                    port: stdinPort,
+                    cid: VsockType.hostCID
+                )
+                let stdinSocket = try Socket(type: type, closeOnDeinit: false)
+                try stdinSocket.connect()
+
+                // Host socket -> write end of the pipe runc reads stdin from.
+                let pair = IOPair(
+                    readFrom: stdinSocket,
+                    writeTo: inPipe.fileHandleForWriting,
+                    reason: "RuncStandardIO stdin",
+                    logger: log
+                )
+                $0.stdin = pair
+                try pair.relay()
+            }
+
+            if let stdoutPort = self.hostStdio.stdout {
+                let outPipe = Pipe()
+                $0.stdoutPipe = outPipe
+
+                let type = VsockType(
+                    port: stdoutPort,
+                    cid: VsockType.hostCID
+                )
+                let stdoutSocket = try Socket(type: type, closeOnDeinit: false)
+                try stdoutSocket.connect()
+
+                // Read end of the stdout pipe -> host socket.
+                let pair = IOPair(
+                    readFrom: outPipe.fileHandleForReading,
+                    writeTo: stdoutSocket,
+                    reason: "RuncStandardIO stdout",
+                    logger: log
+                )
+                $0.stdout = pair
+                try pair.relay()
+            }
+
+            if let stderrPort = self.hostStdio.stderr {
+                let errPipe = Pipe()
+                $0.stderrPipe = errPipe
+
+                let type = VsockType(
+                    port: stderrPort,
+                    cid: VsockType.hostCID
+                )
+                let stderrSocket = try Socket(type: type, closeOnDeinit: false)
+                try stderrSocket.connect()
+
+                // Read end of the stderr pipe -> host socket.
+                let pair = IOPair(
+                    readFrom: errPipe.fileHandleForReading,
+                    writeTo: stderrSocket,
+                    reason: "RuncStandardIO stderr",
+                    logger: log
+                )
+                $0.stderr = pair
+                try pair.relay()
+            }
+        }
+    }
+
+    /// Returns the pipe ends runc should use for the process stdio.
+    func getIO() -> Runc.IO {
+        self.state.withLock {
+            Runc.IO(
+                stdin: $0.stdinPipe?.fileHandleForReading,
+                stdout: $0.stdoutPipe?.fileHandleForWriting,
+                stderr: $0.stderrPipe?.fileHandleForWriting
+            )
+        }
+    }
+
+    /// Closes the pipe ends we gave to runc (the child inherited them).
+    ///
+    /// Every end is attempted even if an earlier close fails, and the stored
+    /// pipes are always cleared.
+    ///
+    /// - Throws: The first close error encountered, after all ends were tried.
+    func closeAfterExec() throws {
+        try self.state.withLock {
+            let inheritedEnds: [FileHandle?] = [
+                $0.stdinPipe?.fileHandleForReading,
+                $0.stdoutPipe?.fileHandleForWriting,
+                $0.stderrPipe?.fileHandleForWriting,
+            ]
+
+            $0.stdinPipe = nil
+            $0.stdoutPipe = nil
+            $0.stderrPipe = nil
+
+            var firstError: (any Error)?
+            for case let handle? in inheritedEnds {
+                do {
+                    try handle.close()
+                } catch {
+                    firstError = firstError ?? error
+                }
+            }
+
+            if let firstError {
+                throw firstError
+            }
+        }
+    }
+
+    /// Always throws `.unsupported`: there is no terminal to resize.
+    func resize(size: Terminal.Size) throws {
+        throw ContainerizationError(.unsupported, message: "resize not supported for standard IO")
+    }
+
+    /// Closes the stdin, stdout and stderr relays.
+    func close() throws {
+        self.state.withLock {
+            if let stdin = $0.stdin {
+                stdin.close()
+                $0.stdin = nil
+            }
+
+            if let stdout = $0.stdout {
+                stdout.close()
+                $0.stdout = nil
+            }
+
+            if let stderr = $0.stderr {
+                stderr.close()
+                $0.stderr = nil
+            }
+        }
+    }
+
+    /// Closes only the stdin relay.
+    func closeStdin() throws {
+        self.state.withLock {
+            if let stdin = $0.stdin {
+                stdin.close()
+                $0.stdin = nil
+            }
+        }
+    }
+}
+
+#endif // os(Linux)
diff --git a/vminitd/Sources/vminitd/Server+GRPC.swift b/vminitd/Sources/vminitd/Server+GRPC.swift index 868a96b2..8be68389 100644 --- a/vminitd/Sources/vminitd/Server+GRPC.swift +++ b/vminitd/Sources/vminitd/Server+GRPC.swift @@ -488,10 +488,11 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid try hostname.write(toFile: hostnamePath.path, atomically: true, encoding: .utf8) } - let ctr = try ManagedContainer( + let ctr = try await ManagedContainer( id: request.id, stdio: stdioPorts, spec: ociSpec, + ociRuntimePath: request.hasOciRuntimePath ?
request.ociRuntimePath : nil, log: self.log ) try await self.state.add(container: ctr) @@ -685,7 +686,7 @@ extension Initd: Com_Apple_Containerization_Sandbox_V3_SandboxContextAsyncProvid let exitStatus = try await ctr.wait(execID: request.id) return .with { - $0.exitCode = exitStatus.exitStatus + $0.exitCode = exitStatus.exitCode $0.exitedAt = Google_Protobuf_Timestamp(date: exitStatus.exitedAt) } } catch {