1414// limitations under the License.
1515//===----------------------------------------------------------------------===//
1616
17- import ArgumentParser
1817import Containerization
1918import ContainerizationError
2019import ContainerizationOS
@@ -26,46 +25,29 @@ import NIOPosix
2625#if os(Linux)
2726import Musl
2827import LCShim
29- #else
30- import Darwin
3128#endif
3229
3330@main
34- struct Application : AsyncParsableCommand {
35- static let configuration = CommandConfiguration (
36- commandName: " vminitd " ,
37- abstract: " VM init process and container agent " ,
38- version: " 1.0.0 " ,
39- subcommands: [
40- InitCommand . self,
41- FsNotifyCommand . self,
42- ] ,
43- defaultSubcommand: InitCommand . self
44- )
31+ struct Application {
32+ private static let foregroundEnvVar = " FOREGROUND "
33+ private static let vsockPort = 1024
34+ private static let standardErrorLock = NSLock ( )
4535
46- static let foregroundEnvVar = " FOREGROUND "
47- static let vsockPort = 1024
48- static let standardErrorLock = NSLock ( )
49-
50- static func runInForeground( _ log: Logger ) throws {
51- precondition ( getpid ( ) != 1 , " runInForeground must not be called as PID 1 " )
52- log. info ( " running vminitd under pid1 wrapper " )
36+ private static func runInForeground( _ log: Logger ) throws {
37+ log. info ( " running vminitd under pid1 " )
5338
5439 var command = Command ( " /sbin/vminitd " )
5540 command. attrs = . init( setsid: true )
5641 command. stdin = . standardInput
5742 command. stdout = . standardOutput
5843 command. stderr = . standardError
59-
60- var env = ProcessInfo . processInfo. environment
61- env [ foregroundEnvVar] = " 1 "
62- command. environment = env. map { " \( $0. key) = \( $0. value) " }
44+ command. environment = [ " \( foregroundEnvVar) =1 " ]
6345
6446 try command. start ( )
6547 _ = try command. wait ( )
6648 }
6749
68- static func adjustLimits( ) throws {
50+ private static func adjustLimits( ) throws {
6951 var limits = rlimit ( )
7052 guard getrlimit ( RLIMIT_NOFILE, & limits) == 0 else {
7153 throw POSIXError ( . init( rawValue: errno) !)
@@ -78,232 +60,64 @@ struct Application: AsyncParsableCommand {
7860 }
7961
8062 @Sendable
81- static func standardError( label: String ) -> StreamLogHandler {
63+ private static func standardError( label: String ) -> StreamLogHandler {
8264 standardErrorLock. withLock {
8365 StreamLogHandler . standardError ( label: label)
8466 }
8567 }
8668
87- static func exit( _ code: Int32 ) -> Never {
88- #if os(Linux)
89- Musl . exit ( code)
90- #else
91- Darwin . exit ( code)
92- #endif
93- }
94- }
95-
96- extension Application {
97- struct InitCommand : AsyncParsableCommand {
98- static let configuration = CommandConfiguration (
99- commandName: " init " ,
100- abstract: " Run vminitd as init process (default) "
101- )
102-
103- func run( ) async throws {
104- LoggingSystem . bootstrap ( Application . standardError)
105- var log = Logger ( label: " vminitd " )
106-
107- try Application . adjustLimits ( )
108-
109- // when running under debug mode, launch vminitd as a sub process of pid1
110- // so that we get a chance to collect better logs and errors before pid1 exists
111- // and the kernel panics.
112- #if DEBUG
113- let environment = ProcessInfo . processInfo. environment
114- let foreground = environment [ Application . foregroundEnvVar]
115- let isPid1 = ( getpid ( ) == 1 )
116- log. info ( " checking for shim var \( Application . foregroundEnvVar) = \( String ( describing: foreground) ) ; pid= \( getpid ( ) ) " )
69+ static func main( ) async throws {
70+ LoggingSystem . bootstrap ( standardError)
71+ var log = Logger ( label: " vminitd " )
11772
118- // only use the FOREGROUND shim when we're not PID 1
119- // if we are PID 1 (fresh VM boot), skip the shim to avoid exiting init
120- if foreground == nil && !isPid1 {
121- try Application . runInForeground ( log)
122- Application . exit ( 0 ) // parent is not PID 1; safe to exit after child completes
123- }
73+ try adjustLimits ( )
12474
125- // we only need to be a subreaper when we're not PID 1
126- // (when PID 1, the kernel already reaps children)
127- if !isPid1 {
128- CZ_set_sub_reaper ( )
129- }
130- #endif
75+ // when running under debug mode, launch vminitd as a sub process of pid1
76+ // so that we get a chance to collect better logs and errors before pid1 exits
77+ // and the kernel panics.
78+ #if DEBUG
79+ let environment = ProcessInfo . processInfo. environment
80+ let foreground = environment [ Self . foregroundEnvVar]
81+ log. info ( " checking for shim var \( foregroundEnvVar) = \( String ( describing: foreground) ) " )
13182
132- signal ( SIGPIPE, SIG_IGN)
133-
134- // Because the sysctl rpc wouldn't make sense if this didn't always exist, we
135- // ALWAYS mount /proc.
136- guard Musl . mount ( " proc " , " /proc " , " proc " , 0 , " " ) == 0 else {
137- log. error ( " failed to mount /proc " )
138- Application . exit ( 1 )
139- }
140- guard Musl . mount ( " tmpfs " , " /run " , " tmpfs " , 0 , " " ) == 0 else {
141- log. error ( " failed to mount /run " )
142- Application . exit ( 1 )
143- }
144- try Binfmt . mount ( )
145-
146- log. logLevel = . debug
147-
148- log. info ( " vminitd booting " )
149- let eg = MultiThreadedEventLoopGroup ( numberOfThreads: System . coreCount)
150- let server = Initd ( log: log, group: eg)
151-
152- do {
153- log. info ( " serving vminitd API " )
154- try await server. serve ( port: Application . vsockPort)
155- log. info ( " vminitd API returned " )
156- } catch {
157- log. error ( " vminitd boot error \( error) " )
158- Application . exit ( 1 )
159- }
83+ if foreground == nil {
84+ try runInForeground ( log)
85+ exit ( 0 )
16086 }
161- }
162- }
163-
164- extension Application {
165- struct FsNotifyCommand : ParsableCommand {
166- static let configuration = CommandConfiguration (
167- commandName: " fs-notify " ,
168- abstract: " Internal command to run filesystem notification worker in container namespace " ,
169- shouldDisplay: false
170- )
171-
172- @Argument ( help: " Container PID whose namespace to enter " )
173- var containerPID : Int32
174-
175- private static let handshakeReady : UInt8 = 0xAA
176- private static let handshakeFailure : UInt8 = 0xFF
17787
178- func run( ) throws {
179- // FD 3 = socket (extraFiles[0]), FD 4 = error pipe (extraFiles[1])
180- let socketFD : Int32 = 3
181- let errorPipeFD : Int32 = 4
182-
183- do {
184- try enterContainerNamespace ( containerPID: containerPID)
185- close ( errorPipeFD)
186- } catch {
187- let errorMsg = " Failed to enter namespace: \( error) "
188- _ = errorMsg. utf8CString. withUnsafeBufferPointer { buffer in
189- // -1 to skip null terminator
190- write ( errorPipeFD, buffer. baseAddress, buffer. count - 1 )
191- }
192- close ( errorPipeFD)
193-
194- var failureHandshake = Self . handshakeFailure
195- _ = write ( socketFD, & failureHandshake, 1 )
196- close ( socketFD)
197- Application . exit ( 1 )
198- }
199-
200- var readyHandshake = Self . handshakeReady
201- guard write ( socketFD, & readyHandshake, 1 ) == 1 else {
202- close ( socketFD)
203- Application . exit ( 1 )
204- }
205-
206- while true {
207- do {
208- guard let ( path, eventType) = try readEventFromParent ( socket: socketFD) else {
209- break
210- }
211-
212- do {
213- try generateSyntheticInotifyEvent ( path: path, eventType: eventType)
214- } catch {
215- // Log detailed error to stderr (captured by parent)
216- let errorMsg = " Failed to generate inotify event: path= \( path) , type= \( eventType) , error= \( error) "
217- fputs ( errorMsg + " \n " , stderr)
218- fflush ( stderr)
219- }
88+ // since we are not running as pid1 in this mode we must set ourselves
89+ // as a subreaper so that all child processes are reaped by us and not
90+ // passed onto our parent.
91+ CZ_set_sub_reaper ( )
92+ #endif
22093
221- } catch {
222- // Log and exit
223- fputs ( " Protocol error reading from parent: \( error) \n " , stderr)
224- fflush ( stderr)
225- break
226- }
227- }
94+ signal ( SIGPIPE, SIG_IGN)
22895
229- close ( socketFD)
96+ // Because the sysctl rpc wouldn't make sense if this didn't always exist, we
97+ // ALWAYS mount /proc.
98+ guard Musl . mount ( " proc " , " /proc " , " proc " , 0 , " " ) == 0 else {
99+ log. error ( " failed to mount /proc " )
100+ exit ( 1 )
230101 }
231-
232- private func enterContainerNamespace( containerPID: Int32 ) throws {
233- let nsPath = " /proc/ \( containerPID) /ns/mnt "
234- let vmNsPath = " /proc/self/ns/mnt "
235-
236- let containerNsStatPtr = UnsafeMutablePointer< stat> . allocate( capacity: 1 )
237- let vmNsStatPtr = UnsafeMutablePointer< stat> . allocate( capacity: 1 )
238- defer {
239- containerNsStatPtr. deallocate ( )
240- vmNsStatPtr. deallocate ( )
241- }
242-
243- let containerStatResult = stat ( nsPath, containerNsStatPtr)
244- let vmStatResult = stat ( vmNsPath, vmNsStatPtr)
245-
246- if containerStatResult == 0 && vmStatResult == 0 {
247- let containerInode = containerNsStatPtr. pointee. st_ino
248- let vmInode = vmNsStatPtr. pointee. st_ino
249-
250- if containerInode == vmInode {
251- return
252- }
253- }
254-
255- let fd = open ( nsPath, O_RDONLY)
256- guard fd >= 0 else {
257- throw ContainerizationError ( . internalError, message: " Failed to open namespace file: \( nsPath) , errno \( errno) " )
258- }
259- defer {
260- _ = close ( fd)
261- }
262- let _ = unshare ( CLONE_FS)
263- let setnsResult = setns ( fd, CLONE_NEWNS)
264- guard setnsResult == 0 else {
265- throw ContainerizationError ( . internalError, message: " Failed to setns to mount namespace: errno \( errno) " )
266- }
102+ guard Musl . mount ( " tmpfs " , " /run " , " tmpfs " , 0 , " " ) == 0 else {
103+ log. error ( " failed to mount /run " )
104+ exit ( 1 )
267105 }
106+ try Binfmt . mount ( )
268107
269- private func readEventFromParent( socket: Int32 ) throws -> ( String , FileSystemEventType ) ? {
270- var eventTypeValue : UInt32 = 0
271- guard read ( socket, & eventTypeValue, 4 ) == 4 else { return nil }
272- eventTypeValue = UInt32 ( bigEndian: eventTypeValue)
273-
274- var pathLen : UInt32 = 0
275- guard read ( socket, & pathLen, 4 ) == 4 else { return nil }
276- pathLen = UInt32 ( bigEndian: pathLen)
277-
278- let pathData = UnsafeMutablePointer< UInt8> . allocate( capacity: Int ( pathLen) )
279- defer { pathData. deallocate ( ) }
280- guard read ( socket, pathData, Int ( pathLen) ) == pathLen else { return nil }
281- let pathBytes = Data ( bytes: pathData, count: Int ( pathLen) )
282- guard let path = String ( data: pathBytes, encoding: . utf8) else { return nil }
283-
284- guard let eventType = FileSystemEventType ( rawValue: Int ( eventTypeValue) ) else {
285- return nil
286- }
287-
288- return ( path, eventType)
289- }
108+ log. logLevel = . debug
290109
291- private func generateSyntheticInotifyEvent(
292- path: String ,
293- eventType: FileSystemEventType
294- ) throws {
295- if eventType == . delete && !FileManager. default. fileExists ( atPath: path) {
296- return
297- }
110+ log. info ( " vminitd booting " )
111+ let eg = MultiThreadedEventLoopGroup ( numberOfThreads: System . coreCount)
112+ let server = Initd ( log: log, group: eg)
298113
299- let attributes = try FileManager . default. attributesOfItem ( atPath: path)
300- guard let permissions = attributes [ . posixPermissions] as? NSNumber else {
301- throw ContainerizationError ( . internalError, message: " Failed to get file permissions for path: \( path) " )
302- }
303- try FileManager . default. setAttributes (
304- [ . posixPermissions: permissions] ,
305- ofItemAtPath: path
306- )
114+ do {
115+ log. info ( " serving vminitd API " )
116+ try await server. serve ( port: vsockPort)
117+ log. info ( " vminitd API returned " )
118+ } catch {
119+ log. error ( " vminitd boot error \( error) " )
120+ exit ( 1 )
307121 }
308122 }
309123}
0 commit comments