@@ -22,21 +22,23 @@ import (
2222)
2323
2424type child struct {
25- def api.Child
26- status atomic.Pointer [api.ChildStatus ]
27- cmds chan childCmd
28- wg sync.WaitGroup
25+ def api.Child
26+ status atomic.Pointer [api.ChildStatus ]
27+ cmds chan childCmd
28+ wg sync.WaitGroup
29+ isolator isolator
2930
3031 restartDelay time.Duration
3132 killDelay time.Duration
3233 healthCheckInitialInterval time.Duration
3334 healthCheckInterval time.Duration
3435}
3536
36- func newChild (def api.Child ) * child {
37+ func newChild (def api.Child , isolator isolator ) * child {
3738 c := & child {
38- def : def ,
39- cmds : make (chan childCmd ), // important that this be un-buffered
39+ def : def ,
40+ cmds : make (chan childCmd ), // important that this be un-buffered
41+ isolator : isolator ,
4042
4143 // tests may override these
4244 restartDelay : time .Second , // TODO: scale
@@ -142,6 +144,9 @@ MANAGER:
142144 }
143145 curProc = nil
144146 s := curStatus ()
147+ // make sure any children that tried to fork off get caught and killed via
148+ // the cgroup, unless they managed to escape into a new cgroup
149+ c .cleanup (s )
145150 s .State = api .ExecEnded
146151 var ee * exec.ExitError
147152 if errors .As (err , & ee ) {
@@ -153,6 +158,9 @@ MANAGER:
153158 s .Pid = 0
154159 switch status .State {
155160 case api .ChildStopping :
161+ // re-check all the isolation groups to make sure all processes are
162+ // killed and cgroups removed
163+ c .cleanupAll (& status )
156164 // stop completed
157165 status .State = api .ChildStopped
158166 // reset the starting process to the beginning
@@ -315,21 +323,20 @@ func (c *child) start(
315323 return nil , api.ExecStatus {State : api .ExecNotStarted , StartErr : err .Error ()}, errorState
316324 }
317325 log .Printf ("started %s as pid %d" , name , cmd .Process .Pid )
318- c .wg .Add (1 )
319- go func () {
320- defer c .wg .Done ()
326+ c .wg .Go (func () {
321327 err := cmd .Wait ()
322328 exited <- err
323- }()
324- if err := isolateProcess (context .TODO (), name , cmd .Process ); err != nil {
329+ })
330+ eStat := api.ExecStatus {
331+ State : api .ExecRunning ,
332+ Pid : cmd .Process .Pid ,
333+ }
334+ if isolationGroup , err := c .isolator .isolateProcess (context .TODO (), name , cmd .Process ); err != nil {
325335 log .Printf ("ERROR: failed to isolate process %d as %q: %v" , cmd .Process .Pid , name , err )
336+ } else {
337+ eStat .Group = isolationGroup
326338 }
327- return cmd .Process ,
328- api.ExecStatus {
329- State : api .ExecRunning ,
330- Pid : cmd .Process .Pid ,
331- },
332- runningState
339+ return cmd .Process , eStat , runningState
333340}
334341
335342func (c * child ) terminate (p * os.Process , s * api.ExecStatus ) {
@@ -345,9 +352,32 @@ func (c *child) kill(p *os.Process, s *api.ExecStatus) {
345352 if err := syscall .Kill (- p .Pid , syscall .SIGKILL ); err != nil {
346353 log .Printf ("failed to kill %d: %v" , p .Pid , err )
347354 }
355+ if s .Group != "" {
356+ if err := c .isolator .cleanup (context .TODO (), s .Group ); err != nil {
357+ log .Printf ("failed to cleanup isolation group %q: %v" , s .Group , err )
358+ }
359+ }
348360 s .State = api .ExecStopping
349361}
350362
363+ func (c * child ) cleanup (s * api.ExecStatus ) {
364+ if s .Group == "" {
365+ return
366+ }
367+ if err := c .isolator .cleanup (context .TODO (), s .Group ); err != nil {
368+ log .Printf ("failed to cleanup isolation group %q: %v" , s .Group , err )
369+ } else {
370+ s .Group = ""
371+ }
372+ }
373+
374+ func (c * child ) cleanupAll (s * api.ChildStatus ) {
375+ for i := range s .Init {
376+ c .cleanup (& s .Init [i ])
377+ }
378+ c .cleanup (& s .Main )
379+ }
380+
351381func cloneStatus (s api.ChildStatus ) * api.ChildStatus {
352382 r := s
353383 r .Init = slices .Clone (s .Init )
0 commit comments