diff --git a/src/agent/rustjail/src/process.rs b/src/agent/rustjail/src/process.rs index 1ac4694b7..665bd8d07 100644 --- a/src/agent/rustjail/src/process.rs +++ b/src/agent/rustjail/src/process.rs @@ -7,6 +7,7 @@ use libc::pid_t; use std::fs::File; use std::os::unix::io::RawFd; +use std::sync::mpsc::Sender; // use crate::configs::{Capabilities, Rlimit}; // use crate::cgroups::Manager as CgroupManager; @@ -45,6 +46,7 @@ pub struct Process { pub pid: pid_t, pub exit_code: i32, + pub exit_watchers: Vec>, pub oci: OCIProcess, pub logger: Logger, } @@ -95,6 +97,7 @@ impl Process { init, pid: -1, exit_code: 0, + exit_watchers: Vec::new(), oci: ocip.clone(), logger: logger.clone(), }; diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 85f0ebb30..e4a6c697a 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -4,6 +4,7 @@ // use std::path::Path; +use std::sync::mpsc::{channel, Sender}; use std::sync::{Arc, Mutex}; use ttrpc; @@ -365,6 +366,7 @@ impl agentService { let pid: pid_t; let mut exit_pipe_r: RawFd = -1; let mut buf: Vec = vec![0, 1]; + let (exit_send, exit_recv) = channel(); info!( sl!(), @@ -382,6 +384,7 @@ impl agentService { exit_pipe_r = p.exit_pipe_r.unwrap(); } + p.exit_watchers.push(exit_send); pid = p.pid; } @@ -398,9 +401,16 @@ impl agentService { } }; - // need to close all fds - let mut p = ctr.processes.get_mut(&pid).unwrap(); + let mut p = match ctr.processes.get_mut(&pid) { + Some(p) => p, + None => { + // Lost race, pick up exit code from channel + resp.status = exit_recv.recv().unwrap(); + return Ok(resp); + } + }; + // need to close all fds if p.parent_stdin.is_some() { let _ = unistd::close(p.parent_stdin.unwrap()); } @@ -427,6 +437,11 @@ impl agentService { p.term_master = None; resp.status = p.exit_code; + // broadcast exit code to all parallel watchers + for s in p.exit_watchers.iter() { + // Just ignore errors in case any watcher quits unexpectedly + let _ = s.send(p.exit_code); + } ctr.processes.remove(&pid); diff --git a/src/runtime/containerd-shim-v2/delete.go b/src/runtime/containerd-shim-v2/delete.go index ed7f785e0..70631c4fa 100644 --- a/src/runtime/containerd-shim-v2/delete.go +++ b/src/runtime/containerd-shim-v2/delete.go @@ -9,25 +9,21 @@ import ( "context" "path" + "github.com/containerd/containerd/api/types/task" "github.com/containerd/containerd/mount" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ) func deleteContainer(ctx context.Context, s *service, c *container) error { - status, err := s.sandbox.StatusContainer(c.id) - if err != nil && !isNotFound(err) { - return err - } - if !c.cType.IsSandbox() && err == nil { - if status.State.State != types.StateStopped { - _, err = s.sandbox.StopContainer(c.id, false) + if !c.cType.IsSandbox() { + if c.status != task.StatusStopped { + _, err := s.sandbox.StopContainer(c.id, false) if err != nil { return err } } - if _, err = s.sandbox.DeleteContainer(c.id); err != nil { + if _, err := s.sandbox.DeleteContainer(c.id); err != nil { return err } } diff --git a/src/runtime/containerd-shim-v2/service.go b/src/runtime/containerd-shim-v2/service.go index 22bdc364a..795f27e4b 100644 --- a/src/runtime/containerd-shim-v2/service.go +++ b/src/runtime/containerd-shim-v2/service.go @@ -681,20 +681,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E return nil, err } - // According to CRI specs, kubelet will call StopPodSandbox() - // at least once before calling RemovePodSandbox, and this call - // is idempotent, and must not return an error if all relevant - // resources have already been reclaimed. And in that call it will - // send a SIGKILL signal first to try to stop the container, thus - // once the container has terminated, here should ignore this signal - // and return directly. - if signum == syscall.SIGKILL || signum == syscall.SIGTERM { - if c.status == task.StatusStopped { - shimLog.WithField("sandbox", s.sandbox.ID()).WithField("container", c.id).Debug("Container has already been stopped") - return empty, nil - } - } - + processStatus := c.status processID := c.id if r.ExecID != "" { execs, err := c.getExec(r.ExecID) @@ -710,6 +697,23 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E }).Debug("Id of exec process to be signalled is empty") return empty, errors.New("The exec process does not exist") } + processStatus = execs.status + } + + // According to CRI specs, kubelet will call StopPodSandbox() + // at least once before calling RemovePodSandbox, and this call + // is idempotent, and must not return an error if all relevant + // resources have already been reclaimed. And in that call it will + // send a SIGKILL signal first to try to stop the container, thus + // once the container has terminated, here should ignore this signal + // and return directly. + if (signum == syscall.SIGKILL || signum == syscall.SIGTERM) && processStatus == task.StatusStopped { + shimLog.WithFields(logrus.Fields{ + "sandbox": s.sandbox.ID(), + "container": c.id, + "exec-id": r.ExecID, + }).Debug("process has already stopped") + return empty, nil } return empty, s.sandbox.SignalProcess(c.id, processID, signum, r.All)