From 51ab8700913700f85c4458a53a7d09b72a13b3d7 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Thu, 15 Apr 2021 11:44:18 +0100 Subject: [PATCH] utils: Improve WaitLocalProcess Previously, the hypervisors were sending a signal and then checking to see if the process had died by sending the magic null signal (`0`). However, that doesn't work as it was written: the logic was assuming sending the null signal to a process that was dead would return `ESRCH`, but it doesn't: you first need to `wait(2)` for the process before sending that signal. This means that previously, all affected hypervisors would appear to take `timeout` seconds to end, even though they had _already_ finished. Now, the hypervisors' true end time will be seen as we wait for the processes before sending the null signal to ensure the process has finished. Signed-off-by: James O. D. Hunt --- src/runtime/virtcontainers/utils/utils.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index 0936c03ad..b34dd954d 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -349,6 +349,18 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l startTime := time.Now() for { + var _status syscall.WaitStatus + var _rusage syscall.Rusage + var waitedPid int + + // "A watched pot never boils" and an unwaited-for process never appears to die! + waitedPid, err = syscall.Wait4(pid, &_status, syscall.WNOHANG, &_rusage) + + if waitedPid == pid && err == nil { + pidRunning = false + break + } + if err = syscall.Kill(pid, syscall.Signal(0)); err != nil { pidRunning = false break