From 6c77d76f24ba0b4a654b3fa1823f4e60029005ee Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 13 Aug 2019 15:56:23 +0800 Subject: [PATCH] qemu: check guest status with qmp query-status When the guest panics or stops with an unexpected internal error, the qemu process might still be running, but we can detect such a situation with qmp. The monitor can then still report such failures to watchers. Fixes: #1963 Signed-off-by: Peng Tao --- virtcontainers/acrn.go | 15 ++++++++++++--- virtcontainers/fc.go | 8 ++++++++ virtcontainers/hypervisor.go | 1 + virtcontainers/mock_hypervisor.go | 4 ++++ virtcontainers/mock_hypervisor_test.go | 6 ++++++ virtcontainers/monitor.go | 3 +-- virtcontainers/qemu.go | 25 +++++++++++++++++++++---- 7 files changed, 53 insertions(+), 9 deletions(-) diff --git a/virtcontainers/acrn.go b/virtcontainers/acrn.go index 7fdb831bb..877ff166a 100644 --- a/virtcontainers/acrn.go +++ b/virtcontainers/acrn.go @@ -7,7 +7,6 @@ package virtcontainers import ( "context" - "errors" "fmt" "os" "os/exec" @@ -16,13 +15,15 @@ import ( "syscall" "time" + opentracing "github.com/opentracing/opentracing-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" "github.com/kata-containers/runtime/virtcontainers/utils" - opentracing "github.com/opentracing/opentracing-go" - "github.com/sirupsen/logrus" ) // AcrnState keeps Acrn's state @@ -648,3 +649,11 @@ func (a *acrn) load(s persistapi.HypervisorState) { a.info.PID = s.Pid a.state.UUID = s.UUID } + +func (a *acrn) check() error { + if err := syscall.Kill(a.pid(), syscall.Signal(0)); err != nil { + return errors.Wrapf(err, "failed to ping acrn process") + } + + return nil +} diff --git a/virtcontainers/fc.go b/virtcontainers/fc.go index 2ef5ffc59..f8bc56285 100644 --- 
a/virtcontainers/fc.go +++ b/virtcontainers/fc.go @@ -1006,3 +1006,11 @@ func (fc *firecracker) save() (s persistapi.HypervisorState) { func (fc *firecracker) load(s persistapi.HypervisorState) { fc.info.PID = s.Pid } + +func (fc *firecracker) check() error { + if err := syscall.Kill(fc.pid(), syscall.Signal(0)); err != nil { + return errors.Wrapf(err, "failed to ping fc process") + } + + return nil +} diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index f12595328..4a795f7ba 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -671,6 +671,7 @@ type hypervisor interface { pid() int fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, store *store.VCStore, j []byte) error toGrpc() ([]byte, error) + check() error save() persistapi.HypervisorState load(persistapi.HypervisorState) diff --git a/virtcontainers/mock_hypervisor.go b/virtcontainers/mock_hypervisor.go index 1c198f47b..90611aeb7 100644 --- a/virtcontainers/mock_hypervisor.go +++ b/virtcontainers/mock_hypervisor.go @@ -121,3 +121,7 @@ func (m *mockHypervisor) save() (s persistapi.HypervisorState) { } func (m *mockHypervisor) load(s persistapi.HypervisorState) {} + +func (m *mockHypervisor) check() error { + return nil +} diff --git a/virtcontainers/mock_hypervisor_test.go b/virtcontainers/mock_hypervisor_test.go index 77279ef22..efcee1c80 100644 --- a/virtcontainers/mock_hypervisor_test.go +++ b/virtcontainers/mock_hypervisor_test.go @@ -82,3 +82,9 @@ func TestMockHypervisorDisconnect(t *testing.T) { m.disconnect() } + +func TestMockHypervisorCheck(t *testing.T) { + var m *mockHypervisor + + assert.NoError(t, m.check()) +} diff --git a/virtcontainers/monitor.go b/virtcontainers/monitor.go index 25c55f425..ff5b4d034 100644 --- a/virtcontainers/monitor.go +++ b/virtcontainers/monitor.go @@ -7,7 +7,6 @@ package virtcontainers import ( "sync" - "syscall" "time" "github.com/pkg/errors" @@ -127,7 +126,7 @@ func (m *monitor) watchAgent() { } func (m 
*monitor) watchHypervisor() error { - if err := syscall.Kill(m.sandbox.hypervisor.pid(), syscall.Signal(0)); err != nil { + if err := m.sandbox.hypervisor.check(); err != nil { m.notify(errors.Wrapf(err, "failed to ping hypervisor process")) return err } diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 1366e39bd..c36bd5bbb 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -10,7 +10,6 @@ import ( "context" "encoding/hex" "encoding/json" - "errors" "fmt" "io/ioutil" "math" @@ -24,17 +23,17 @@ import ( "unsafe" govmmQemu "github.com/intel/govmm/qemu" - "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" "github.com/opentracing/opentracing-go" + "github.com/pkg/errors" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" "github.com/kata-containers/runtime/virtcontainers/device/config" persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" + "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" "github.com/kata-containers/runtime/virtcontainers/utils" - - "golang.org/x/sys/unix" ) // romFile is the file name of the ROM that can be used for virtio-pci devices. @@ -2015,3 +2014,21 @@ func (q *qemu) load(s persistapi.HypervisorState) { }) } } + +func (q *qemu) check() error { + err := q.qmpSetup() + if err != nil { + return err + } + + status, err := q.qmpMonitorCh.qmp.ExecuteQueryStatus(q.qmpMonitorCh.ctx) + if err != nil { + return err + } + + if status.Status == "internal-error" || status.Status == "guest-panicked" { + return errors.Errorf("guest failure: %s", status.Status) + } + + return nil +}