kata-monitor: improve detection of kata workloads

When the container engine is neither containerd nor CRI-O, we
lack proper detection of kata workloads and treat all the pods as
kata ones.
Instead of querying the container engine for the lower level runtime
used in each pod, check if a directory matching the pod exists in
the virtualcontainers sandboxes storage path.
This provides a container engine independent way to check for kata pods.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
(cherry picked from commit 0e854f3b80)
This commit is contained in:
Francesco Giudici
2021-09-01 15:13:49 +02:00
parent 30d07d4407
commit 3525a2ed03
8 changed files with 15 additions and 516 deletions

View File

@@ -8,15 +8,12 @@ package katamonitor
import (
"context"
"encoding/json"
"fmt"
"net"
"net/url"
"strings"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/xeipuuv/gojsonpointer"
"google.golang.org/grpc"
pb "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -149,61 +146,14 @@ func (km *KataMonitor) getSandboxes(sandboxMap map[string]bool) (map[string]bool
continue
}
request := &pb.PodSandboxStatusRequest{
PodSandboxId: pod.Id,
Verbose: true,
}
r, err := runtimeClient.PodSandboxStatus(context.Background(), request)
if err != nil {
return newMap, err
}
lowRuntime := ""
var res map[string]interface{}
if err := json.Unmarshal([]byte(r.Info["info"]), &res); err != nil {
monitorLog.WithError(err).WithField("pod", r).Error("failed to Unmarshal pod info")
continue
} else {
monitorLog.WithField("pod info", res).Debug("")
// get low level container runtime
// containerd stores the pod runtime in "/runtimeType" while CRI-O stores it the
// io.kubernetes.cri-o.RuntimeHandler annotation: check for both.
const (
containerdRuntimeMarker = "/runtimeType"
crioRuntimeMarker = "/runtimeSpec/annotations/io.kubernetes.cri-o.RuntimeHandler"
)
keys := []string{containerdRuntimeMarker, crioRuntimeMarker}
for _, key := range keys {
pointer, _ := gojsonpointer.NewJsonPointer(key)
rt, _, _ := pointer.Get(res)
if rt != nil {
if str, ok := rt.(string); ok {
lowRuntime = str
break
}
}
}
}
// If lowRuntime is empty something changed in containerd/CRI-O or we are dealing with an unknown container engine.
// Safest options is to add the POD in the list: we will be able to connect to the shim to retrieve the actual info
// only for kata PODs.
if lowRuntime == "" {
monitorLog.WithField("pod", r).Warning("unable to retrieve the runtime type")
newMap[pod.Id] = true
continue
}
// Check if a directory associated with the POD ID exist on the kata fs:
// if so we know that the POD is a kata one.
newMap[pod.Id] = checkSandboxFSExists(pod.Id)
monitorLog.WithFields(logrus.Fields{
"low runtime": lowRuntime,
"id": pod.Id,
"is kata": newMap[pod.Id],
"pod": pod,
}).Debug("")
if strings.Contains(lowRuntime, "kata") {
newMap[pod.Id] = true
} else {
newMap[pod.Id] = false
}
}
return newMap, nil

View File

@@ -10,6 +10,8 @@ import (
"io/ioutil"
"net"
"net/http"
"os"
"path/filepath"
"time"
cdshim "github.com/containerd/containerd/runtime/v2/shim"
@@ -40,6 +42,13 @@ func getSandboxFS() string {
return shim.GetSanboxesStoragePath()
}
// checkSandboxFSExists reports whether an entry named sandboxID exists under
// the sandboxes storage path returned by getSandboxFS.
//
// An empty sandboxID is never considered present: joining it to the storage
// path would yield the storage root itself, which would otherwise be reported
// as an existing sandbox.
//
// Only a "does not exist" result from os.Stat yields false; every other
// outcome, including other Stat errors, is reported as true.
func checkSandboxFSExists(sandboxID string) bool {
	if sandboxID == "" {
		return false
	}

	// The leading separator anchors the result at the filesystem root.
	sbsPath := filepath.Join(string(filepath.Separator), getSandboxFS(), sandboxID)
	_, err := os.Stat(sbsPath)
	return !os.IsNotExist(err)
}
// BuildShimClient builds and returns an http client for communicating with the provided sandbox
func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)