For Kata Containers, the rootfs is used read-only, so EROFS can noticeably decrease the metadata overhead. On top of the EROFS file system support, a config parameter allows switching the file system used for the rootfs. Fixes: #6063 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Signed-off-by: yaoyinnan <yaoyinnan@foxmail.com>
//go:build linux

// Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH
//
// SPDX-License-Identifier: Apache-2.0
//

package virtcontainers

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/http/httputil"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/containerd/console"
	chclient "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cloud-hypervisor/client"
	"github.com/opencontainers/selinux/go-selinux/label"
	"github.com/pkg/errors"
	log "github.com/sirupsen/logrus"

	"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
	hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
)

// clhTracingTags defines tags for the trace span
var clhTracingTags = map[string]string{
	"source":    "runtime",
	"package":   "virtcontainers",
	"subsystem": "hypervisor",
	"type":      "clh",
}

//
// Constants and type definitions related to cloud hypervisor
//

type clhState uint8

const (
	clhNotReady clhState = iota
	clhReady
)

const (
	clhStateCreated = "Created"
	clhStateRunning = "Running"
)

const (
	// Timeout values, in seconds, required by the HTTP API.
	clhTimeout                     = 10
	clhAPITimeout                  = 1
	clhAPITimeoutConfidentialGuest = 10
	// Timeout for hot-plug: hot-plugging a device can take more time than
	// a usual API call, so use a longer timeout for it.
	clhHotPlugAPITimeout                   = 5
	clhStopSandboxTimeout                  = 3
	clhStopSandboxTimeoutConfidentialGuest = 5
	clhSocket                              = "clh.sock"
	clhAPISocket                           = "clh-api.sock"
	virtioFsSocket                         = "virtiofsd.sock"
	defaultClhPath                         = "/usr/local/bin/cloud-hypervisor"
)

// clhClient is an interface that hides the implementation of the openAPI
// client. If the generated client changes its methods, this interface must
// change accordingly. Its main purpose is to allow the client to be mocked
// in unit tests, so it has to match the OpenAPI CLH client.
type clhClient interface {
	// Check for the REST API availability
	VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error)
	// Shut the VMM down
	ShutdownVMM(ctx context.Context) (*http.Response, error)
	// Create the VM
	CreateVM(ctx context.Context, vmConfig chclient.VmConfig) (*http.Response, error)
	// Dump the VM information
	// No lint: golint suggests renaming this to VMInfoGet.
	VmInfoGet(ctx context.Context) (chclient.VmInfo, *http.Response, error) //nolint:golint
	// Boot the VM
	BootVM(ctx context.Context) (*http.Response, error)
	// Add/remove CPUs to/from the VM
	VmResizePut(ctx context.Context, vmResize chclient.VmResize) (*http.Response, error)
	// Add a VFIO PCI device to the VM
	VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error)
	// Add a new disk device to the VM
	VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error)
	// Remove a device from the VM
	VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error)
}

type clhClientApi struct {
	ApiInternal *chclient.DefaultApiService
}

func (c *clhClientApi) VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) {
	return c.ApiInternal.VmmPingGet(ctx).Execute()
}

func (c *clhClientApi) ShutdownVMM(ctx context.Context) (*http.Response, error) {
	return c.ApiInternal.ShutdownVMM(ctx).Execute()
}

func (c *clhClientApi) CreateVM(ctx context.Context, vmConfig chclient.VmConfig) (*http.Response, error) {
	return c.ApiInternal.CreateVM(ctx).VmConfig(vmConfig).Execute()
}

//nolint:golint
func (c *clhClientApi) VmInfoGet(ctx context.Context) (chclient.VmInfo, *http.Response, error) {
	return c.ApiInternal.VmInfoGet(ctx).Execute()
}

func (c *clhClientApi) BootVM(ctx context.Context) (*http.Response, error) {
	return c.ApiInternal.BootVM(ctx).Execute()
}

func (c *clhClientApi) VmResizePut(ctx context.Context, vmResize chclient.VmResize) (*http.Response, error) {
	return c.ApiInternal.VmResizePut(ctx).VmResize(vmResize).Execute()
}

func (c *clhClientApi) VmAddDevicePut(ctx context.Context, deviceConfig chclient.DeviceConfig) (chclient.PciDeviceInfo, *http.Response, error) {
	return c.ApiInternal.VmAddDevicePut(ctx).DeviceConfig(deviceConfig).Execute()
}

func (c *clhClientApi) VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) {
	return c.ApiInternal.VmAddDiskPut(ctx).DiskConfig(diskConfig).Execute()
}

func (c *clhClientApi) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
	return c.ApiInternal.VmRemoveDevicePut(ctx).VmRemoveDevice(vmRemoveDevice).Execute()
}
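
// The add-net request below is built and written to the API socket by hand
// rather than through the generated OpenAPI client: the TAP file descriptors
// for each network device must travel with the HTTP payload as ancillary
// data (SCM_RIGHTS), which the generated client cannot send.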
// This function is a variable in order to be able to override it as part of
// our unit tests, as when testing bootVM we are already in a mocked scenario.
var vmAddNetPutRequest = func(clh *cloudHypervisor) error {
	if clh.netDevices == nil {
		clh.Logger().Info("No network device has been configured by the upper layer")
		return nil
	}

	addr, err := net.ResolveUnixAddr("unix", clh.state.apiSocket)
	if err != nil {
		return err
	}

	conn, err := net.DialUnix("unix", nil, addr)
	if err != nil {
		return err
	}
	defer conn.Close()

	for _, netDevice := range *clh.netDevices {
		clh.Logger().Infof("Adding the net device to the Cloud Hypervisor VM configuration: %+v", netDevice)

		netDeviceAsJson, err := json.Marshal(netDevice)
		if err != nil {
			return err
		}
		netDeviceAsIoReader := bytes.NewBuffer(netDeviceAsJson)

		req, err := http.NewRequest(http.MethodPut, "http://localhost/api/v1/vm.add-net", netDeviceAsIoReader)
		if err != nil {
			return err
		}

		req.Header.Set("Accept", "application/json")
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Content-Length", strconv.Itoa(int(netDeviceAsIoReader.Len())))

		payload, err := httputil.DumpRequest(req, true)
		if err != nil {
			return err
		}

		files := clh.netDevicesFiles[*netDevice.Mac]
		var fds []int
		for _, f := range files {
			fds = append(fds, int(f.Fd()))
		}
		oob := syscall.UnixRights(fds...)
		payloadn, oobn, err := conn.WriteMsgUnix([]byte(payload), oob, nil)
		if err != nil {
			return err
		}
		if payloadn != len(payload) || oobn != len(oob) {
			return fmt.Errorf("Failed to send the whole request to Cloud Hypervisor: expected to send %d bytes of payload and %d bytes of OOB data, but only sent %d and %d respectively", len(payload), len(oob), payloadn, oobn)
		}

		reader := bufio.NewReader(conn)
		resp, err := http.ReadResponse(reader, req)
		if err != nil {
			return err
		}

		respBody, err := io.ReadAll(resp.Body)
		if err != nil {
			return err
		}

		resp.Body.Close()
		resp.Body = io.NopCloser(bytes.NewBuffer(respBody))

		if resp.StatusCode != 200 && resp.StatusCode != 204 {
			clh.Logger().Errorf("vmAddNetPut failed with error '%d'. Response: %+v", resp.StatusCode, resp)
			return fmt.Errorf("Failed to add the network device '%+v' to Cloud Hypervisor: %v", netDevice, resp.StatusCode)
		}
	}

	return nil
}

// Cloud hypervisor state
type CloudHypervisorState struct {
	apiSocket         string
	PID               int
	VirtiofsDaemonPid int
	state             clhState
}

func (s *CloudHypervisorState) reset() {
	s.PID = 0
	s.VirtiofsDaemonPid = 0
	s.state = clhNotReady
}

type cloudHypervisor struct {
	console         console.Console
	virtiofsDaemon  VirtiofsDaemon
	APIClient       clhClient
	ctx             context.Context
	id              string
	netDevices      *[]chclient.NetConfig
	devicesIds      map[string]string
	netDevicesFiles map[string][]*os.File
	vmconfig        chclient.VmConfig
	state           CloudHypervisorState
	config          HypervisorConfig
	stopped         int32
	mu              sync.Mutex
}

var clhKernelParams = []Param{
	{"panic", "1"},         // upon kernel panic wait 1 second before reboot
	{"no_timer_check", ""}, // do not check broken timer IRQ resources
	{"noreplace-smp", ""},  // do not replace SMP instructions
}

var clhDebugKernelParams = []Param{
	{"console", "ttyS0,115200n8"}, // enable serial console
}

var clhDebugConfidentialGuestKernelParams = []Param{
	{"console", "hvc0"}, // enable HVC console
}

var clhDebugKernelParamsCommon = []Param{
	{"systemd.log_target", "console"}, // send logging to the console
}

//###########################################################
//
// hypervisor interface implementation for cloud-hypervisor
//
//###########################################################

func (clh *cloudHypervisor) getClhAPITimeout() time.Duration {
	// Increase the APITimeout when dealing with a Confidential Guest.
	// The value has been chosen based on tests using `ctr`, and hopefully
	// this change can be dropped in further steps of the development.
	if clh.config.ConfidentialGuest {
		return clhAPITimeoutConfidentialGuest
	}

	return clhAPITimeout
}

func (clh *cloudHypervisor) getClhStopSandboxTimeout() time.Duration {
	// Increase the StopSandboxTimeout when dealing with a Confidential Guest.
	// The value has been chosen based on tests using `ctr`, and hopefully
	// this change can be dropped in further steps of the development.
	if clh.config.ConfidentialGuest {
		return clhStopSandboxTimeoutConfidentialGuest
	}

	return clhStopSandboxTimeout
}

func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error {
	clh.config = *config

	return nil
}

func (clh *cloudHypervisor) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
	virtiofsdSocketPath, err := clh.virtioFsSocketPath(clh.id)
	if err != nil {
		return nil, err
	}

	if clh.config.SharedFS == config.VirtioFSNydus {
		apiSockPath, err := clh.nydusdAPISocketPath(clh.id)
		if err != nil {
			clh.Logger().WithError(err).Error("Invalid api socket path for nydusd")
			return nil, err
		}
		nd := &nydusd{
			path:        clh.config.VirtioFSDaemon,
			sockPath:    virtiofsdSocketPath,
			apiSockPath: apiSockPath,
			sourcePath:  sharedPath,
			debug:       clh.config.Debug,
			extraArgs:   clh.config.VirtioFSExtraArgs,
			startFn:     startInShimNS,
		}
		nd.setupShareDirFn = nd.setupPassthroughFS
		return nd, nil
	}

	// default: use virtiofsd
	return &virtiofsd{
		path:       clh.config.VirtioFSDaemon,
		sourcePath: sharedPath,
		socketPath: virtiofsdSocketPath,
		extraArgs:  clh.config.VirtioFSExtraArgs,
		cache:      clh.config.VirtioFSCache,
	}, nil
}

func (clh *cloudHypervisor) setupVirtiofsDaemon(ctx context.Context) error {
	if clh.config.SharedFS == config.Virtio9P {
		return errors.New("cloud-hypervisor only supports virtio based file sharing")
	}

	// virtioFS or virtioFsNydus
	clh.Logger().WithField("function", "setupVirtiofsDaemon").Info("Starting virtiofsDaemon")

	if clh.virtiofsDaemon == nil {
		return errors.New("Missing virtiofsDaemon configuration")
	}

	pid, err := clh.virtiofsDaemon.Start(ctx, func() {
		clh.StopVM(ctx, false)
	})
	if err != nil {
		return err
	}
	clh.state.VirtiofsDaemonPid = pid

	return nil
}

func (clh *cloudHypervisor) stopVirtiofsDaemon(ctx context.Context) (err error) {
	if clh.state.VirtiofsDaemonPid == 0 {
		clh.Logger().Warn("virtiofsd has already stopped")
		return nil
	}

	err = clh.virtiofsDaemon.Stop(ctx)
	if err != nil {
		return err
	}

	clh.state.VirtiofsDaemonPid = 0

	return nil
}

func (clh *cloudHypervisor) loadVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
	virtiofsdSocketPath, err := clh.virtioFsSocketPath(clh.id)
	if err != nil {
		return nil, err
	}

	return &virtiofsd{
		PID:        clh.state.VirtiofsDaemonPid,
		sourcePath: sharedPath,
		socketPath: virtiofsdSocketPath,
	}, nil
}

func (clh *cloudHypervisor) nydusdAPISocketPath(id string) (string, error) {
	return utils.BuildSocketPath(clh.config.VMStorePath, id, nydusdAPISock)
}

func (clh *cloudHypervisor) enableProtection() error {
	protection, err := availableGuestProtection()
	if err != nil {
		return err
	}

	switch protection {
	case tdxProtection:
		firmwarePath, err := clh.config.FirmwareAssetPath()
		if err != nil {
			return err
		}

		if firmwarePath == "" {
			return errors.New("Firmware path is not specified")
		}

		clh.vmconfig.Payload.SetFirmware(firmwarePath)

		if clh.vmconfig.Platform == nil {
			clh.vmconfig.Platform = chclient.NewPlatformConfig()
		}
		clh.vmconfig.Platform.SetTdx(true)

		return nil

	case sevProtection:
		return errors.New("SEV protection is not supported by Cloud Hypervisor")
	case snpProtection:
		return errors.New("SEV-SNP protection is not supported by Cloud Hypervisor")

	default:
		return errors.New("This system doesn't support Confidential Computing (Guest Protection)")
	}
}

// For cloudHypervisor this call only sets the internal structure up.
// The VM will be created and started through StartVM().
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
	clh.ctx = ctx

	span, newCtx := katatrace.Trace(clh.ctx, clh.Logger(), "CreateVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	clh.ctx = newCtx
	defer span.End()

	if err := clh.setConfig(hypervisorConfig); err != nil {
		return err
	}

	clh.id = id
	clh.state.state = clhNotReady
	clh.devicesIds = make(map[string]string)
	clh.netDevicesFiles = make(map[string][]*os.File)

	clh.Logger().WithField("function", "CreateVM").Info("creating Sandbox")

	if clh.state.PID > 0 {
		clh.Logger().WithField("function", "CreateVM").Info("Sandbox already exists, loading from state")

		virtiofsDaemon, err := clh.loadVirtiofsDaemon(hypervisorConfig.SharedFS)
		if err != nil {
			return err
		}
		clh.virtiofsDaemon = virtiofsDaemon

		return nil
	}

	// No need to return an error here since there might be nothing
	// to fetch if this is the first time the hypervisor is created.
	clh.Logger().WithField("function", "CreateVM").Info("Sandbox not found, creating")

	// Create the VM config via the constructor to ensure default values are properly assigned
	clh.vmconfig = *chclient.NewVmConfig(*chclient.NewPayloadConfig())

	// Make sure the kernel path is valid
	kernelPath, err := clh.config.KernelAssetPath()
	if err != nil {
		return err
	}
	clh.vmconfig.Payload.SetKernel(kernelPath)

	if clh.config.ConfidentialGuest {
		if err := clh.enableProtection(); err != nil {
			return err
		}
	}

	// Create the VM memory config via the constructor to ensure default values are properly assigned
	clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()))
	// shared memory should be enabled if using vhost-user (kata uses virtiofsd)
	clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true)
	// Enable hugepages if needed
	clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages)
	if !clh.config.ConfidentialGuest {
		hotplugSize := clh.config.DefaultMaxMemorySize
		// OpenAPI only supports int64 values
		clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hotplugSize) * utils.MiB).ToBytes()))
	}
	// Set the initial number of vCPUs for the virtual machine
	clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs))

	params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, false)
	if err != nil {
		return err
	}
	params = append(params, clhKernelParams...)

	// Followed by extra debug parameters if debug is enabled in the configuration file
	if clh.config.Debug {
		if clh.config.ConfidentialGuest {
			params = append(params, clhDebugConfidentialGuestKernelParams...)
		} else {
			params = append(params, clhDebugKernelParams...)
		}
		params = append(params, clhDebugKernelParamsCommon...)
	} else {
		// start the guest kernel with 'quiet' in non-debug mode
		params = append(params, Param{"quiet", ""})
	}

	// Followed by extra kernel parameters defined in the configuration file
	params = append(params, clh.config.KernelParams...)

	clh.vmconfig.Payload.SetCmdline(kernelParamsToString(params))

	// set the random device generator for the hypervisor
	clh.vmconfig.Rng = chclient.NewRngConfig(clh.config.EntropySource)

	// set the initial root/boot disk for the hypervisor
	imagePath, err := clh.config.ImageAssetPath()
	if err != nil {
		return err
	}

	if imagePath != "" {
		if clh.config.ConfidentialGuest {
			disk := chclient.NewDiskConfig(imagePath)
			disk.SetReadonly(true)

			diskRateLimiterConfig := clh.getDiskRateLimiterConfig()
			if diskRateLimiterConfig != nil {
				disk.SetRateLimiterConfig(*diskRateLimiterConfig)
			}

			if clh.vmconfig.Disks != nil {
				*clh.vmconfig.Disks = append(*clh.vmconfig.Disks, *disk)
			} else {
				clh.vmconfig.Disks = &[]chclient.DiskConfig{*disk}
			}
		} else {
			pmem := chclient.NewPmemConfig(imagePath)
			*pmem.DiscardWrites = true

			if clh.vmconfig.Pmem != nil {
				*clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem)
			} else {
				clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem}
			}
		}
	} else {
		initrdPath, err := clh.config.InitrdAssetPath()
		if err != nil {
			return err
		}

		clh.vmconfig.Payload.SetInitramfs(initrdPath)
	}

	if clh.config.ConfidentialGuest {
		// Use HVC as the guest console only in debug mode, only
		// for Confidential Guests
		if clh.config.Debug {
			clh.vmconfig.Console = chclient.NewConsoleConfig(cctTTY)
		} else {
			clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF)
		}

		clh.vmconfig.Serial = chclient.NewConsoleConfig(cctOFF)
	} else {
		// Use the serial port as the guest console only in debug mode,
		// so that we can gather early OS boot logs
		if clh.config.Debug {
			clh.vmconfig.Serial = chclient.NewConsoleConfig(cctTTY)
		} else {
			clh.vmconfig.Serial = chclient.NewConsoleConfig(cctOFF)
		}

		clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF)
	}

	cpu_topology := chclient.NewCpuTopology()
	cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1)
	cpu_topology.CoresPerDie = func(i int32) *int32 { return &i }(int32(clh.config.DefaultMaxVCPUs))
	cpu_topology.DiesPerPackage = func(i int32) *int32 { return &i }(1)
	cpu_topology.Packages = func(i int32) *int32 { return &i }(1)
	clh.vmconfig.Cpus.Topology = cpu_topology

	// Overwrite the default HTTP API socket path for cloud hypervisor
	apiSocketPath, err := clh.apiSocketPath(id)
	if err != nil {
		clh.Logger().WithError(err).Info("Invalid api socket path for cloud-hypervisor")
		return err
	}
	clh.state.apiSocket = apiSocketPath
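
	// The generated OpenAPI client only speaks HTTP, so hand it an
	// http.Client whose transport dials the Cloud Hypervisor API UNIX
	// socket regardless of the host named in the request URL.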
	cfg := chclient.NewConfiguration()
	cfg.HTTPClient = &http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, network, path string) (net.Conn, error) {
				addr, err := net.ResolveUnixAddr("unix", clh.state.apiSocket)
				if err != nil {
					return nil, err
				}

				return net.DialUnix("unix", nil, addr)
			},
		},
	}

	clh.APIClient = &clhClientApi{
		ApiInternal: chclient.NewAPIClient(cfg).DefaultApi,
	}

	clh.virtiofsDaemon, err = clh.createVirtiofsDaemon(filepath.Join(GetSharePath(clh.id)))
	if err != nil {
		return err
	}

	if clh.config.SGXEPCSize > 0 {
		epcSection := chclient.NewSgxEpcConfig("kata-epc", clh.config.SGXEPCSize)
		epcSection.Prefault = func(b bool) *bool { return &b }(true)

		if clh.vmconfig.SgxEpc != nil {
			*clh.vmconfig.SgxEpc = append(*clh.vmconfig.SgxEpc, *epcSection)
		} else {
			clh.vmconfig.SgxEpc = &[]chclient.SgxEpcConfig{*epcSection}
		}
	}

	return nil
}

// StartVM will start the VMM and boot the virtual machine for the given sandbox.
func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	clh.Logger().WithField("function", "StartVM").Info("starting Sandbox")

	vmPath := filepath.Join(clh.config.VMStorePath, clh.id)
	err := os.MkdirAll(vmPath, DirMode)
	if err != nil {
		return err
	}

	// This needs to be done as late as possible, just before launching:
	// processes executed by kata-runtime after this call, such as
	// virtiofsd, run with the SELinux label. If these processes require
	// privileges, we do not want to run them under confinement.
	if !clh.config.DisableSeLinux {

		if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil {
			return err
		}
		defer label.SetProcessLabel("")
	}

	err = clh.setupVirtiofsDaemon(ctx)
	if err != nil {
		return err
	}
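	// If anything below fails, shut down the virtiofs daemon we just
	// started instead of leaking it.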
	defer func() {
		if err != nil {
			if shutdownErr := clh.stopVirtiofsDaemon(ctx); shutdownErr != nil {
				clh.Logger().WithError(shutdownErr).Warn("error shutting down VirtiofsDaemon")
			}
		}
	}()

	pid, err := clh.launchClh()
	if err != nil {
		return fmt.Errorf("failed to launch cloud-hypervisor: %q", err)
	}
	clh.state.PID = pid

	ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
	defer cancel()

	if err := clh.bootVM(ctx); err != nil {
		return err
	}

	clh.state.state = clhReady
	return nil
}

// GetVMConsole builds the path of the console where we can read logs coming
// from the sandbox.
func (clh *cloudHypervisor) GetVMConsole(ctx context.Context, id string) (string, string, error) {
	clh.Logger().WithField("function", "GetVMConsole").WithField("id", id).Info("Get Sandbox Console")
	master, slave, err := console.NewPty()
	if err != nil {
		clh.Logger().WithError(err).Error("Error creating pseudo tty")
		return consoleProtoPty, "", err
	}
	clh.console = master

	return consoleProtoPty, slave, nil
}

func (clh *cloudHypervisor) Disconnect(ctx context.Context) {
	clh.Logger().WithField("function", "Disconnect").Info("Disconnecting Sandbox Console")
}

func (clh *cloudHypervisor) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {

	clh.Logger().WithField("function", "GetThreadIDs").Info("get thread IDs")

	var vcpuInfo VcpuThreadIDs

	vcpuInfo.vcpus = make(map[int]int)

	getVcpus := func(pid int) (map[int]int, error) {
		vcpus := make(map[int]int)

		dir := fmt.Sprintf("/proc/%d/task", pid)
		files, err := os.ReadDir(dir)
		if err != nil {
			return vcpus, err
		}

		pattern, err := regexp.Compile(`^vcpu\d+$`)
		if err != nil {
			return vcpus, err
		}
		for _, file := range files {
			comm, err := os.ReadFile(fmt.Sprintf("%s/%s/comm", dir, file.Name()))
			if err != nil {
				return vcpus, err
			}
			pName := strings.TrimSpace(string(comm))
			if !pattern.MatchString(pName) {
				continue
			}

			cpuID := strings.TrimPrefix(pName, "vcpu")
			threadID := file.Name()

			k, err := strconv.Atoi(cpuID)
			if err != nil {
				return vcpus, err
			}
			v, err := strconv.Atoi(threadID)
			if err != nil {
				return vcpus, err
			}
			vcpus[k] = v
		}
		return vcpus, nil
	}

	if clh.state.PID == 0 {
		return vcpuInfo, nil
	}

	vcpus, err := getVcpus(clh.state.PID)
	if err != nil {
		return vcpuInfo, err
	}
	vcpuInfo.vcpus = vcpus

	return vcpuInfo, nil
}
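
// clhDriveIndexToID derives the internal ID used to track a hot-plugged
// block drive from its index; for example, index 2 yields "clh_drive_2".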
func clhDriveIndexToID(i int) string {
	return "clh_drive_" + strconv.Itoa(i)
}

// Various cloud-hypervisor APIs report a PCI address in "BB:DD.F"
// form within the PciDeviceInfo struct. This is a broken API,
// because there is no way clh can reliably know the guest-side BDF for
// a device, since the bus number depends on how the guest firmware
// and/or kernel enumerates it. They get away with it only because
// they don't use bridges, and so the bus is always 0. Under that
// assumption, convert a clh PciDeviceInfo into a PCI path.
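// For example, a reported BDF of "0000:00:05.0" yields the PCI path "05".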
func clhPciInfoToPath(pciInfo chclient.PciDeviceInfo) (types.PciPath, error) {
	tokens := strings.Split(pciInfo.Bdf, ":")
	if len(tokens) != 3 || tokens[0] != "0000" || tokens[1] != "00" {
		return types.PciPath{}, fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
	}

	tokens = strings.Split(tokens[2], ".")
	if len(tokens) != 2 || tokens[1] != "0" || len(tokens[0]) != 2 {
		return types.PciPath{}, fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
	}

	return types.PciPathFromString(tokens[0])
}

func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) error {
	if drive.Swap {
		return fmt.Errorf("cloudHypervisor doesn't support swap")
	}

	if clh.config.BlockDeviceDriver != config.VirtioBlock {
		return fmt.Errorf("incorrect hypervisor configuration on 'block_device_driver':"+
			" using '%v' but only '%v' is supported", clh.config.BlockDeviceDriver, config.VirtioBlock)
	}

	var err error

	cl := clh.client()
	ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
	defer cancel()

	driveID := clhDriveIndexToID(drive.Index)

	if drive.Pmem {
		return fmt.Errorf("pmem device hotplug not supported")
	}

	// Create the clh disk config via the constructor to ensure default values are properly assigned
	clhDisk := *chclient.NewDiskConfig(drive.File)
	clhDisk.Readonly = &drive.ReadOnly
	clhDisk.VhostUser = func(b bool) *bool { return &b }(false)
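
	// Use one virtqueue per vCPU, with a 1024-entry queue depth, for the
	// hot-plugged disk.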
	queues := int32(clh.config.NumVCPUs)
	queueSize := int32(1024)
	clhDisk.NumQueues = &queues
	clhDisk.QueueSize = &queueSize

	diskRateLimiterConfig := clh.getDiskRateLimiterConfig()
	if diskRateLimiterConfig != nil {
		clhDisk.SetRateLimiterConfig(*diskRateLimiterConfig)
	}

	pciInfo, _, err := cl.VmAddDiskPut(ctx, clhDisk)

	if err != nil {
		return fmt.Errorf("failed to hotplug block device %+v %s", drive, openAPIClientError(err))
	}

	clh.devicesIds[driveID] = pciInfo.GetId()
	drive.PCIPath, err = clhPciInfoToPath(pciInfo)

	return err
}

func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
	cl := clh.client()
	ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
	defer cancel()

	// Create the clh device config via the constructor to ensure default values are properly assigned
	clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
	pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
	if err != nil {
		return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
	}
	clh.devicesIds[device.ID] = pciInfo.GetId()

	// clh doesn't use bridges, so the PCI path is simply the slot
	// number of the device. This will break if clh starts using
	// bridges (including PCI-E root ports), but so will the clh
	// API, since there's no way it can reliably predict a guest
	// BDF when bridges are present.
	tokens := strings.Split(pciInfo.Bdf, ":")
	if len(tokens) != 3 || tokens[0] != "0000" || tokens[1] != "00" {
		return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
	}

	tokens = strings.Split(tokens[2], ".")
	if len(tokens) != 2 || tokens[1] != "0" || len(tokens[0]) != 2 {
		return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
	}

	device.GuestPciPath, err = types.PciPathFromString(tokens[0])

	return err
}

func (clh *cloudHypervisor) hotplugAddNetDevice(e Endpoint) error {
	err := clh.addNet(e)
	if err != nil {
		return err
	}

	return clh.vmAddNetPut()
}

func (clh *cloudHypervisor) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugAddDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	switch devType {
	case BlockDev:
		drive := devInfo.(*config.BlockDrive)
		return nil, clh.hotplugAddBlockDevice(drive)
	case VfioDev:
		device := devInfo.(*config.VFIODev)
		return nil, clh.hotPlugVFIODevice(device)
	case NetDev:
		device := devInfo.(Endpoint)
		return nil, clh.hotplugAddNetDevice(device)
	default:
		return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
	}
}

func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugRemoveDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	var deviceID string

	switch devType {
	case BlockDev:
		deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
	case VfioDev:
		deviceID = devInfo.(*config.VFIODev).ID
	default:
		clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
			"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
		return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v",
			devInfo, devType)
	}

	cl := clh.client()
	ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
	defer cancel()

	originalDeviceID := clh.devicesIds[deviceID]
	remove := *chclient.NewVmRemoveDevice()
	remove.Id = &originalDeviceID
	_, err := cl.VmRemoveDevicePut(ctx, remove)
	if err != nil {
		err = fmt.Errorf("failed to hotplug remove (unplug) device %+v: %s", devInfo, openAPIClientError(err))
	}

	delete(clh.devicesIds, deviceID)
	return nil, err
}

func (clh *cloudHypervisor) HypervisorConfig() HypervisorConfig {
	return clh.config
}

func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {

	// TODO: Add support for virtio-mem

	if probe {
		return 0, MemoryDevice{}, errors.New("probe memory is not supported for cloud-hypervisor")
	}

	if reqMemMB == 0 {
		// This is a corner case: a request to resize to 0 means something went really wrong.
		return 0, MemoryDevice{}, errors.New("Can not resize memory to 0")
	}

	info, err := clh.vmInfo()
	if err != nil {
		return 0, MemoryDevice{}, err
	}

	// HotplugSize can be nil in cases where hotplug is not supported, as the Cloud Hypervisor API
	// does *not* allow us to set 0 as the HotplugSize.
	maxHotplugSize := 0 * utils.Byte
	if info.Config.Memory.HotplugSize != nil {
		maxHotplugSize = utils.MemUnit(*info.Config.Memory.HotplugSize) * utils.Byte
	}

	if reqMemMB > uint32(maxHotplugSize.ToMiB()) {
		reqMemMB = uint32(maxHotplugSize.ToMiB())
	}

	currentMem := utils.MemUnit(info.Config.Memory.Size) * utils.Byte
	newMem := utils.MemUnit(reqMemMB) * utils.MiB

	// Early check to verify if the boot memory is the same as requested
	if currentMem == newMem {
		clh.Logger().WithField("memory", reqMemMB).Debugf("VM already has requested memory")
		return uint32(currentMem.ToMiB()), MemoryDevice{}, nil
	}

	if currentMem > newMem {
		clh.Logger().Warn("Removing memory is not supported, nothing to do")
		return uint32(currentMem.ToMiB()), MemoryDevice{}, nil
	}
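
	// The hot-plugged amount must be aligned to the guest memory block
	// size: AlignMem rounds the increase up to the next multiple of the
	// block size, e.g. a 100 MiB increase with 128 MiB blocks becomes
	// 128 MiB.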
	blockSize := utils.MemUnit(memoryBlockSizeMB) * utils.MiB
	hotplugSize := (newMem - currentMem).AlignMem(blockSize)

	// Update the memory request to the block-aligned increase
	alignedRequest := currentMem + hotplugSize
	if newMem != alignedRequest {
		clh.Logger().WithFields(log.Fields{"request": newMem, "aligned-request": alignedRequest}).Debug("aligning VM memory request")
		newMem = alignedRequest
	}

	// Check again whether the memory is already the size requested, now
	// that the request has been updated to be memory aligned
	if currentMem == newMem {
		clh.Logger().WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Debug("VM already has requested memory (after alignment)")
		return uint32(currentMem.ToMiB()), MemoryDevice{}, nil
	}

	cl := clh.client()
	ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
	defer cancelResize()

	resize := *chclient.NewVmResize()
	// OpenAPI does not support uint64, convert to int64
	resize.DesiredRam = func(i int64) *int64 { return &i }(int64(newMem.ToBytes()))
	clh.Logger().WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Debug("updating VM memory")
	if _, err = cl.VmResizePut(ctx, resize); err != nil {
		clh.Logger().WithError(err).WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Warnf("failed to update memory %s", openAPIClientError(err))
		err = fmt.Errorf("Failed to resize memory from %d to %d: %s", currentMem, newMem, openAPIClientError(err))
		return uint32(currentMem.ToMiB()), MemoryDevice{}, openAPIClientError(err)
	}

	return uint32(newMem.ToMiB()), MemoryDevice{SizeMB: int(hotplugSize.ToMiB())}, nil
}

func (clh *cloudHypervisor) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
	cl := clh.client()

	// Retrieve the number of current vCPUs via the HTTP API
	info, err := clh.vmInfo()
	if err != nil {
		clh.Logger().WithField("function", "ResizeVCPUs").WithError(err).Info("[clh] vmInfo failed")
		return 0, 0, openAPIClientError(err)
	}

	currentVCPUs = uint32(info.Config.Cpus.BootVcpus)
	newVCPUs = currentVCPUs

	// Sanity check
	if reqVCPUs == 0 {
		clh.Logger().WithField("function", "ResizeVCPUs").Debugf("Cannot resize vCPU to 0")
		return currentVCPUs, newVCPUs, fmt.Errorf("Cannot resize vCPU to 0")
	}
	if reqVCPUs > uint32(info.Config.Cpus.MaxVcpus) {
		clh.Logger().WithFields(log.Fields{
			"function":    "ResizeVCPUs",
			"reqVCPUs":    reqVCPUs,
			"clhMaxVCPUs": info.Config.Cpus.MaxVcpus,
		}).Warn("exceeding the 'clhMaxVCPUs' (resizing to 'clhMaxVCPUs')")

		reqVCPUs = uint32(info.Config.Cpus.MaxVcpus)
	}

	// Resize (hot-plug) vCPUs via the HTTP API
	ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second)
	defer cancel()
	resize := *chclient.NewVmResize()
	resize.DesiredVcpus = func(i int32) *int32 { return &i }(int32(reqVCPUs))
	if _, err = cl.VmResizePut(ctx, resize); err != nil {
		return currentVCPUs, newVCPUs, errors.Wrap(err, "[clh] VmResizePut failed")
	}

	newVCPUs = reqVCPUs

	return currentVCPUs, newVCPUs, nil
}

func (clh *cloudHypervisor) Cleanup(ctx context.Context) error {
	clh.Logger().WithField("function", "Cleanup").Info("Cleanup")
	return nil
}

func (clh *cloudHypervisor) PauseVM(ctx context.Context) error {
	clh.Logger().WithField("function", "PauseVM").Info("Pause Sandbox")
	return nil
}

func (clh *cloudHypervisor) SaveVM() error {
	clh.Logger().WithField("function", "SaveVM").Info("Save Sandbox")
	return nil
}

func (clh *cloudHypervisor) ResumeVM(ctx context.Context) error {
	clh.Logger().WithField("function", "ResumeVM").Info("Resume Sandbox")
	return nil
}

// StopVM will stop the Sandbox's VM.
func (clh *cloudHypervisor) StopVM(ctx context.Context, waitOnly bool) (err error) {
	clh.mu.Lock()
	defer func() {
		if err == nil {
			atomic.StoreInt32(&clh.stopped, 1)
		}
		clh.mu.Unlock()
	}()
	span, _ := katatrace.Trace(ctx, clh.Logger(), "StopVM", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()
	clh.Logger().WithField("function", "StopVM").Info("Stop Sandbox")
	if atomic.LoadInt32(&clh.stopped) != 0 {
		clh.Logger().Info("Already stopped")
		return nil
	}

	return clh.terminate(ctx, waitOnly)
}

func (clh *cloudHypervisor) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
	return errors.New("cloudHypervisor is not supported by VM cache")
}

func (clh *cloudHypervisor) toGrpc(ctx context.Context) ([]byte, error) {
	return nil, errors.New("cloudHypervisor is not supported by VM cache")
}

func (clh *cloudHypervisor) Save() (s hv.HypervisorState) {
	s.Pid = clh.state.PID
	s.Type = string(ClhHypervisor)
	s.VirtiofsDaemonPid = clh.state.VirtiofsDaemonPid
	s.APISocket = clh.state.apiSocket
	return
}

func (clh *cloudHypervisor) Load(s hv.HypervisorState) {
	clh.state.PID = s.Pid
	clh.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
	clh.state.apiSocket = s.APISocket
}

// Check is the implementation of Check from the Hypervisor interface.
// It checks whether the VMM API is working.
func (clh *cloudHypervisor) Check() error {
	// Use a long timeout to check if the VMM is running:
	// Check is used by the monitor thread (a background thread). If the
	// monitor thread calls Check() during container boot, it can take
	// longer than usual, especially if there is a hot-plug request in
	// progress.
	running, err := clh.isClhRunning(10)
	if !running {
		return fmt.Errorf("clh is not running: %s", err)
	}
	return err
}

func (clh *cloudHypervisor) GetPids() []int {
	return []int{clh.state.PID}
}

func (clh *cloudHypervisor) GetVirtioFsPid() *int {
	return &clh.state.VirtiofsDaemonPid
}

func (clh *cloudHypervisor) AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "AddDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	var err error

	switch v := devInfo.(type) {
	case Endpoint:
		if err := clh.addNet(v); err != nil {
			return err
		}
	case types.HybridVSock:
		clh.addVSock(defaultGuestVSockCID, v.UdsPath)
	case types.Volume:
		err = clh.addVolume(v)
	default:
		clh.Logger().WithField("function", "AddDevice").Warnf("Adding a device of type %v is not supported.", v)
		return fmt.Errorf("Not implemented support for %s", v)
	}

	return err
}

//###########################################################################
//
// Local helper methods related to the hypervisor interface implementation
//
//###########################################################################

func (clh *cloudHypervisor) Logger() *log.Entry {
	return hvLogger.WithField("subsystem", "cloudHypervisor")
}

// Capabilities returns all capabilities supported by the cloudHypervisor
// implementation of the hypervisor interface
func (clh *cloudHypervisor) Capabilities(ctx context.Context) types.Capabilities {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "Capabilities", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	clh.Logger().WithField("function", "Capabilities").Info("get Capabilities")
	var caps types.Capabilities
	caps.SetFsSharingSupport()
	caps.SetBlockDeviceHotplugSupport()
	return caps
}

func (clh *cloudHypervisor) terminate(ctx context.Context, waitOnly bool) (err error) {
	span, _ := katatrace.Trace(ctx, clh.Logger(), "terminate", clhTracingTags, map[string]string{"sandbox_id": clh.id})
	defer span.End()

	pid := clh.state.PID
	pidRunning := true
	if pid == 0 {
		pidRunning = false
	}

	defer func() {
		clh.Logger().Debug("Cleanup VM")
		if err1 := clh.cleanupVM(true); err1 != nil {
			clh.Logger().WithError(err1).Error("failed to cleanupVM")
		}
	}()

	clh.Logger().Debug("Stopping Cloud Hypervisor")

	if pidRunning && !waitOnly {
		clhRunning, _ := clh.isClhRunning(uint(clh.getClhStopSandboxTimeout()))
		if clhRunning {
			ctx, cancel := context.WithTimeout(context.Background(), clh.getClhStopSandboxTimeout()*time.Second)
			defer cancel()
			if _, err = clh.client().ShutdownVMM(ctx); err != nil {
				return err
			}
		}
	}

	if err = utils.WaitLocalProcess(pid, uint(clh.getClhStopSandboxTimeout()), syscall.Signal(0), clh.Logger()); err != nil {
		return err
	}

	clh.Logger().Debug("stop virtiofsDaemon")

	if err = clh.stopVirtiofsDaemon(ctx); err != nil {
		clh.Logger().WithError(err).Error("failed to stop virtiofsDaemon")
	}

	return
}

func (clh *cloudHypervisor) reset() {
	clh.state.reset()
}

func (clh *cloudHypervisor) GenerateSocket(id string) (interface{}, error) {
	udsPath, err := clh.vsockSocketPath(id)
	if err != nil {
		clh.Logger().Info("Can't generate socket path for cloud-hypervisor")
		return types.HybridVSock{}, err
	}

	return types.HybridVSock{
		UdsPath: udsPath,
		Port:    uint32(vSockPort),
	}, nil
}

func (clh *cloudHypervisor) virtioFsSocketPath(id string) (string, error) {
	return utils.BuildSocketPath(clh.config.VMStorePath, id, virtioFsSocket)
}

func (clh *cloudHypervisor) vsockSocketPath(id string) (string, error) {
	return utils.BuildSocketPath(clh.config.VMStorePath, id, clhSocket)
}

func (clh *cloudHypervisor) apiSocketPath(id string) (string, error) {
	return utils.BuildSocketPath(clh.config.VMStorePath, id, clhAPISocket)
}

func (clh *cloudHypervisor) waitVMM(timeout uint) error {
	clhRunning, err := clh.isClhRunning(timeout)
	if err != nil {
		return err
	}

	if !clhRunning {
		return fmt.Errorf("CLH is not running")
	}

	return nil
}

func (clh *cloudHypervisor) clhPath() (string, error) {
	p, err := clh.config.HypervisorAssetPath()
	if err != nil {
		return "", err
	}

	if p == "" {
		p = defaultClhPath
	}

	if _, err = os.Stat(p); os.IsNotExist(err) {
		return "", fmt.Errorf("Cloud-Hypervisor path (%s) does not exist", p)
	}

	return p, err
}

func (clh *cloudHypervisor) launchClh() (int, error) {

	clhPath, err := clh.clhPath()
	if err != nil {
		return -1, err
	}

	args := []string{cscAPIsocket, clh.state.apiSocket}
	if clh.config.Debug {
		// Cloud Hypervisor log levels: each 'v' occurrence increases the level.
		// 0  => Error
		// 1  => Warn
		// 2  => Info
		// 3  => Debug
		// 4+ => Trace
		// Use Info: the CI runs with debug enabled, and a high level of
		// logging increases the boot time; in a nested environment this
		// could increase the chances of failure because the agent is not
		// ready on time.
		//
		// Note that for debugging CLH boot failures, the Info level
		// should be sufficient: Debug level generates so many
		// messages it floods the output stream to the extent that it
		// is almost impossible to view the guest kernel and userland
		// output. For further details, see the discussion on:
		//
		// https://github.com/kata-containers/kata-containers/pull/2751
		args = append(args, "-v")
	}

	// Enable the `seccomp` feature from Cloud Hypervisor by default.
	// Disable it only when requested by users for debugging purposes.
	if clh.config.DisableSeccomp {
		args = append(args, "--seccomp", "false")
	}

	clh.Logger().WithField("path", clhPath).Info()
	clh.Logger().WithField("args", strings.Join(args, " ")).Info()

	cmdHypervisor := exec.Command(clhPath, args...)
	if clh.config.Debug {
		cmdHypervisor.Env = os.Environ()
		cmdHypervisor.Env = append(cmdHypervisor.Env, "RUST_BACKTRACE=full")
		if clh.console != nil {
			cmdHypervisor.Stderr = clh.console
			cmdHypervisor.Stdout = clh.console
		}
	}

	cmdHypervisor.Stderr = cmdHypervisor.Stdout

	err = utils.StartCmd(cmdHypervisor)
	if err != nil {
		return -1, err
	}

	if err := clh.waitVMM(clhTimeout); err != nil {
		clh.Logger().WithError(err).Warn("cloud-hypervisor init failed")
		return -1, err
	}

	return cmdHypervisor.Process.Pid, nil
}

//###########################################################################
//
// Cloud-hypervisor CLI builder
//
//###########################################################################

const (
	cctOFF string = "Off"
	cctTTY string = "Tty"
)

const (
	cscAPIsocket string = "--api-socket"
)

//****************************************
// The kernel command line
//****************************************
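
// kernelParamsToString joins kernel parameters into a single command line;
// for example, []Param{{"panic", "1"}, {"quiet", ""}} yields "panic=1 quiet".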
func kernelParamsToString(params []Param) string {

	var paramBuilder strings.Builder
	for _, p := range params {
		paramBuilder.WriteString(p.Key)
		if len(p.Value) > 0 {
			paramBuilder.WriteString("=")
			paramBuilder.WriteString(p.Value)
		}
		paramBuilder.WriteString(" ")
	}
	return strings.TrimSpace(paramBuilder.String())
}

// ****************************************
// API calls
// ****************************************
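// isClhRunning polls until the VMM is reachable or the timeout, in seconds,
// expires: each iteration first checks that the process is still alive
// (signal 0) and then pings the HTTP API, sleeping 10ms between attempts.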
func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) {

	pid := clh.state.PID

	if atomic.LoadInt32(&clh.stopped) != 0 {
		return false, nil
	}

	timeStart := time.Now()
	cl := clh.client()
	for {
		err := syscall.Kill(pid, syscall.Signal(0))
		if err != nil {
			return false, nil
		}
		ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
		_, _, err = cl.VmmPingGet(ctx)
		cancel()
		if err == nil {
			return true, nil
		} else {
			clh.Logger().WithError(err).Warning("clh.VmmPingGet API call failed")
		}

		if time.Since(timeStart).Seconds() > float64(timeout) {
			return false, fmt.Errorf("Failed to connect to API (timeout %ds): %s", timeout, openAPIClientError(err))
		}

		time.Sleep(time.Duration(10) * time.Millisecond)
	}

}

func (clh *cloudHypervisor) client() clhClient {
	return clh.APIClient
}

func openAPIClientError(err error) error {

	if err == nil {
		return nil
	}

	reason := ""
	if apierr, ok := err.(chclient.GenericOpenAPIError); ok {
		reason = string(apierr.Body())
	}

	return fmt.Errorf("error: %v reason: %s", err, reason)
}

func (clh *cloudHypervisor) vmAddNetPut() error {
	return vmAddNetPutRequest(clh)
}

func (clh *cloudHypervisor) bootVM(ctx context.Context) error {

	cl := clh.client()

	if clh.config.Debug {
		bodyBuf, err := json.Marshal(clh.vmconfig)
		if err != nil {
			return err
		}
		clh.Logger().WithField("body", string(bodyBuf)).Debug("VM config")
	}
	_, err := cl.CreateVM(ctx, clh.vmconfig)
	if err != nil {
		return openAPIClientError(err)
	}

	info, err := clh.vmInfo()
	if err != nil {
		return err
	}

	clh.Logger().Debugf("VM state after create: %#v", info)

	if info.State != clhStateCreated {
		return fmt.Errorf("VM state is not 'Created' after 'CreateVM'")
	}

	err = clh.vmAddNetPut()
	if err != nil {
		return err
	}

	clh.Logger().Debug("Booting VM")
	_, err = cl.BootVM(ctx)
	if err != nil {
		return openAPIClientError(err)
	}

	info, err = clh.vmInfo()
	if err != nil {
		return err
	}

	clh.Logger().Debugf("VM state after boot: %#v", info)

	if info.State != clhStateRunning {
		return fmt.Errorf("VM state is not 'Running' after 'BootVM'")
	}

	return nil
}

func (clh *cloudHypervisor) addVSock(cid int64, path string) {
	clh.Logger().WithFields(log.Fields{
		"path": path,
		"cid":  cid,
	}).Info("Adding HybridVSock")

	clh.vmconfig.Vsock = chclient.NewVsockConfig(cid, path)
}

func (clh *cloudHypervisor) getRateLimiterConfig(bwSize, bwOneTimeBurst, opsSize, opsOneTimeBurst int64) *chclient.RateLimiterConfig {
	if bwSize == 0 && opsSize == 0 {
		return nil
	}

	rateLimiterConfig := chclient.NewRateLimiterConfig()

	if bwSize != 0 {
		bwTokenBucket := chclient.NewTokenBucket(bwSize, int64(utils.DefaultRateLimiterRefillTimeMilliSecs))

		if bwOneTimeBurst != 0 {
			bwTokenBucket.SetOneTimeBurst(bwOneTimeBurst)
		}

		rateLimiterConfig.SetBandwidth(*bwTokenBucket)
	}

	if opsSize != 0 {
		opsTokenBucket := chclient.NewTokenBucket(opsSize, int64(utils.DefaultRateLimiterRefillTimeMilliSecs))

		if opsOneTimeBurst != 0 {
			opsTokenBucket.SetOneTimeBurst(opsOneTimeBurst)
		}

		rateLimiterConfig.SetOps(*opsTokenBucket)
	}

	return rateLimiterConfig
}
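
// The *bw* rates below come from the runtime configuration in bits per
// second, while Cloud Hypervisor sizes its token buckets in bytes: hence
// the division by 8. The ops rates are passed through unchanged.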
func (clh *cloudHypervisor) getNetRateLimiterConfig() *chclient.RateLimiterConfig {
	return clh.getRateLimiterConfig(
		int64(utils.RevertBytes(uint64(clh.config.NetRateLimiterBwMaxRate/8))),
		int64(utils.RevertBytes(uint64(clh.config.NetRateLimiterBwOneTimeBurst/8))),
		clh.config.NetRateLimiterOpsMaxRate,
		clh.config.NetRateLimiterOpsOneTimeBurst)
}

func (clh *cloudHypervisor) getDiskRateLimiterConfig() *chclient.RateLimiterConfig {
	return clh.getRateLimiterConfig(
		int64(utils.RevertBytes(uint64(clh.config.DiskRateLimiterBwMaxRate/8))),
		int64(utils.RevertBytes(uint64(clh.config.DiskRateLimiterBwOneTimeBurst/8))),
		clh.config.DiskRateLimiterOpsMaxRate,
		clh.config.DiskRateLimiterOpsOneTimeBurst)
}

func (clh *cloudHypervisor) addNet(e Endpoint) error {
	clh.Logger().WithField("endpoint-type", e).Debugf("Adding Endpoint of type %v", e)

	mac := e.HardwareAddr()
	netPair := e.NetworkPair()
	if netPair == nil {
		return errors.New("net Pair to be added is nil, needed to get TAP file descriptors")
	}

	if len(netPair.TapInterface.VMFds) == 0 {
		return errors.New("The file descriptors for the network pair are not present")
	}
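	// Remember the TAP file descriptors for this MAC address;
	// vmAddNetPutRequest later sends them to the VMM alongside the
	// add-net request via SCM_RIGHTS.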
	clh.netDevicesFiles[mac] = netPair.TapInterface.VMFds

	netRateLimiterConfig := clh.getNetRateLimiterConfig()

	net := chclient.NewNetConfig()
	net.Mac = &mac
	if netRateLimiterConfig != nil {
		net.SetRateLimiterConfig(*netRateLimiterConfig)
	}

	if clh.netDevices != nil {
		*clh.netDevices = append(*clh.netDevices, *net)
	} else {
		clh.netDevices = &[]chclient.NetConfig{*net}
	}

	clh.Logger().Infof("Storing the Cloud Hypervisor network configuration: %+v", net)

	return nil
}

// Add a shared volume using virtiofs
func (clh *cloudHypervisor) addVolume(volume types.Volume) error {
	if clh.config.SharedFS != config.VirtioFS && clh.config.SharedFS != config.VirtioFSNydus {
		return fmt.Errorf("shared fs method not supported %s", clh.config.SharedFS)
	}

	vfsdSockPath, err := clh.virtioFsSocketPath(clh.id)
	if err != nil {
		return err
	}

	// numQueues and queueSize are required, let's use the
	// default values defined by cloud-hypervisor
	numQueues := int32(1)
	queueSize := int32(1024)
	if clh.config.VirtioFSQueueSize != 0 {
		queueSize = int32(clh.config.VirtioFSQueueSize)
	}

	fs := chclient.NewFsConfig(volume.MountTag, vfsdSockPath, numQueues, queueSize)
	clh.vmconfig.Fs = &[]chclient.FsConfig{*fs}

	clh.Logger().Debug("Adding shared volume to hypervisor: ", volume.MountTag)
	return nil
}

// cleanupVM will remove generated files and directories related to the virtual machine
func (clh *cloudHypervisor) cleanupVM(force bool) error {

	if clh.id == "" {
		return errors.New("Hypervisor ID is empty")
	}

	clh.Logger().Debug("removing vm sockets")

	path, err := clh.vsockSocketPath(clh.id)
	if err == nil {
		if err := os.Remove(path); err != nil {
			clh.Logger().WithError(err).WithField("path", path).Warn("removing vm socket failed")
		}
	}

	// Cleanup vm path
	dir := filepath.Join(clh.config.VMStorePath, clh.id)

	// If it's a symlink, remove both dir and the target.
	link, err := filepath.EvalSymlinks(dir)
	if err != nil {
		clh.Logger().WithError(err).WithField("dir", dir).Warn("failed to resolve vm path")
	}

	clh.Logger().WithFields(log.Fields{
		"link": link,
		"dir":  dir,
	}).Infof("Cleanup vm path")

	if err := os.RemoveAll(dir); err != nil {
		if !force {
			return err
		}
		clh.Logger().WithError(err).Warnf("failed to remove vm path %s", dir)
	}
	if link != dir && link != "" {
		if err := os.RemoveAll(link); err != nil {
			if !force {
				return err
			}
			clh.Logger().WithError(err).WithField("link", link).Warn("failed to remove resolved vm path")
		}
	}

	if clh.config.VMid != "" {
		dir = filepath.Join(clh.config.VMStorePath, clh.config.VMid)
		if err := os.RemoveAll(dir); err != nil {
			if !force {
				return err
			}
			clh.Logger().WithError(err).WithField("path", dir).Warnf("failed to remove vm path")
		}
	}

	clh.reset()

	return nil
}

func (clh *cloudHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 {
	vminfo, err := clh.vmInfo()
	if err != nil {
		clh.Logger().WithError(err).Error("failed to get vminfo")
		return 0
	}

	return uint32(vminfo.GetMemoryActualSize() >> utils.MibToBytesShift)
}

// vmInfo asks the hypervisor for the current VM status
func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) {
	cl := clh.client()
	ctx, cancelInfo := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second)
	defer cancelInfo()

	info, _, err := cl.VmInfoGet(ctx)
	if err != nil {
		clh.Logger().WithError(openAPIClientError(err)).Warn("VmInfoGet failed")
	}
	return info, openAPIClientError(err)
}

func (clh *cloudHypervisor) IsRateLimiterBuiltin() bool {
	return true
}