mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-17 06:14:26 +01:00
Core VCPU threads pinning logics for issue 4476. Also provided docs. Fixes:#4476 Signed-off-by: LitFlwr0 <861690705@qq.com>
1211 lines
36 KiB
Go
1211 lines
36 KiB
Go
// Copyright (c) 2017 Intel Corporation
|
|
// Copyright (c) 2021 Adobe Inc.
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
package oci
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"path/filepath"
|
|
"regexp"
|
|
goruntime "runtime"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
|
|
ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations"
|
|
podmanAnnotations "github.com/containers/podman/v4/pkg/annotations"
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
"k8s.io/apimachinery/pkg/api/resource"
|
|
|
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
|
|
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
|
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
|
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
|
dockershimAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations/dockershim"
|
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
|
vcutils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
|
)
|
|
|
|
type annotationContainerType struct {
|
|
annotation string
|
|
containerType vc.ContainerType
|
|
}
|
|
|
|
var (
|
|
// ErrNoLinux is an error for missing Linux sections in the OCI configuration file.
|
|
ErrNoLinux = errors.New("missing Linux section")
|
|
|
|
// CRIContainerTypeKeyList lists all the CRI keys that could define
|
|
// the container type from annotations in the config.json.
|
|
CRIContainerTypeKeyList = []string{ctrAnnotations.ContainerType, podmanAnnotations.ContainerType, dockershimAnnotations.ContainerTypeLabelKey}
|
|
|
|
// CRISandboxNameKeyList lists all the CRI keys that could define
|
|
// the sandbox ID (sandbox ID) from annotations in the config.json.
|
|
CRISandboxNameKeyList = []string{ctrAnnotations.SandboxID, podmanAnnotations.SandboxID, dockershimAnnotations.SandboxIDLabelKey}
|
|
|
|
// CRIContainerTypeList lists all the maps from CRI ContainerTypes annotations
|
|
// to a virtcontainers ContainerType.
|
|
CRIContainerTypeList = []annotationContainerType{
|
|
{podmanAnnotations.ContainerTypeSandbox, vc.PodSandbox},
|
|
{podmanAnnotations.ContainerTypeContainer, vc.PodContainer},
|
|
{ctrAnnotations.ContainerTypeSandbox, vc.PodSandbox},
|
|
{ctrAnnotations.ContainerTypeContainer, vc.PodContainer},
|
|
{dockershimAnnotations.ContainerTypeLabelSandbox, vc.PodSandbox},
|
|
{dockershimAnnotations.ContainerTypeLabelContainer, vc.PodContainer},
|
|
}
|
|
)
|
|
|
|
// Container state names.
const (
	// StateCreated is the state of a container that has been created and
	// is ready to be run.
	StateCreated = "created"

	// StateRunning is the state of a container that is currently running.
	StateRunning = "running"

	// StateStopped is the state of a container that has been stopped.
	StateStopped = "stopped"

	// StatePaused is the state of a container that has been paused.
	StatePaused = "paused"
)
|
|
|
|
// KernelModulesSeparator is the delimiter between entries in the value of the
// kernel modules annotation.
const KernelModulesSeparator = ";"
|
|
|
|
// FactoryConfig holds the VM factory configuration.
type FactoryConfig struct {
	// TemplatePath is the path of the template.
	TemplatePath string

	// VMCacheEndpoint is the endpoint used to transport a VM from the VM
	// cache server to the runtime.
	VMCacheEndpoint string

	// VMCacheNumber is the number of caches of VMCache.
	VMCacheNumber uint

	// Template enables VM templating support in the VM factory.
	Template bool
}
|
|
|
|
// RuntimeConfig aggregates all runtime specific settings
|
|
// nolint: govet
|
|
type RuntimeConfig struct {
|
|
//Paths to be bindmounted RO into the guest.
|
|
SandboxBindMounts []string
|
|
|
|
//Experimental features enabled
|
|
Experimental []exp.Feature
|
|
|
|
JaegerEndpoint string
|
|
JaegerUser string
|
|
JaegerPassword string
|
|
HypervisorType vc.HypervisorType
|
|
|
|
FactoryConfig FactoryConfig
|
|
HypervisorConfig vc.HypervisorConfig
|
|
AgentConfig vc.KataAgentConfig
|
|
|
|
//Determines how the VM should be connected to the
|
|
//the container network interface
|
|
InterNetworkModel vc.NetInterworkingModel
|
|
|
|
//Determines how VFIO devices should be presented to the
|
|
//container
|
|
VfioMode config.VFIOModeType
|
|
|
|
Debug bool
|
|
Trace bool
|
|
|
|
//Determines if seccomp should be applied inside guest
|
|
DisableGuestSeccomp bool
|
|
|
|
// Sandbox sizing information which, if provided, indicates the size of
|
|
// the sandbox needed for the workload(s)
|
|
SandboxCPUs uint32
|
|
SandboxMemMB uint32
|
|
|
|
// Determines if we should attempt to size the VM at boot time and skip
|
|
// any later resource updates.
|
|
StaticSandboxResourceMgmt bool
|
|
|
|
// Determines if create a netns for hypervisor process
|
|
DisableNewNetNs bool
|
|
|
|
//Determines kata processes are managed only in sandbox cgroup
|
|
SandboxCgroupOnly bool
|
|
|
|
// Determines if enable pprof
|
|
EnablePprof bool
|
|
|
|
// Determines if Kata creates emptyDir on the guest
|
|
DisableGuestEmptyDir bool
|
|
}
|
|
|
|
// AddKernelParam allows the addition of new kernel parameters to an existing
|
|
// hypervisor configuration stored inside the current runtime configuration.
|
|
func (config *RuntimeConfig) AddKernelParam(p vc.Param) error {
|
|
return config.HypervisorConfig.AddKernelParam(p)
|
|
}
|
|
|
|
var ociLog = logrus.WithFields(logrus.Fields{
|
|
"source": "virtcontainers",
|
|
"subsystem": "oci",
|
|
})
|
|
|
|
// SetLogger sets the logger for oci package.
|
|
func SetLogger(ctx context.Context, logger *logrus.Entry) {
|
|
fields := ociLog.Data
|
|
ociLog = logger.WithFields(fields)
|
|
}
|
|
|
|
func cmdEnvs(spec specs.Spec, envs []types.EnvVar) []types.EnvVar {
|
|
for _, env := range spec.Process.Env {
|
|
kv := strings.Split(env, "=")
|
|
if len(kv) < 2 {
|
|
continue
|
|
}
|
|
|
|
envs = append(envs,
|
|
types.EnvVar{
|
|
Var: kv[0],
|
|
Value: kv[1],
|
|
})
|
|
}
|
|
|
|
return envs
|
|
}
|
|
|
|
func newMount(m specs.Mount) vc.Mount {
|
|
readonly := false
|
|
bind := false
|
|
for _, flag := range m.Options {
|
|
switch flag {
|
|
case "rbind", "bind":
|
|
bind = true
|
|
case "ro":
|
|
readonly = true
|
|
}
|
|
}
|
|
|
|
// normal bind mounts, set type to bind.
|
|
// https://github.com/opencontainers/runc/blob/v1.1.3/libcontainer/specconv/spec_linux.go#L512-L520
|
|
mountType := m.Type
|
|
if mountType != vc.KataEphemeralDevType && mountType != vc.KataLocalDevType && bind {
|
|
mountType = "bind"
|
|
}
|
|
|
|
return vc.Mount{
|
|
Source: m.Source,
|
|
Destination: m.Destination,
|
|
Type: mountType,
|
|
Options: m.Options,
|
|
ReadOnly: readonly,
|
|
}
|
|
}
|
|
|
|
func containerMounts(spec specs.Spec) []vc.Mount {
|
|
ociMounts := spec.Mounts
|
|
|
|
if ociMounts == nil {
|
|
return []vc.Mount{}
|
|
}
|
|
|
|
var mnts []vc.Mount
|
|
for _, m := range ociMounts {
|
|
mnts = append(mnts, newMount(m))
|
|
}
|
|
|
|
return mnts
|
|
}
|
|
|
|
// contains reports whether toFind is equal to one of the elements of strings.
func contains(strings []string, toFind string) bool {
	for i := 0; i < len(strings); i++ {
		if strings[i] == toFind {
			return true
		}
	}
	return false
}
|
|
|
|
// regexpContains reports whether toMatch matches at least one of the regular
// expressions in regexps. Patterns are not anchored implicitly. A pattern that
// fails to compile is treated as a non-match.
func regexpContains(regexps []string, toMatch string) bool {
	for _, pattern := range regexps {
		// The compile error is dropped on purpose: matched is false in that case.
		matched, _ := regexp.MatchString(pattern, toMatch)
		if matched {
			return true
		}
	}
	return false
}
|
|
|
|
// checkPathIsInGlobs reports whether path is exactly one of the file names
// produced by expanding any pattern in globs on the local filesystem.
// A malformed pattern contributes no file names.
func checkPathIsInGlobs(globs []string, path string) bool {
	for _, pattern := range globs {
		// The glob error is dropped on purpose: matches is empty in that case.
		matches, _ := filepath.Glob(pattern)
		for i := range matches {
			if matches[i] == path {
				return true
			}
		}
	}

	return false
}
|
|
|
|
// Check if an annotation name either belongs to another prefix, matches regexp list
|
|
func checkAnnotationNameIsValid(list []string, name string, prefix string) bool {
|
|
if strings.HasPrefix(name, prefix) {
|
|
return regexpContains(list, strings.TrimPrefix(name, prefix))
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func newLinuxDeviceInfo(d specs.LinuxDevice) (*config.DeviceInfo, error) {
|
|
allowedDeviceTypes := []string{"c", "b", "u", "p"}
|
|
|
|
if !contains(allowedDeviceTypes, d.Type) {
|
|
return nil, fmt.Errorf("Unexpected Device Type %s for device %s", d.Type, d.Path)
|
|
}
|
|
|
|
if d.Path == "" {
|
|
return nil, fmt.Errorf("Path cannot be empty for device")
|
|
}
|
|
|
|
deviceInfo := config.DeviceInfo{
|
|
ContainerPath: d.Path,
|
|
DevType: d.Type,
|
|
Major: d.Major,
|
|
Minor: d.Minor,
|
|
}
|
|
if d.UID != nil {
|
|
deviceInfo.UID = *d.UID
|
|
}
|
|
|
|
if d.GID != nil {
|
|
deviceInfo.GID = *d.GID
|
|
}
|
|
|
|
if d.FileMode != nil {
|
|
deviceInfo.FileMode = *d.FileMode
|
|
}
|
|
|
|
return &deviceInfo, nil
|
|
}
|
|
|
|
func containerDeviceInfos(spec specs.Spec) ([]config.DeviceInfo, error) {
|
|
ociLinuxDevices := spec.Linux.Devices
|
|
|
|
if ociLinuxDevices == nil {
|
|
return []config.DeviceInfo{}, nil
|
|
}
|
|
|
|
var devices []config.DeviceInfo
|
|
for _, d := range ociLinuxDevices {
|
|
linuxDeviceInfo, err := newLinuxDeviceInfo(d)
|
|
if err != nil {
|
|
return []config.DeviceInfo{}, err
|
|
}
|
|
|
|
devices = append(devices, *linuxDeviceInfo)
|
|
}
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func networkConfig(ocispec specs.Spec, config RuntimeConfig) (vc.NetworkConfig, error) {
|
|
linux := ocispec.Linux
|
|
if linux == nil {
|
|
return vc.NetworkConfig{}, ErrNoLinux
|
|
}
|
|
|
|
var netConf vc.NetworkConfig
|
|
|
|
for _, n := range linux.Namespaces {
|
|
if n.Type != specs.NetworkNamespace {
|
|
continue
|
|
}
|
|
|
|
if n.Path != "" {
|
|
netConf.NetworkID = n.Path
|
|
}
|
|
}
|
|
netConf.InterworkingModel = config.InterNetworkModel
|
|
netConf.DisableNewNetwork = config.DisableNewNetNs
|
|
|
|
return netConf, nil
|
|
}
|
|
|
|
// ContainerType returns the type of container and if the container type was
|
|
// found from CRI server's annotations in the container spec.
|
|
func ContainerType(spec specs.Spec) (vc.ContainerType, error) {
|
|
for _, key := range CRIContainerTypeKeyList {
|
|
containerTypeVal, ok := spec.Annotations[key]
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
for _, t := range CRIContainerTypeList {
|
|
if t.annotation == containerTypeVal {
|
|
return t.containerType, nil
|
|
}
|
|
|
|
}
|
|
return vc.UnknownContainerType, fmt.Errorf("Unknown container type %s", containerTypeVal)
|
|
}
|
|
|
|
return vc.SingleContainer, nil
|
|
}
|
|
|
|
func GetSandboxConfigPath(annotations map[string]string) string {
|
|
return annotations[vcAnnotations.SandboxConfigPathKey]
|
|
}
|
|
|
|
// SandboxID determines the sandbox ID related to an OCI configuration. This function
|
|
// is expected to be called only when the container type is "PodContainer".
|
|
func SandboxID(spec specs.Spec) (string, error) {
|
|
for _, key := range CRISandboxNameKeyList {
|
|
sandboxID, ok := spec.Annotations[key]
|
|
if ok {
|
|
return sandboxID, nil
|
|
}
|
|
}
|
|
|
|
return "", fmt.Errorf("Could not find sandbox ID")
|
|
}
|
|
|
|
func addAnnotations(ocispec specs.Spec, config *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
for key := range ocispec.Annotations {
|
|
if !checkAnnotationNameIsValid(runtime.HypervisorConfig.EnableAnnotations, key, vcAnnotations.KataAnnotationHypervisorPrefix) {
|
|
return fmt.Errorf("annotation %v is not enabled", key)
|
|
}
|
|
}
|
|
|
|
err := addAssetAnnotations(ocispec, config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisorConfigOverrides(ocispec, config, runtime); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addRuntimeConfigOverrides(ocispec, config, runtime); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addAgentConfigOverrides(ocispec, config); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func addAssetAnnotations(ocispec specs.Spec, config *vc.SandboxConfig) error {
|
|
assetAnnotations, err := types.AssetAnnotations()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, a := range assetAnnotations {
|
|
value, ok := ocispec.Annotations[a]
|
|
if ok {
|
|
config.Annotations[a] = value
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
if err := addHypervisorCPUOverrides(ocispec, config); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisorMemoryOverrides(ocispec, config, runtime); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisorBlockOverrides(ocispec, config); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisorVirtioFsOverrides(ocispec, config, runtime); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisporNetworkOverrides(ocispec, config); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := addHypervisorPathOverrides(ocispec, config, runtime); err != nil {
|
|
return err
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
|
if value != "" {
|
|
config.HypervisorConfig.HypervisorMachineType = value
|
|
}
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.MachineAccelerators]; ok {
|
|
if value != "" {
|
|
config.HypervisorConfig.MachineAccelerators = value
|
|
}
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.VhostUserStorePath]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.VhostUserStorePathList, value) {
|
|
return fmt.Errorf("vhost store path %v required from annotation is not valid", value)
|
|
}
|
|
config.HypervisorConfig.VhostUserStorePath = value
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.GuestHookPath]; ok {
|
|
if value != "" {
|
|
config.HypervisorConfig.GuestHookPath = value
|
|
}
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableImageNvdimm).setBool(func(disableNvdimm bool) {
|
|
config.HypervisorConfig.DisableImageNvdimm = disableNvdimm
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.HotplugVFIOOnRootBus).setBool(func(hotplugVFIOOnRootBus bool) {
|
|
config.HypervisorConfig.HotplugVFIOOnRootBus = hotplugVFIOOnRootBus
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.UseLegacySerial).setBool(func(useLegacySerial bool) {
|
|
config.HypervisorConfig.LegacySerial = useLegacySerial
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeRootPort).setUint(func(pcieRootPort uint64) {
|
|
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.EntropySourceList, value) {
|
|
return fmt.Errorf("entropy source %v required from annotation is not valid", value)
|
|
}
|
|
if value != "" {
|
|
config.HypervisorConfig.EntropySource = value
|
|
}
|
|
}
|
|
if epcSize, ok := ocispec.Annotations[vcAnnotations.SGXEPC]; ok {
|
|
quantity, err := resource.ParseQuantity(epcSize)
|
|
if err != nil {
|
|
return fmt.Errorf("Couldn't parse EPC '%v': %v", err, epcSize)
|
|
}
|
|
|
|
if quantity.Format != resource.BinarySI {
|
|
return fmt.Errorf("Unsupported EPC format '%v': use Ki | Mi | Gi | Ti | Pi | Ei as suffix", epcSize)
|
|
}
|
|
|
|
size, _ := quantity.AsInt64()
|
|
|
|
config.HypervisorConfig.SGXEPCSize = size
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
if value, ok := ocispec.Annotations[vcAnnotations.HypervisorPath]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.HypervisorPathList, value) {
|
|
return fmt.Errorf("hypervisor %v required from annotation is not valid", value)
|
|
}
|
|
config.HypervisorConfig.HypervisorPath = value
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.JailerPath]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.JailerPathList, value) {
|
|
return fmt.Errorf("jailer %v required from annotation is not valid", value)
|
|
}
|
|
config.HypervisorConfig.JailerPath = value
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.CtlPath]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.HypervisorCtlPathList, value) {
|
|
return fmt.Errorf("hypervisor control %v required from annotation is not valid", value)
|
|
}
|
|
config.HypervisorConfig.HypervisorCtlPath = value
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.KernelParams]; ok {
|
|
if value != "" {
|
|
params := vc.DeserializeParams(strings.Fields(value))
|
|
for _, param := range params {
|
|
if err := config.HypervisorConfig.AddKernelParam(param); err != nil {
|
|
return fmt.Errorf("Error adding kernel parameters in annotation kernel_params : %v", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
|
if memorySz < vc.MinHypervisorMemory {
|
|
return fmt.Errorf("Memory specified in annotation %s is less than minimum required %d, please specify a larger value", vcAnnotations.DefaultMemory, vc.MinHypervisorMemory)
|
|
}
|
|
sbConfig.HypervisorConfig.MemorySize = uint32(memorySz)
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.MemSlots).setUint(func(mslots uint64) {
|
|
if mslots > 0 {
|
|
sbConfig.HypervisorConfig.MemSlots = uint32(mslots)
|
|
}
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.MemOffset).setUint(func(moffset uint64) {
|
|
if moffset > 0 {
|
|
sbConfig.HypervisorConfig.MemOffset = moffset
|
|
}
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.VirtioMem).setBool(func(virtioMem bool) {
|
|
sbConfig.HypervisorConfig.VirtioMem = virtioMem
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.MemPrealloc).setBool(func(memPrealloc bool) {
|
|
sbConfig.HypervisorConfig.MemPrealloc = memPrealloc
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.FileBackedMemRootDir]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.FileBackedMemRootList, value) {
|
|
return fmt.Errorf("file_mem_backend value %v required from annotation is not valid", value)
|
|
}
|
|
sbConfig.HypervisorConfig.FileBackedMemRootDir = value
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.HugePages).setBool(func(hugePages bool) {
|
|
sbConfig.HypervisorConfig.HugePages = hugePages
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.IOMMU).setBool(func(iommu bool) {
|
|
sbConfig.HypervisorConfig.IOMMU = iommu
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.IOMMUPlatform).setBool(func(deviceIOMMU bool) {
|
|
sbConfig.HypervisorConfig.IOMMUPlatform = deviceIOMMU
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableGuestSwap).setBool(func(enableGuestSwap bool) {
|
|
sbConfig.HypervisorConfig.GuestSwap = enableGuestSwap
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableRootlessHypervisor).setBool(func(enableRootlessHypervisor bool) {
|
|
sbConfig.HypervisorConfig.Rootless = enableRootlessHypervisor
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
|
numCPUs := goruntime.NumCPU()
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultVCPUs).setUintWithCheck(func(vcpus uint64) error {
|
|
if uint32(vcpus) > uint32(numCPUs) {
|
|
return fmt.Errorf("Number of cpus %d specified in annotation default_vcpus is greater than the number of CPUs %d on the system", vcpus, numCPUs)
|
|
}
|
|
sbConfig.HypervisorConfig.NumVCPUs = uint32(vcpus)
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableVCPUsPinning).setBool(func(enableVCPUsPinning bool) {
|
|
sbConfig.HypervisorConfig.EnableVCPUsPinning = enableVCPUsPinning
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMaxVCPUs).setUintWithCheck(func(maxVCPUs uint64) error {
|
|
max := uint32(maxVCPUs)
|
|
|
|
if max > uint32(numCPUs) {
|
|
return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than the number of CPUs %d on the system", max, numCPUs)
|
|
}
|
|
|
|
if sbConfig.HypervisorType == vc.QemuHypervisor && max > govmm.MaxVCPUs() {
|
|
return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, govmm.MaxVCPUs())
|
|
}
|
|
sbConfig.HypervisorConfig.DefaultMaxVCPUs = max
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func addHypervisorBlockOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
|
if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceDriver]; ok {
|
|
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm, config.VirtioBlockCCW}
|
|
|
|
valid := false
|
|
for _, b := range supportedBlockDrivers {
|
|
if b == value {
|
|
sbConfig.HypervisorConfig.BlockDeviceDriver = value
|
|
valid = true
|
|
}
|
|
}
|
|
|
|
if !valid {
|
|
return fmt.Errorf("Invalid hypervisor block storage driver %v specified in annotation (supported drivers: %v)", value, supportedBlockDrivers)
|
|
}
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.BlockDeviceAIO]; ok {
|
|
supportedAIO := []string{config.AIONative, config.AIOThreads, config.AIOIOUring}
|
|
|
|
valid := false
|
|
for _, b := range supportedAIO {
|
|
if b == value {
|
|
sbConfig.HypervisorConfig.BlockDeviceAIO = value
|
|
valid = true
|
|
}
|
|
}
|
|
|
|
if !valid {
|
|
return fmt.Errorf("Invalid AIO mechanism %v specified in annotation (supported IO mechanism : %v)", value, supportedAIO)
|
|
}
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableBlockDeviceUse).setBool(func(disableBlockDeviceUse bool) {
|
|
sbConfig.HypervisorConfig.DisableBlockDeviceUse = disableBlockDeviceUse
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.EnableIOThreads).setBool(func(enableIOThreads bool) {
|
|
sbConfig.HypervisorConfig.EnableIOThreads = enableIOThreads
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheSet).setBool(func(blockDeviceCacheSet bool) {
|
|
sbConfig.HypervisorConfig.BlockDeviceCacheSet = blockDeviceCacheSet
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheDirect).setBool(func(blockDeviceCacheDirect bool) {
|
|
sbConfig.HypervisorConfig.BlockDeviceCacheDirect = blockDeviceCacheDirect
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return newAnnotationConfiguration(ocispec, vcAnnotations.BlockDeviceCacheNoflush).setBool(func(blockDeviceCacheNoflush bool) {
|
|
sbConfig.HypervisorConfig.BlockDeviceCacheNoflush = blockDeviceCacheNoflush
|
|
})
|
|
}
|
|
|
|
func addHypervisorVirtioFsOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
if value, ok := ocispec.Annotations[vcAnnotations.SharedFS]; ok {
|
|
supportedSharedFS := []string{config.Virtio9P, config.VirtioFS}
|
|
valid := false
|
|
for _, fs := range supportedSharedFS {
|
|
if fs == value {
|
|
sbConfig.HypervisorConfig.SharedFS = value
|
|
valid = true
|
|
}
|
|
}
|
|
|
|
if !valid {
|
|
return fmt.Errorf("Invalid hypervisor shared file system %v specified for annotation shared_fs, (supported file systems: %v)", value, supportedSharedFS)
|
|
}
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSDaemon]; ok {
|
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.VirtioFSDaemonList, value) {
|
|
return fmt.Errorf("virtiofs daemon %v required from annotation is not valid", value)
|
|
}
|
|
sbConfig.HypervisorConfig.VirtioFSDaemon = value
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSExtraArgs]; ok {
|
|
var parsedValue []string
|
|
err := json.Unmarshal([]byte(value), &parsedValue)
|
|
if err != nil {
|
|
return fmt.Errorf("Error parsing virtiofsd extra arguments: %v", err)
|
|
}
|
|
sbConfig.HypervisorConfig.VirtioFSExtraArgs = append(sbConfig.HypervisorConfig.VirtioFSExtraArgs, parsedValue...)
|
|
}
|
|
|
|
if sbConfig.HypervisorConfig.SharedFS == config.VirtioFS && sbConfig.HypervisorConfig.VirtioFSDaemon == "" {
|
|
return fmt.Errorf("cannot enable virtio-fs without daemon path")
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.VirtioFSCache]; ok {
|
|
sbConfig.HypervisorConfig.VirtioFSCache = value
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.VirtioFSCacheSize).setUint(func(cacheSize uint64) {
|
|
sbConfig.HypervisorConfig.VirtioFSCacheSize = uint32(cacheSize)
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return newAnnotationConfiguration(ocispec, vcAnnotations.Msize9p).setUintWithCheck(func(msize9p uint64) error {
|
|
if msize9p == 0 {
|
|
return fmt.Errorf("Error parsing annotation for msize_9p, please specify positive numeric value")
|
|
}
|
|
sbConfig.HypervisorConfig.Msize9p = uint32(msize9p)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func addHypervisporNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
|
if value, ok := ocispec.Annotations[vcAnnotations.CPUFeatures]; ok {
|
|
if value != "" {
|
|
sbConfig.HypervisorConfig.CPUFeatures = value
|
|
}
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableVhostNet).setBool(func(disableVhostNet bool) {
|
|
sbConfig.HypervisorConfig.DisableVhostNet = disableVhostNet
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.RxRateLimiterMaxRate).setUint(func(rxRateLimiterMaxRate uint64) {
|
|
sbConfig.HypervisorConfig.RxRateLimiterMaxRate = rxRateLimiterMaxRate
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
return newAnnotationConfiguration(ocispec, vcAnnotations.TxRateLimiterMaxRate).setUint(func(txRateLimiterMaxRate uint64) {
|
|
sbConfig.HypervisorConfig.TxRateLimiterMaxRate = txRateLimiterMaxRate
|
|
})
|
|
}
|
|
|
|
func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableGuestSeccomp).setBool(func(disableGuestSeccomp bool) {
|
|
sbConfig.DisableGuestSeccomp = disableGuestSeccomp
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.SandboxCgroupOnly).setBool(func(sandboxCgroupOnly bool) {
|
|
sbConfig.SandboxCgroupOnly = sandboxCgroupOnly
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.Experimental]; ok {
|
|
features := strings.Split(value, " ")
|
|
sbConfig.Experimental = []exp.Feature{}
|
|
|
|
for _, f := range features {
|
|
feature := exp.Get(f)
|
|
if feature == nil {
|
|
return fmt.Errorf("Unsupported experimental feature %s specified in annotation %v", f, vcAnnotations.Experimental)
|
|
}
|
|
sbConfig.Experimental = append(sbConfig.Experimental, *feature)
|
|
}
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableNewNetNs).setBool(func(disableNewNetNs bool) {
|
|
sbConfig.NetworkConfig.DisableNewNetwork = disableNewNetNs
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.InterNetworkModel]; ok {
|
|
runtimeConfig := RuntimeConfig{}
|
|
if err := runtimeConfig.InterNetworkModel.SetModel(value); err != nil {
|
|
return fmt.Errorf("Unknown network model specified in annotation %s", vcAnnotations.InterNetworkModel)
|
|
}
|
|
|
|
sbConfig.NetworkConfig.InterworkingModel = runtimeConfig.InterNetworkModel
|
|
}
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.VfioMode]; ok {
|
|
if err := sbConfig.VfioMode.VFIOSetMode(value); err != nil {
|
|
return fmt.Errorf("Unknown VFIO mode \"%s\" in annotation %s",
|
|
value, vcAnnotations.VfioMode)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addAgentConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig) error {
|
|
c := config.AgentConfig
|
|
|
|
if value, ok := ocispec.Annotations[vcAnnotations.KernelModules]; ok {
|
|
modules := strings.Split(value, KernelModulesSeparator)
|
|
c.KernelModules = modules
|
|
config.AgentConfig = c
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.AgentTrace).setBool(func(trace bool) {
|
|
c.Trace = trace
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.AgentContainerPipeSize).setUint(func(containerPipeSize uint64) {
|
|
c.ContainerPipeSize = uint32(containerPipeSize)
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
config.AgentConfig = c
|
|
|
|
return nil
|
|
}
|
|
|
|
// SandboxConfig converts an OCI compatible runtime configuration file
|
|
// to a virtcontainers sandbox configuration structure.
|
|
func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid string, detach, systemdCgroup bool) (vc.SandboxConfig, error) {
|
|
containerConfig, err := ContainerConfig(ocispec, bundlePath, cid, detach)
|
|
if err != nil {
|
|
return vc.SandboxConfig{}, err
|
|
}
|
|
|
|
shmSize, err := getShmSize(containerConfig)
|
|
if err != nil {
|
|
return vc.SandboxConfig{}, err
|
|
}
|
|
|
|
networkConfig, err := networkConfig(ocispec, runtime)
|
|
if err != nil {
|
|
return vc.SandboxConfig{}, err
|
|
}
|
|
|
|
sandboxConfig := vc.SandboxConfig{
|
|
ID: cid,
|
|
|
|
Hostname: ocispec.Hostname,
|
|
|
|
HypervisorType: runtime.HypervisorType,
|
|
HypervisorConfig: runtime.HypervisorConfig,
|
|
|
|
AgentConfig: runtime.AgentConfig,
|
|
|
|
NetworkConfig: networkConfig,
|
|
|
|
Containers: []vc.ContainerConfig{containerConfig},
|
|
|
|
Annotations: map[string]string{
|
|
vcAnnotations.BundlePathKey: bundlePath,
|
|
},
|
|
|
|
SandboxResources: vc.SandboxResourceSizing{
|
|
WorkloadCPUs: runtime.SandboxCPUs,
|
|
WorkloadMemMB: runtime.SandboxMemMB,
|
|
},
|
|
|
|
StaticResourceMgmt: runtime.StaticSandboxResourceMgmt,
|
|
|
|
ShmSize: shmSize,
|
|
|
|
VfioMode: runtime.VfioMode,
|
|
|
|
SystemdCgroup: systemdCgroup,
|
|
|
|
SandboxCgroupOnly: runtime.SandboxCgroupOnly,
|
|
SandboxBindMounts: runtime.SandboxBindMounts,
|
|
|
|
DisableGuestSeccomp: runtime.DisableGuestSeccomp,
|
|
|
|
Experimental: runtime.Experimental,
|
|
}
|
|
|
|
if err := addAnnotations(ocispec, &sandboxConfig, runtime); err != nil {
|
|
return vc.SandboxConfig{}, err
|
|
}
|
|
|
|
// If we are utilizing static resource management for the sandbox, ensure that the hypervisor is started
|
|
// with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime
|
|
// configuration or annotations) as well as any specified workload resources.
|
|
if sandboxConfig.StaticResourceMgmt {
|
|
sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUs
|
|
sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize
|
|
|
|
sandboxConfig.HypervisorConfig.NumVCPUs += sandboxConfig.SandboxResources.WorkloadCPUs
|
|
sandboxConfig.HypervisorConfig.MemorySize += sandboxConfig.SandboxResources.WorkloadMemMB
|
|
|
|
ociLog.WithFields(logrus.Fields{
|
|
"workload cpu": sandboxConfig.SandboxResources.WorkloadCPUs,
|
|
"default cpu": sandboxConfig.SandboxResources.BaseCPUs,
|
|
"workload mem in MB": sandboxConfig.SandboxResources.WorkloadMemMB,
|
|
"default mem": sandboxConfig.SandboxResources.BaseMemMB,
|
|
}).Debugf("static resources set")
|
|
|
|
}
|
|
|
|
return sandboxConfig, nil
|
|
}
|
|
|
|
// ContainerConfig converts an OCI compatible runtime configuration
|
|
// file to a virtcontainers container configuration structure.
|
|
func ContainerConfig(ocispec specs.Spec, bundlePath, cid string, detach bool) (vc.ContainerConfig, error) {
|
|
rootfs := vc.RootFs{Target: ocispec.Root.Path, Mounted: true}
|
|
if !filepath.IsAbs(rootfs.Target) {
|
|
rootfs.Target = filepath.Join(bundlePath, ocispec.Root.Path)
|
|
}
|
|
|
|
ociLog.Debugf("container rootfs: %s", rootfs.Target)
|
|
|
|
cmd := types.Cmd{
|
|
Args: ocispec.Process.Args,
|
|
Envs: cmdEnvs(ocispec, []types.EnvVar{}),
|
|
WorkDir: ocispec.Process.Cwd,
|
|
User: strconv.FormatUint(uint64(ocispec.Process.User.UID), 10),
|
|
PrimaryGroup: strconv.FormatUint(uint64(ocispec.Process.User.GID), 10),
|
|
Interactive: ocispec.Process.Terminal,
|
|
Detach: detach,
|
|
NoNewPrivileges: ocispec.Process.NoNewPrivileges,
|
|
}
|
|
|
|
cmd.SupplementaryGroups = []string{}
|
|
for _, gid := range ocispec.Process.User.AdditionalGids {
|
|
cmd.SupplementaryGroups = append(cmd.SupplementaryGroups, strconv.FormatUint(uint64(gid), 10))
|
|
}
|
|
|
|
deviceInfos, err := containerDeviceInfos(ocispec)
|
|
if err != nil {
|
|
return vc.ContainerConfig{}, err
|
|
}
|
|
|
|
if ocispec.Process != nil {
|
|
cmd.Capabilities = ocispec.Process.Capabilities
|
|
}
|
|
|
|
containerConfig := vc.ContainerConfig{
|
|
ID: cid,
|
|
RootFs: rootfs,
|
|
ReadonlyRootfs: ocispec.Root.Readonly,
|
|
Cmd: cmd,
|
|
Annotations: ocispec.Annotations,
|
|
Mounts: containerMounts(ocispec),
|
|
DeviceInfos: deviceInfos,
|
|
Resources: *ocispec.Linux.Resources,
|
|
|
|
// This is a custom OCI spec modified at SetEphemeralStorageType()
|
|
// to support ephemeral storage and k8s empty dir.
|
|
CustomSpec: &ocispec,
|
|
}
|
|
if containerConfig.Annotations == nil {
|
|
containerConfig.Annotations = map[string]string{
|
|
vcAnnotations.BundlePathKey: bundlePath,
|
|
}
|
|
} else {
|
|
containerConfig.Annotations[vcAnnotations.BundlePathKey] = bundlePath
|
|
}
|
|
|
|
cType, err := ContainerType(ocispec)
|
|
if err != nil {
|
|
return vc.ContainerConfig{}, err
|
|
}
|
|
|
|
containerConfig.Annotations[vcAnnotations.ContainerTypeKey] = string(cType)
|
|
|
|
return containerConfig, nil
|
|
}
|
|
|
|
func getShmSize(c vc.ContainerConfig) (uint64, error) {
|
|
var shmSize uint64
|
|
|
|
for _, m := range c.Mounts {
|
|
if m.Destination != "/dev/shm" {
|
|
continue
|
|
}
|
|
|
|
shmSize = vc.DefaultShmSize
|
|
|
|
if m.Type == "bind" && m.Source != "/dev/shm" {
|
|
var s syscall.Statfs_t
|
|
|
|
if err := syscall.Statfs(m.Source, &s); err != nil {
|
|
return 0, err
|
|
}
|
|
shmSize = uint64(s.Bsize) * s.Blocks
|
|
}
|
|
break
|
|
}
|
|
|
|
ociLog.Infof("shm-size detected: %d", shmSize)
|
|
|
|
return shmSize, nil
|
|
}
|
|
|
|
// IsCRIOContainerManager check if a Pod is created from CRI-O
|
|
func IsCRIOContainerManager(spec *specs.Spec) bool {
|
|
if val, ok := spec.Annotations[podmanAnnotations.ContainerType]; ok {
|
|
if val == podmanAnnotations.ContainerTypeSandbox || val == podmanAnnotations.ContainerTypeContainer {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// errAnnotationPositiveNumericKey is the error format used when an
// annotation value cannot be parsed as an unsigned integer; %s is the
// annotation key.
const errAnnotationPositiveNumericKey = "Error parsing annotation for %s: Please specify positive numeric value"

// errAnnotationBoolKey is the error format used when an annotation value
// cannot be parsed as a boolean; %s is the annotation key.
const errAnnotationBoolKey = "Error parsing annotation for %s: Please specify boolean value 'true|false'"
|
|
|
|
type annotationConfiguration struct {
|
|
ocispec specs.Spec
|
|
key string
|
|
}
|
|
|
|
func newAnnotationConfiguration(ocispec specs.Spec, key string) *annotationConfiguration {
|
|
return &annotationConfiguration{
|
|
ocispec: ocispec,
|
|
key: key,
|
|
}
|
|
}
|
|
|
|
func (a *annotationConfiguration) setBool(f func(bool)) error {
|
|
if value, ok := a.ocispec.Annotations[a.key]; ok {
|
|
boolValue, err := strconv.ParseBool(value)
|
|
if err != nil {
|
|
return fmt.Errorf(errAnnotationBoolKey, a.key)
|
|
}
|
|
f(boolValue)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (a *annotationConfiguration) setUint(f func(uint64)) error {
|
|
return a.setUintWithCheck(func(v uint64) error {
|
|
f(v)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func (a *annotationConfiguration) setUintWithCheck(f func(uint64) error) error {
|
|
if value, ok := a.ocispec.Annotations[a.key]; ok {
|
|
uintValue, err := strconv.ParseUint(value, 10, 64)
|
|
if err != nil {
|
|
return fmt.Errorf(errAnnotationPositiveNumericKey, a.key)
|
|
}
|
|
return f(uintValue)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CalculateSandboxSizing will calculate the number of CPUs and amount of Memory that should
|
|
// be added to the VM if sandbox annotations are provided with this sizing details
|
|
func CalculateSandboxSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) {
|
|
var memory, quota int64
|
|
var period uint64
|
|
var err error
|
|
|
|
if spec == nil || spec.Annotations == nil {
|
|
return 0, 0
|
|
}
|
|
|
|
// For each annotation, if it isn't defined, or if there's an error in parsing, we'll log
|
|
// a warning and continue the calculation with 0 value. We expect values like,
|
|
// Annotations[SandboxMem] = "1048576"
|
|
// Annotations[SandboxCPUPeriod] = "100000"
|
|
// Annotations[SandboxCPUQuota] = "220000"
|
|
// ... to result in VM resources of 1 (MB) for memory, and 3 for CPU (2200 mCPU rounded up to 3).
|
|
annotation, ok := spec.Annotations[ctrAnnotations.SandboxCPUPeriod]
|
|
if ok {
|
|
period, err = strconv.ParseUint(annotation, 10, 64)
|
|
if err != nil {
|
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxCPUPeriod: %s", annotation)
|
|
period = 0
|
|
}
|
|
}
|
|
|
|
annotation, ok = spec.Annotations[ctrAnnotations.SandboxCPUQuota]
|
|
if ok {
|
|
quota, err = strconv.ParseInt(annotation, 10, 64)
|
|
if err != nil {
|
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxCPUQuota: %s", annotation)
|
|
quota = 0
|
|
}
|
|
}
|
|
|
|
annotation, ok = spec.Annotations[ctrAnnotations.SandboxMem]
|
|
if ok {
|
|
memory, err = strconv.ParseInt(annotation, 10, 64)
|
|
if err != nil {
|
|
ociLog.Warningf("sandbox-sizing: failure to parse SandboxMem: %s", annotation)
|
|
memory = 0
|
|
}
|
|
}
|
|
|
|
return calculateVMResources(period, quota, memory)
|
|
}
|
|
|
|
// CalculateContainerSizing will calculate the number of CPUs and amount of memory that is needed
|
|
// based on the provided LinuxResources
|
|
func CalculateContainerSizing(spec *specs.Spec) (numCPU, memSizeMB uint32) {
|
|
var memory, quota int64
|
|
var period uint64
|
|
|
|
if spec == nil || spec.Linux == nil || spec.Linux.Resources == nil {
|
|
return 0, 0
|
|
}
|
|
|
|
resources := spec.Linux.Resources
|
|
|
|
if resources.CPU != nil && resources.CPU.Quota != nil && resources.CPU.Period != nil {
|
|
quota = *resources.CPU.Quota
|
|
period = *resources.CPU.Period
|
|
}
|
|
|
|
if resources.Memory != nil && resources.Memory.Limit != nil {
|
|
memory = *resources.Memory.Limit
|
|
}
|
|
|
|
return calculateVMResources(period, quota, memory)
|
|
}
|
|
|
|
func calculateVMResources(period uint64, quota int64, memory int64) (numCPU, memSizeMB uint32) {
|
|
numCPU = vcutils.CalculateVCpusFromMilliCpus(vcutils.CalculateMilliCPUs(quota, period))
|
|
|
|
if memory < 0 {
|
|
// While spec allows for a negative value to indicate unconstrained, we don't
|
|
// see this in practice. Since we rely only on default memory if the workload
|
|
// is unconstrained, we will treat as 0 for VM resource accounting.
|
|
ociLog.Infof("memory limit provided < 0, treating as 0 MB for VM sizing: %d", memory)
|
|
memSizeMB = 0
|
|
} else {
|
|
memSizeMB = uint32(memory / 1024 / 1024)
|
|
}
|
|
return numCPU, memSizeMB
|
|
}
|