diff --git a/Makefile b/Makefile index a9055ec23..a27ec3127 100644 --- a/Makefile +++ b/Makefile @@ -92,6 +92,8 @@ PROXYPATH := $(PKGLIBEXECDIR)/$(PROXYCMD) # Default number of vCPUs DEFVCPUS := 1 +# Default maximum number of vCPUs +DEFMAXVCPUS := 0 # Default memory size in MiB DEFMEMSZ := 2048 #Default number of bridges @@ -169,6 +171,7 @@ USER_VARS += SHAREDIR USER_VARS += SHIMPATH USER_VARS += SYSCONFDIR USER_VARS += DEFVCPUS +USER_VARS += DEFMAXVCPUS USER_VARS += DEFMEMSZ USER_VARS += DEFBRIDGES USER_VARS += DEFNETWORKMODEL @@ -262,6 +265,7 @@ const defaultMachineType = "$(MACHINETYPE)" const defaultRootDirectory = "$(PKGRUNDIR)" const defaultVCPUCount uint32 = $(DEFVCPUS) +const defaultMaxVCPUCount uint32 = $(DEFMAXVCPUS) const defaultMemSize uint32 = $(DEFMEMSZ) // MiB const defaultBridgesCount uint32 = $(DEFBRIDGES) const defaultInterNetworkingModel = "$(DEFNETWORKMODEL)" @@ -347,6 +351,7 @@ $(GENERATED_FILES): %: %.in Makefile VERSION -e "s|@MACHINETYPE@|$(MACHINETYPE)|g" \ -e "s|@SHIMPATH@|$(SHIMPATH)|g" \ -e "s|@DEFVCPUS@|$(DEFVCPUS)|g" \ + -e "s|@DEFMAXVCPUS@|$(DEFMAXVCPUS)|g" \ -e "s|@DEFMEMSZ@|$(DEFMEMSZ)|g" \ -e "s|@DEFBRIDGES@|$(DEFBRIDGES)|g" \ -e "s|@DEFNETWORKMODEL@|$(DEFNETWORKMODEL)|g" \ diff --git a/cli/config.go b/cli/config.go index 9950d0ae4..40d64a788 100644 --- a/cli/config.go +++ b/cli/config.go @@ -74,6 +74,7 @@ type hypervisor struct { KernelParams string `toml:"kernel_params"` MachineType string `toml:"machine_type"` DefaultVCPUs int32 `toml:"default_vcpus"` + DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` DefaultMemSz uint32 `toml:"default_memory"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` @@ -202,6 +203,25 @@ func (h hypervisor) defaultVCPUs() uint32 { return uint32(h.DefaultVCPUs) } +func (h hypervisor) defaultMaxVCPUs() uint32 { + numcpus := uint32(goruntime.NumCPU()) + maxvcpus := vc.MaxQemuVCPUs() + reqVCPUs := h.DefaultMaxVCPUs + + //don't exceed the number of physical CPUs. If a default is not provided, use the + // numbers of physical CPUs + if reqVCPUs >= numcpus || reqVCPUs == 0 { + reqVCPUs = numcpus + } + + // Don't exceed the maximum number of vCPUs supported by hypervisor + if reqVCPUs > maxvcpus { + return maxvcpus + } + + return reqVCPUs +} + func (h hypervisor) defaultMemSz() uint32 { if h.DefaultMemSz < 8 { return defaultMemSize // MiB @@ -313,6 +333,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, DefaultVCPUs: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), DefaultMemSz: h.defaultMemSz(), DefaultBridges: h.defaultBridges(), DisableBlockDeviceUse: h.DisableBlockDeviceUse, @@ -418,6 +439,7 @@ func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat MachineAccelerators: defaultMachineAccelerators, HypervisorMachineType: defaultMachineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, DefaultMemSz: defaultMemSize, DefaultBridges: defaultBridgesCount, MemPrealloc: defaultEnableMemPrealloc, diff --git a/cli/config/configuration.toml.in b/cli/config/configuration.toml.in index 4b310aa20..b6e91a31a 100644 --- a/cli/config/configuration.toml.in +++ b/cli/config/configuration.toml.in @@ -45,6 +45,21 @@ machine_accelerators="@MACHINEACCELERATORS@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = 1 +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what are you doing. +default_maxvcpus = @DEFMAXVCPUS@ # Bridges can be used to hot plug devices. # Limitations: diff --git a/cli/config_test.go b/cli/config_test.go index fc24137a4..5dbdf7524 100644 --- a/cli/config_test.go +++ b/cli/config_test.go @@ -44,6 +44,7 @@ func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath image = "` + imagePath + `" machine_type = "` + machineType + `" default_vcpus = ` + strconv.FormatUint(uint64(defaultVCPUCount), 10) + ` + default_maxvcpus = ` + strconv.FormatUint(uint64(defaultMaxVCPUCount), 10) + ` default_memory = ` + strconv.FormatUint(uint64(defaultMemSize), 10) + ` disable_block_device_use = ` + strconv.FormatBool(disableBlock) + ` enable_iothreads = ` + strconv.FormatBool(enableIOThreads) + ` @@ -129,6 +130,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: uint32(goruntime.NumCPU()), DefaultMemSz: defaultMemSize, DisableBlockDeviceUse: disableBlockDevice, BlockDeviceDriver: defaultBlockDeviceDriver, @@ -513,6 +515,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { InitrdPath: defaultInitrdPath, HypervisorMachineType: defaultMachineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, DefaultMemSz: defaultMemSize, DisableBlockDeviceUse: defaultDisableBlockDeviceUse, DefaultBridges: defaultBridgesCount, @@ -658,10 +661,13 @@ func TestNewShimConfig(t *testing.T) { func TestHypervisorDefaults(t *testing.T) { assert := assert.New(t) + numCPUs := goruntime.NumCPU() + h := hypervisor{} assert.Equal(h.machineType(), defaultMachineType, "default hypervisor machine type wrong") assert.Equal(h.defaultVCPUs(), defaultVCPUCount, "default vCPU number is wrong") + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") assert.Equal(h.defaultMemSz(), defaultMemSize, "default memory size is wrong") machineType := "foo" @@ -670,15 +676,24 @@ func TestHypervisorDefaults(t *testing.T) { // auto inferring h.DefaultVCPUs = -1 - assert.Equal(h.defaultVCPUs(), uint32(goruntime.NumCPU()), "default vCPU number is wrong") + assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong") h.DefaultVCPUs = 2 assert.Equal(h.defaultVCPUs(), uint32(2), "default vCPU number is wrong") - numCPUs := goruntime.NumCPU() h.DefaultVCPUs = int32(numCPUs) + 1 assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong") + h.DefaultMaxVCPUs = 2 + assert.Equal(h.defaultMaxVCPUs(), uint32(h.DefaultMaxVCPUs), "default max vCPU number is wrong") + + h.DefaultMaxVCPUs = uint32(numCPUs) + 1 + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") + + maxvcpus := vc.MaxQemuVCPUs() + h.DefaultMaxVCPUs = uint32(maxvcpus) + 1 + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") + h.DefaultMemSz = 1024 assert.Equal(h.defaultMemSz(), uint32(1024), "default memory size is wrong") } diff --git a/virtcontainers/container.go b/virtcontainers/container.go index f96cc61d3..0d1411e9a 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -57,15 +57,11 @@ type ContainerStatus struct { // ContainerResources describes container resources type ContainerResources struct { - // CPUQuota specifies the total amount of time in microseconds - // The number of microseconds per CPUPeriod that the container is guaranteed CPU access - CPUQuota int64 + // VCPUs are the number of vCPUs that are being used by the container + VCPUs uint32 - // CPUPeriod specifies the CPU CFS scheduler period of time in microseconds - CPUPeriod uint64 - - // CPUShares specifies container's weight vs. other containers - CPUShares uint64 + // Mem is the memory that is being used by the container + Mem uint32 } // ContainerConfig describes one container runtime configuration. @@ -804,8 +800,7 @@ func (c *Container) update(resources specs.LinuxResources) error { } newResources := ContainerResources{ - CPUPeriod: *resources.CPU.Period, - CPUQuota: *resources.CPU.Quota, + VCPUs: uint32(utils.ConstraintsToVCPUs(*resources.CPU.Quota, *resources.CPU.Period)), } if err := c.updateResources(currentConfig.Resources, newResources); err != nil { @@ -866,7 +861,7 @@ func (c *Container) hotplugDrive() error { Index: driveIndex, } - if err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil { return err } @@ -903,7 +898,7 @@ func (c *Container) removeDrive() (err error) { l := c.Logger().WithField("device-id", devID) l.Info("Unplugging block device") - if err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil { l.WithError(err).Info("Failed to unplug block device") return err } @@ -938,14 +933,31 @@ func (c *Container) addResources() error { return nil } - vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + // Container is being created, try to add the number of vCPUs specified + vCPUs := c.config.Resources.VCPUs if vCPUs != 0 { virtLog.Debugf("hot adding %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev) + if err != nil { return err } - return c.sandbox.agent.onlineCPUMem(uint32(vCPUs)) + vcpusAdded, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data) + } + + // A different number of vCPUs was added, we have to update + // the resources in order to don't remove vCPUs used by other containers. + if vcpusAdded != vCPUs { + // Set and save container's config + c.config.Resources.VCPUs = vcpusAdded + if err := c.storeContainer(); err != nil { + return err + } + } + + return c.sandbox.agent.onlineCPUMem(vcpusAdded) } return nil @@ -957,10 +969,18 @@ func (c *Container) removeResources() error { return nil } - vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + // In order to don't remove vCPUs used by other containers, we have to remove + // only the vCPUs assigned to the container + config, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id) + if err != nil { + // don't fail, let's use the default configuration + config = *c.config + } + + vCPUs := config.Resources.VCPUs if vCPUs != 0 { virtLog.Debugf("hot removing %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev); err != nil { return err } } @@ -970,9 +990,9 @@ func (c *Container) removeResources() error { func (c *Container) updateResources(oldResources, newResources ContainerResources) error { //TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578 - var vCPUs uint - oldVCPUs := utils.ConstraintsToVCPUs(oldResources.CPUQuota, oldResources.CPUPeriod) - newVCPUs := utils.ConstraintsToVCPUs(newResources.CPUQuota, newResources.CPUPeriod) + var vCPUs uint32 + oldVCPUs := oldResources.VCPUs + newVCPUs := newResources.VCPUs // Update vCPUs is not possible if period and/or quota are not set or // oldVCPUs and newVCPUs are equal. @@ -989,23 +1009,36 @@ func (c *Container) updateResources(oldResources, newResources ContainerResource // hot add vCPUs vCPUs = newVCPUs - oldVCPUs virtLog.Debugf("hot adding %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev) + if err != nil { + return err + } + vcpusAdded, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data) + } + // recalculate the actual number of vCPUs if a different number of vCPUs was added + newResources.VCPUs = oldVCPUs + vcpusAdded + if err := c.sandbox.agent.onlineCPUMem(vcpusAdded); err != nil { return err } } else { // hot remove vCPUs vCPUs = oldVCPUs - newVCPUs virtLog.Debugf("hot removing %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev) + if err != nil { return err } + vcpusRemoved, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs removed, got %+v", data) + } + // recalculate the actual number of vCPUs if a different number of vCPUs was removed + newResources.VCPUs = oldVCPUs - vcpusRemoved } // Set and save container's config c.config.Resources = newResources - if err := c.storeContainer(); err != nil { - return err - } - - return c.sandbox.agent.onlineCPUMem(uint32(vCPUs)) + return c.storeContainer() } diff --git a/virtcontainers/container_test.go b/virtcontainers/container_test.go index bab57911c..cf2668652 100644 --- a/virtcontainers/container_test.go +++ b/virtcontainers/container_test.go @@ -284,7 +284,11 @@ func TestCheckSandboxRunningSuccessful(t *testing.T) { func TestContainerAddResources(t *testing.T) { assert := assert.New(t) - c := &Container{} + c := &Container{ + sandbox: &Sandbox{ + storage: &filesystem{}, + }, + } err := c.addResources() assert.Nil(err) @@ -297,13 +301,16 @@ func TestContainerAddResources(t *testing.T) { err = c.addResources() assert.Nil(err) + vCPUs := uint32(5) c.config.Resources = ContainerResources{ - CPUQuota: 5000, - CPUPeriod: 1000, + VCPUs: vCPUs, } c.sandbox = &Sandbox{ - hypervisor: &mockHypervisor{}, - agent: &noopAgent{}, + hypervisor: &mockHypervisor{ + vCPUs: vCPUs, + }, + agent: &noopAgent{}, + storage: &filesystem{}, } err = c.addResources() assert.Nil(err) @@ -312,7 +319,12 @@ func TestContainerAddResources(t *testing.T) { func TestContainerRemoveResources(t *testing.T) { assert := assert.New(t) - c := &Container{} + c := &Container{ + sandbox: &Sandbox{ + storage: &filesystem{}, + }, + } + err := c.addResources() assert.Nil(err) @@ -325,11 +337,18 @@ func TestContainerRemoveResources(t *testing.T) { err = c.removeResources() assert.Nil(err) + vCPUs := uint32(5) c.config.Resources = ContainerResources{ - CPUQuota: 5000, - CPUPeriod: 1000, + VCPUs: vCPUs, } - c.sandbox = &Sandbox{hypervisor: &mockHypervisor{}} + + c.sandbox = &Sandbox{ + hypervisor: &mockHypervisor{ + vCPUs: vCPUs, + }, + storage: &filesystem{}, + } + err = c.removeResources() assert.Nil(err) } diff --git a/virtcontainers/hyperstart_agent.go b/virtcontainers/hyperstart_agent.go index 648acd506..aab1d341a 100644 --- a/virtcontainers/hyperstart_agent.go +++ b/virtcontainers/hyperstart_agent.go @@ -430,17 +430,6 @@ func (h *hyper) startOneContainer(sandbox *Sandbox, c *Container) error { Process: process, } - if c.config.Resources.CPUQuota != 0 && c.config.Resources.CPUPeriod != 0 { - container.Constraints = hyperstart.Constraints{ - CPUQuota: c.config.Resources.CPUQuota, - CPUPeriod: c.config.Resources.CPUPeriod, - } - } - - if c.config.Resources.CPUShares != 0 { - container.Constraints.CPUShares = c.config.Resources.CPUShares - } - container.SystemMountsInfo.BindMountDev = c.systemMountsInfo.BindMountDev if c.state.Fstype != "" { diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 2e0907ca8..8b501a3f5 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -41,7 +41,7 @@ const ( ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. -var defaultMaxQemuVCPUs = maxQemuVCPUs() +var defaultMaxQemuVCPUs = MaxQemuVCPUs() // deviceType describes a virtualized device type. type deviceType int @@ -499,8 +499,8 @@ type hypervisor interface { pauseSandbox() error resumeSandbox() error addDevice(devInfo interface{}, devType deviceType) error - hotplugAddDevice(devInfo interface{}, devType deviceType) error - hotplugRemoveDevice(devInfo interface{}, devType deviceType) error + hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) + hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) getSandboxConsole(sandboxID string) (string, error) capabilities() capabilities } diff --git a/virtcontainers/mock_hypervisor.go b/virtcontainers/mock_hypervisor.go index 563a395ca..89824cdfe 100644 --- a/virtcontainers/mock_hypervisor.go +++ b/virtcontainers/mock_hypervisor.go @@ -6,6 +6,7 @@ package virtcontainers type mockHypervisor struct { + vCPUs uint32 } func (m *mockHypervisor) init(sandbox *Sandbox) error { @@ -49,12 +50,20 @@ func (m *mockHypervisor) addDevice(devInfo interface{}, devType deviceType) erro return nil } -func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) error { - return nil +func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + switch devType { + case cpuDev: + return m.vCPUs, nil + } + return nil, nil } -func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error { - return nil +func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + switch devType { + case cpuDev: + return m.vCPUs, nil + } + return nil, nil } func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) { diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index f82fd7c38..c886befbd 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -23,6 +23,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/device/config" vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" dockershimAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations/dockershim" + "github.com/kata-containers/runtime/virtcontainers/utils" ) type annotationContainerType struct { @@ -562,11 +563,7 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det if ocispec.Linux.Resources.CPU != nil { if ocispec.Linux.Resources.CPU.Quota != nil && ocispec.Linux.Resources.CPU.Period != nil { - resources.CPUQuota = *ocispec.Linux.Resources.CPU.Quota - resources.CPUPeriod = *ocispec.Linux.Resources.CPU.Period - } - if ocispec.Linux.Resources.CPU.Shares != nil { - resources.CPUShares = *ocispec.Linux.Resources.CPU.Shares + resources.VCPUs = uint32(utils.ConstraintsToVCPUs(*ocispec.Linux.Resources.CPU.Quota, *ocispec.Linux.Resources.CPU.Period)) } } diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 95ae5c826..e329623ac 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -122,6 +122,9 @@ func (q *qemu) kernelParameters() string { // use default parameters params = append(params, defaultKernelParameters...) + // set the maximum number of vCPUs + params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // add the params specified by the provided config. As the kernel // honours the last parameter value set and since the config-provided // params are added here, they will take priority over the defaults. @@ -197,7 +200,7 @@ func (q *qemu) init(sandbox *Sandbox) error { } func (q *qemu) cpuTopology() govmmQemu.SMP { - return q.arch.cpuTopology(q.config.DefaultVCPUs) + return q.arch.cpuTopology(q.config.DefaultVCPUs, q.config.DefaultMaxVCPUs) } func (q *qemu) memoryTopology(sandboxConfig SandboxConfig) (govmmQemu.Memory, error) { @@ -737,44 +740,46 @@ func (q *qemu) hotplugVFIODevice(device deviceDrivers.VFIODevice, op operation) return nil } -func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) error { +func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) (interface{}, error) { switch devType { case blockDev: // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555 drive := devInfo.(*deviceDrivers.Drive) - return q.hotplugBlockDevice(drive, op) + return nil, q.hotplugBlockDevice(drive, op) case cpuDev: vcpus := devInfo.(uint32) return q.hotplugCPUs(vcpus, op) case vfioDev: // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555 device := devInfo.(deviceDrivers.VFIODevice) - return q.hotplugVFIODevice(device, op) + return nil, q.hotplugVFIODevice(device, op) default: - return fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) + return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } } -func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) error { - if err := q.hotplugDevice(devInfo, devType, addDevice); err != nil { - return err +func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + data, err := q.hotplugDevice(devInfo, devType, addDevice) + if err != nil { + return data, err } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } -func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error { - if err := q.hotplugDevice(devInfo, devType, removeDevice); err != nil { - return err +func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + data, err := q.hotplugDevice(devInfo, devType, removeDevice) + if err != nil { + return data, err } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } -func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { +func (q *qemu) hotplugCPUs(vcpus uint32, op operation) (uint32, error) { if vcpus == 0 { q.Logger().Warnf("cannot hotplug 0 vCPUs") - return nil + return 0, nil } defer func(qemu *qemu) { @@ -785,7 +790,7 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { qmp, err := q.qmpSetup() if err != nil { - return err + return 0, err } q.qmpMonitorCh.qmp = qmp @@ -797,19 +802,28 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { return q.hotplugRemoveCPUs(vcpus) } -func (q *qemu) hotplugAddCPUs(amount uint32) error { +// try to hot add an amount of vCPUs, returns the number of vCPUs added +func (q *qemu) hotplugAddCPUs(amount uint32) (uint32, error) { currentVCPUs := q.qemuConfig.SMP.CPUs + uint32(len(q.state.HotpluggedVCPUs)) - // Don't exceed the maximum amount of vCPUs + // Don't fail if the number of max vCPUs is exceeded, log a warning and hot add the vCPUs needed + // to reach out max vCPUs if currentVCPUs+amount > q.config.DefaultMaxVCPUs { - return fmt.Errorf("Unable to hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d", + q.Logger().Warnf("Cannot hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d", amount, currentVCPUs, q.config.DefaultMaxVCPUs) + amount = q.config.DefaultMaxVCPUs - currentVCPUs + } + + if amount == 0 { + // Don't fail if no more vCPUs can be added, since cgroups still can be updated + q.Logger().Warnf("maximum number of vCPUs '%d' has been reached", q.config.DefaultMaxVCPUs) + return 0, nil } // get the list of hotpluggable CPUs hotpluggableVCPUs, err := q.qmpMonitorCh.qmp.ExecuteQueryHotpluggableCPUs(q.qmpMonitorCh.ctx) if err != nil { - return fmt.Errorf("failed to query hotpluggable CPUs: %v", err) + return 0, fmt.Errorf("failed to query hotpluggable CPUs: %v", err) } var hotpluggedVCPUs uint32 @@ -835,7 +849,7 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error { hotpluggedVCPUs++ if hotpluggedVCPUs == amount { // All vCPUs were hotplugged - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } } @@ -844,29 +858,31 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error { q.Logger().Errorf("failed to save hypervisor state after hotplug %d vCPUs: %v", hotpluggedVCPUs, err) } - return fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount) + return hotpluggedVCPUs, fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount) } -func (q *qemu) hotplugRemoveCPUs(amount uint32) error { +// try to hot remove an amount of vCPUs, returns the number of vCPUs removed +func (q *qemu) hotplugRemoveCPUs(amount uint32) (uint32, error) { hotpluggedVCPUs := uint32(len(q.state.HotpluggedVCPUs)) // we can only remove hotplugged vCPUs if amount > hotpluggedVCPUs { - return fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs) + return 0, fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs) } for i := uint32(0); i < amount; i++ { // get the last vCPUs and try to remove it cpu := q.state.HotpluggedVCPUs[len(q.state.HotpluggedVCPUs)-1] if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, cpu.ID); err != nil { - return fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err) + _ = q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return i, fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err) } // remove from the list the vCPU hotunplugged q.state.HotpluggedVCPUs = q.state.HotpluggedVCPUs[:len(q.state.HotpluggedVCPUs)-1] } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } func (q *qemu) pauseSandbox() error { diff --git a/virtcontainers/qemu_amd64.go b/virtcontainers/qemu_amd64.go index 2e3406a92..6099ade1e 100644 --- a/virtcontainers/qemu_amd64.go +++ b/virtcontainers/qemu_amd64.go @@ -71,8 +71,8 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// returns the maximum number of vCPUs supported -func maxQemuVCPUs() uint32 { +// MaxQemuVCPUs returns the maximum number of vCPUs supported +func MaxQemuVCPUs() uint32 { return uint32(240) } diff --git a/virtcontainers/qemu_arch_base.go b/virtcontainers/qemu_arch_base.go index 28a06d2a0..795c63cc5 100644 --- a/virtcontainers/qemu_arch_base.go +++ b/virtcontainers/qemu_arch_base.go @@ -42,7 +42,7 @@ type qemuArch interface { bridges(number uint32) []Bridge // cpuTopology returns the CPU topology for the given amount of vcpus - cpuTopology(vcpus uint32) govmmQemu.SMP + cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP // cpuModel returns the CPU model for the machine type cpuModel() string @@ -219,13 +219,13 @@ func (q *qemuArchBase) bridges(number uint32) []Bridge { return bridges } -func (q *qemuArchBase) cpuTopology(vcpus uint32) govmmQemu.SMP { +func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP { smp := govmmQemu.SMP{ CPUs: vcpus, Sockets: vcpus, Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: maxvcpus, } return smp diff --git a/virtcontainers/qemu_arch_base_test.go b/virtcontainers/qemu_arch_base_test.go index 24100d1e8..906d354cc 100644 --- a/virtcontainers/qemu_arch_base_test.go +++ b/virtcontainers/qemu_arch_base_test.go @@ -169,7 +169,7 @@ func TestQemuArchBaseCPUTopology(t *testing.T) { MaxCPUs: defaultMaxQemuVCPUs, } - smp := qemuArchBase.cpuTopology(vcpus) + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxQemuVCPUs) assert.Equal(expectedSMP, smp) } diff --git a/virtcontainers/qemu_arm64.go b/virtcontainers/qemu_arm64.go index 2b80440b5..42ae76718 100644 --- a/virtcontainers/qemu_arm64.go +++ b/virtcontainers/qemu_arm64.go @@ -42,8 +42,8 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// returns the maximum number of vCPUs supported -func maxQemuVCPUs() uint32 { +// MaxQemuVCPUs returns the maximum number of vCPUs supported +func MaxQemuVCPUs() uint32 { return uint32(runtime.NumCPU()) } diff --git a/virtcontainers/qemu_test.go b/virtcontainers/qemu_test.go index 14751db5e..4470e8b3a 100644 --- a/virtcontainers/qemu_test.go +++ b/virtcontainers/qemu_test.go @@ -52,7 +52,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := "panic=1 initcall_debug foo=foo bar=bar" + expectedOut := fmt.Sprintf("panic=1 initcall_debug nr_cpus=%d foo=foo bar=bar", MaxQemuVCPUs()) params := []Param{ { Key: "foo", @@ -128,7 +128,8 @@ func TestQemuCPUTopology(t *testing.T) { q := &qemu{ arch: &qemuArchBase{}, config: HypervisorConfig{ - DefaultVCPUs: uint32(vcpus), + DefaultVCPUs: uint32(vcpus), + DefaultMaxVCPUs: uint32(vcpus), }, } @@ -137,7 +138,7 @@ func TestQemuCPUTopology(t *testing.T) { Sockets: uint32(vcpus), Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: uint32(vcpus), } smp := q.cpuTopology() diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 823f4bbe4..cd4c17019 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1338,13 +1338,15 @@ func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev) + _, err := s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev) + return err case config.DeviceBlock: blockDevice, ok := device.(*drivers.BlockDevice) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) + _, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) + return err case config.DeviceGeneric: // TODO: what? return nil @@ -1361,13 +1363,15 @@ func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceTy if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev) + _, err := s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev) + return err case config.DeviceBlock: blockDevice, ok := device.(*drivers.BlockDevice) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev) + _, err := s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev) + return err case config.DeviceGeneric: // TODO: what? return nil