mirror of
https://github.com/aljazceru/kata-containers.git
synced 2026-01-10 09:54:22 +01:00
Merge pull request #2407 from teawater/virtio-mem2
qemu: Add virtio-mem support
This commit is contained in:
4
Gopkg.lock
generated
4
Gopkg.lock
generated
@@ -412,11 +412,11 @@
|
||||
revision = "2f1d1f20f75d5404f53b9edf6b53ed5505508675"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:b3ab3b3615583d7a374d4803ea2b71f173a4d8f4d594210e0fc8e9fc7c5a49c9"
|
||||
digest = "1:f0e85729c00a427cef8de798b33b9a35b3117ee8760f6f00fc3cf3a5bb98f844"
|
||||
name = "github.com/intel/govmm"
|
||||
packages = ["qemu"]
|
||||
pruneopts = "NUT"
|
||||
revision = "8cba5a8e5f2816f26f9dc34b8ea968279a5a76eb"
|
||||
revision = "ee21903287393441c7bb53a0b0d39b8fa4075221"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:6da9487ef0cc0cca3eeb1a24e3bb018ce2e07b7c2fc4d50975b54efdcc942118"
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/intel/govmm"
|
||||
revision = "8cba5a8e5f2816f26f9dc34b8ea968279a5a76eb"
|
||||
revision = "ee21903287393441c7bb53a0b0d39b8fa4075221"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/kata-containers/agent"
|
||||
|
||||
@@ -89,6 +89,12 @@ default_memory = @DEFMEMSZ@
|
||||
# Default 0
|
||||
#memory_offset = 0
|
||||
|
||||
# Specifies whether virtio-mem will be enabled or not.
|
||||
# Please note that this option should be used with the command
|
||||
# "echo 1 > /proc/sys/vm/overcommit_memory".
|
||||
# Default false
|
||||
#enable_virtio_mem = true
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
|
||||
@@ -27,6 +27,7 @@ const defaultMaxVCPUCount uint32 = 0
|
||||
const defaultMemSize uint32 = 2048 // MiB
|
||||
const defaultMemSlots uint32 = 10
|
||||
const defaultMemOffset uint32 = 0 // MiB
|
||||
const defaultVirtioMem bool = false
|
||||
const defaultBridgesCount uint32 = 1
|
||||
const defaultInterNetworkingModel = "tcfilter"
|
||||
const defaultDisableBlockDeviceUse bool = false
|
||||
|
||||
@@ -114,6 +114,7 @@ type hypervisor struct {
|
||||
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
|
||||
MemPrealloc bool `toml:"enable_mem_prealloc"`
|
||||
HugePages bool `toml:"enable_hugepages"`
|
||||
VirtioMem bool `toml:"enable_virtio_mem"`
|
||||
FileBackedMemRootDir string `toml:"file_mem_backend"`
|
||||
Swap bool `toml:"enable_swap"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
@@ -623,6 +624,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
MemorySize: h.defaultMemSz(),
|
||||
MemSlots: h.defaultMemSlots(),
|
||||
MemOffset: h.defaultMemOffset(),
|
||||
VirtioMem: h.VirtioMem,
|
||||
EntropySource: h.GetEntropySource(),
|
||||
DefaultBridges: h.defaultBridges(),
|
||||
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
|
||||
@@ -773,6 +775,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
MemorySize: h.defaultMemSz(),
|
||||
MemSlots: h.defaultMemSlots(),
|
||||
MemOffset: h.defaultMemOffset(),
|
||||
VirtioMem: h.VirtioMem,
|
||||
EntropySource: h.GetEntropySource(),
|
||||
DefaultBridges: h.defaultBridges(),
|
||||
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
|
||||
@@ -1054,6 +1057,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
DefaultMaxVCPUs: defaultMaxVCPUCount,
|
||||
MemorySize: defaultMemSize,
|
||||
MemOffset: defaultMemOffset,
|
||||
VirtioMem: defaultVirtioMem,
|
||||
DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
|
||||
DefaultBridges: defaultBridgesCount,
|
||||
MemPrealloc: defaultEnableMemPrealloc,
|
||||
|
||||
2
vendor/github.com/intel/govmm/qemu/qemu_arch_base.go
generated
vendored
2
vendor/github.com/intel/govmm/qemu/qemu_arch_base.go
generated
vendored
@@ -1,4 +1,4 @@
|
||||
// +build !s390x,!s390x_test
|
||||
// +build !s390x
|
||||
|
||||
/*
|
||||
// Copyright contributors to the Virtual Machine Manager for Go project
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// +build s390x s390x_test
|
||||
// +build s390x
|
||||
|
||||
/*
|
||||
// Copyright contributors to the Virtual Machine Manager for Go project
|
||||
60
vendor/github.com/intel/govmm/qemu/qmp.go
generated
vendored
60
vendor/github.com/intel/govmm/qemu/qmp.go
generated
vendored
@@ -946,6 +946,17 @@ func (q *QMP) ExecuteBlockdevDel(ctx context.Context, blockdevID string) error {
|
||||
return q.executeCommand(ctx, "x-blockdev-del", args, nil)
|
||||
}
|
||||
|
||||
// ExecuteChardevDel deletes a char device by sending a chardev-remove command.
|
||||
// chardevID is the id of the char device to be deleted. Typically, this will
|
||||
// match the id passed to ExecuteCharDevUnixSocketAdd. It must be a valid QMP id.
|
||||
func (q *QMP) ExecuteChardevDel(ctx context.Context, chardevID string) error {
|
||||
args := map[string]interface{}{
|
||||
"id": chardevID,
|
||||
}
|
||||
|
||||
return q.executeCommand(ctx, "chardev-remove", args, nil)
|
||||
}
|
||||
|
||||
// ExecuteNetdevAdd adds a Net device to a QEMU instance
|
||||
// using the netdev_add command. netdevID is the id of the device to add.
|
||||
// Must be valid QMP identifier.
|
||||
@@ -1124,6 +1135,27 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver
|
||||
return q.executeCommand(ctx, "device_add", args, nil)
|
||||
}
|
||||
|
||||
// ExecutePCIVhostUserDevAdd adds a vhost-user device to a QEMU instance using the device_add command.
|
||||
// This function can be used to hot plug vhost-user devices on PCI(E) bridges.
|
||||
// It receives the bus and the device address on its parent bus. bus is optional.
|
||||
// devID is the id of the device to add. Must be a valid QMP identifier. chardevID
|
||||
// is the QMP identifier of character device using a unix socket as backend.
|
||||
// driver is the name of vhost-user driver, like vhost-user-blk-pci.
|
||||
func (q *QMP) ExecutePCIVhostUserDevAdd(ctx context.Context, driver, devID, chardevID, addr, bus string) error {
|
||||
args := map[string]interface{}{
|
||||
"driver": driver,
|
||||
"id": devID,
|
||||
"chardev": chardevID,
|
||||
"addr": addr,
|
||||
}
|
||||
|
||||
if bus != "" {
|
||||
args["bus"] = bus
|
||||
}
|
||||
|
||||
return q.executeCommand(ctx, "device_add", args, nil)
|
||||
}
|
||||
|
||||
// ExecuteVFIODeviceAdd adds a VFIO device to a QEMU instance
|
||||
// using the device_add command. devID is the id of the device to add.
|
||||
// Must be valid QMP identifier. bdf is the PCI bus-device-function
|
||||
@@ -1347,8 +1379,8 @@ func (q *QMP) ExecQueryCpusFast(ctx context.Context) ([]CPUInfoFast, error) {
|
||||
return cpuInfoFast, nil
|
||||
}
|
||||
|
||||
// ExecHotplugMemory adds size of MiB memory to the guest
|
||||
func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string, size int, share bool) error {
|
||||
// ExecMemdevAdd adds a memory device of size MiB to the guest
|
||||
func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, size int, share bool, driver, driverID string) error {
|
||||
props := map[string]interface{}{"size": uint64(size) << 20}
|
||||
args := map[string]interface{}{
|
||||
"qom-type": qomtype,
|
||||
@@ -1368,17 +1400,17 @@ func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
q.cfg.Logger.Errorf("Unable to hotplug memory device: %v", err)
|
||||
q.cfg.Logger.Errorf("Unable to add memory device %s: %v", id, err)
|
||||
err = q.executeCommand(ctx, "object-del", map[string]interface{}{"id": id}, nil)
|
||||
if err != nil {
|
||||
q.cfg.Logger.Warningf("Unable to clean up memory object: %v", err)
|
||||
q.cfg.Logger.Warningf("Unable to clean up memory object %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
args = map[string]interface{}{
|
||||
"driver": "pc-dimm",
|
||||
"id": "dimm" + id,
|
||||
"driver": driver,
|
||||
"id": driverID,
|
||||
"memdev": id,
|
||||
}
|
||||
err = q.executeCommand(ctx, "device_add", args, nil)
|
||||
@@ -1386,6 +1418,11 @@ func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string
|
||||
return err
|
||||
}
|
||||
|
||||
// ExecHotplugMemory adds size of MiB memory to the guest
|
||||
func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string, size int, share bool) error {
|
||||
return q.ExecMemdevAdd(ctx, qomtype, id, mempath, size, share, "pc-dimm", "dimm"+id)
|
||||
}
|
||||
|
||||
// ExecuteNVDIMMDeviceAdd adds a block device to a QEMU instance using
|
||||
// a NVDIMM driver with the device_add command.
|
||||
// id is the id of the device to add. It must be a valid QMP identifier.
|
||||
@@ -1572,3 +1609,14 @@ func (q *QMP) ExecuteQueryStatus(ctx context.Context) (StatusInfo, error) {
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
// ExecQomSet sets property to value on the object at path using the qom-set command.
|
||||
func (q *QMP) ExecQomSet(ctx context.Context, path, property string, value uint64) error {
|
||||
args := map[string]interface{}{
|
||||
"path": path,
|
||||
"property": property,
|
||||
"value": value,
|
||||
}
|
||||
|
||||
return q.executeCommand(ctx, "qom-set", args, nil)
|
||||
}
|
||||
|
||||
@@ -307,6 +307,9 @@ type HypervisorConfig struct {
|
||||
// VirtioFSExtraArgs passes options to virtiofsd daemon
|
||||
VirtioFSExtraArgs []string
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
|
||||
// customAssets is a map of assets.
|
||||
// Each value in that map takes precedence over the configured assets.
|
||||
// For example, if there is a value for the "kernel" key in this map,
|
||||
@@ -341,8 +344,8 @@ type HypervisorConfig struct {
|
||||
// HugePages specifies if the memory should be pre-allocated from huge pages
|
||||
HugePages bool
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
// VirtioMem is used to enable/disable virtio-mem
|
||||
VirtioMem bool
|
||||
|
||||
// Realtime is used to enable/disable realtime
|
||||
Realtime bool
|
||||
|
||||
@@ -214,6 +214,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
||||
Msize9p: sconfig.HypervisorConfig.Msize9p,
|
||||
MemSlots: sconfig.HypervisorConfig.MemSlots,
|
||||
MemOffset: sconfig.HypervisorConfig.MemOffset,
|
||||
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
|
||||
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
|
||||
KernelPath: sconfig.HypervisorConfig.KernelPath,
|
||||
ImagePath: sconfig.HypervisorConfig.ImagePath,
|
||||
@@ -499,6 +500,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
||||
Msize9p: hconf.Msize9p,
|
||||
MemSlots: hconf.MemSlots,
|
||||
MemOffset: hconf.MemOffset,
|
||||
VirtioMem: hconf.VirtioMem,
|
||||
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
|
||||
KernelPath: hconf.KernelPath,
|
||||
ImagePath: hconf.ImagePath,
|
||||
|
||||
@@ -97,6 +97,9 @@ type HypervisorConfig struct {
|
||||
// VirtioFSExtraArgs passes options to virtiofsd daemon
|
||||
VirtioFSExtraArgs []string
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
|
||||
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
|
||||
BlockDeviceCacheSet bool
|
||||
|
||||
@@ -125,8 +128,8 @@ type HypervisorConfig struct {
|
||||
// HugePages specifies if the memory should be pre-allocated from huge pages
|
||||
HugePages bool
|
||||
|
||||
// File based memory backend root directory
|
||||
FileBackedMemRootDir string
|
||||
// VirtioMem is used to enable/disable virtio-mem
|
||||
VirtioMem bool
|
||||
|
||||
// Realtime is used to enable/disable realtime
|
||||
Realtime bool
|
||||
|
||||
@@ -124,6 +124,9 @@ const (
|
||||
// MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
|
||||
MemOffset = kataAnnotHypervisorPrefix + "memory_offset"
|
||||
|
||||
// VirtioMem is a sandbox annotation that is used to enable/disable virtio-mem.
|
||||
VirtioMem = kataAnnotHypervisorPrefix + "enable_virtio_mem"
|
||||
|
||||
// MemPrealloc is a sandbox annotation that is used to enable/disable memory pre-allocation.
|
||||
MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc"
|
||||
|
||||
|
||||
@@ -492,6 +492,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
|
||||
}
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.VirtioMem]; ok {
|
||||
virtioMem, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error parsing annotation for enable_virtio_mem: Please specify boolean value 'true|false'")
|
||||
}
|
||||
|
||||
sbConfig.HypervisorConfig.VirtioMem = virtioMem
|
||||
}
|
||||
|
||||
if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok {
|
||||
memPrealloc, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
|
||||
@@ -741,6 +741,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024"
|
||||
ocispec.Annotations[vcAnnotations.MemSlots] = "20"
|
||||
ocispec.Annotations[vcAnnotations.MemOffset] = "512"
|
||||
ocispec.Annotations[vcAnnotations.VirtioMem] = "true"
|
||||
ocispec.Annotations[vcAnnotations.MemPrealloc] = "true"
|
||||
ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
|
||||
ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
|
||||
@@ -770,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
||||
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
|
||||
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
|
||||
assert.Equal(config.HypervisorConfig.MemOffset, uint32(512))
|
||||
assert.Equal(config.HypervisorConfig.VirtioMem, true)
|
||||
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
|
||||
assert.Equal(config.HypervisorConfig.Mlock, false)
|
||||
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||
|
||||
@@ -668,6 +668,56 @@ func (q *qemu) setupVirtiofsd() (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *qemu) getMemArgs() (bool, string, string) {
|
||||
share := false
|
||||
target := ""
|
||||
memoryBack := "memory-backend-ram"
|
||||
|
||||
if q.qemuConfig.Knobs.HugePages {
|
||||
// we are setting all the bits that govmm sets when hugepages are enabled.
|
||||
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
|
||||
target = "/dev/hugepages"
|
||||
memoryBack = "memory-backend-file"
|
||||
share = true
|
||||
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
|
||||
target = q.qemuConfig.Memory.Path
|
||||
memoryBack = "memory-backend-file"
|
||||
}
|
||||
if q.qemuConfig.Knobs.MemShared {
|
||||
share = true
|
||||
}
|
||||
|
||||
return share, target, memoryBack
|
||||
}
|
||||
|
||||
func (q *qemu) setupVirtioMem() error {
|
||||
maxMem, err := q.hostMemMB()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// The virtio-mem device size is the host memory minus the configured sandbox memory size.
|
||||
sizeMB := int(maxMem) - int(q.config.MemorySize)
|
||||
|
||||
share, target, memoryBack := q.getMemArgs()
|
||||
err = q.qmpSetup()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0")
|
||||
if err == nil {
|
||||
q.config.VirtioMem = true
|
||||
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
|
||||
} else {
|
||||
help := ""
|
||||
if strings.Contains(err.Error(), "Cannot allocate memory") {
|
||||
help = ". Please use command \"echo 1 > /proc/sys/vm/overcommit_memory\" handle it."
|
||||
}
|
||||
err = fmt.Errorf("Add %dMB virtio-mem-pci fail %s%s", sizeMB, err.Error(), help)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// startSandbox will start the Sandbox's VM.
|
||||
func (q *qemu) startSandbox(timeout int) error {
|
||||
span, _ := q.trace("startSandbox")
|
||||
@@ -744,6 +794,10 @@ func (q *qemu) startSandbox(timeout int) error {
|
||||
}
|
||||
}
|
||||
|
||||
if q.config.VirtioMem {
|
||||
err = q.setupVirtioMem()
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1449,9 +1503,6 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {
|
||||
|
||||
func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
|
||||
memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx)
|
||||
share := false
|
||||
target := ""
|
||||
memoryBack := "memory-backend-ram"
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to query memory devices: %v", err)
|
||||
}
|
||||
@@ -1465,19 +1516,8 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
|
||||
}
|
||||
memDev.slot = maxSlot + 1
|
||||
}
|
||||
if q.qemuConfig.Knobs.HugePages {
|
||||
// we are setting all the bits that govmm sets when hugepages are enabled.
|
||||
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
|
||||
target = "/dev/hugepages"
|
||||
memoryBack = "memory-backend-file"
|
||||
share = true
|
||||
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
|
||||
target = q.qemuConfig.Memory.Path
|
||||
memoryBack = "memory-backend-file"
|
||||
}
|
||||
if q.qemuConfig.Knobs.MemShared {
|
||||
share = true
|
||||
}
|
||||
|
||||
share, target, memoryBack := q.getMemArgs()
|
||||
err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share)
|
||||
if err != nil {
|
||||
q.Logger().WithError(err).Error("hotplug memory")
|
||||
@@ -1661,6 +1701,17 @@ func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe boo
|
||||
return 0, memoryDevice{}, err
|
||||
}
|
||||
var addMemDevice memoryDevice
|
||||
if q.config.VirtioMem && currentMemory != reqMemMB {
|
||||
q.Logger().WithField("hotplug", "memory").Debugf("resize memory from %dMB to %dMB", currentMemory, reqMemMB)
|
||||
sizeByte := (reqMemMB - q.config.MemorySize) * 1024 * 1024
|
||||
err = q.qmpMonitorCh.qmp.ExecQomSet(q.qmpMonitorCh.ctx, "virtiomem0", "requested-size", uint64(sizeByte))
|
||||
if err != nil {
|
||||
return 0, memoryDevice{}, err
|
||||
}
|
||||
q.state.HotpluggedMemory = int(sizeByte / 1024 / 1024)
|
||||
return reqMemMB, memoryDevice{}, nil
|
||||
}
|
||||
|
||||
switch {
|
||||
case currentMemory < reqMemMB:
|
||||
//hotplug
|
||||
|
||||
Reference in New Issue
Block a user