Merge pull request #2407 from teawater/virtio-mem2

qemu: Add virtio-mem support
This commit is contained in:
Julio Montes
2020-01-27 08:55:03 -06:00
committed by GitHub
15 changed files with 163 additions and 31 deletions

4
Gopkg.lock generated
View File

@@ -412,11 +412,11 @@
revision = "2f1d1f20f75d5404f53b9edf6b53ed5505508675"
[[projects]]
digest = "1:b3ab3b3615583d7a374d4803ea2b71f173a4d8f4d594210e0fc8e9fc7c5a49c9"
digest = "1:f0e85729c00a427cef8de798b33b9a35b3117ee8760f6f00fc3cf3a5bb98f844"
name = "github.com/intel/govmm"
packages = ["qemu"]
pruneopts = "NUT"
revision = "8cba5a8e5f2816f26f9dc34b8ea968279a5a76eb"
revision = "ee21903287393441c7bb53a0b0d39b8fa4075221"
[[projects]]
digest = "1:6da9487ef0cc0cca3eeb1a24e3bb018ce2e07b7c2fc4d50975b54efdcc942118"

View File

@@ -48,7 +48,7 @@
[[constraint]]
name = "github.com/intel/govmm"
revision = "8cba5a8e5f2816f26f9dc34b8ea968279a5a76eb"
revision = "ee21903287393441c7bb53a0b0d39b8fa4075221"
[[constraint]]
name = "github.com/kata-containers/agent"

View File

@@ -89,6 +89,12 @@ default_memory = @DEFMEMSZ@
# Default 0
#memory_offset = 0
# Specifies whether virtio-mem is enabled.
# Please note that this option should be used with the command
# "echo 1 > /proc/sys/vm/overcommit_memory".
# Default false
#enable_virtio_mem = true
# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed

View File

@@ -27,6 +27,7 @@ const defaultMaxVCPUCount uint32 = 0
const defaultMemSize uint32 = 2048 // MiB
const defaultMemSlots uint32 = 10
const defaultMemOffset uint32 = 0 // MiB
const defaultVirtioMem bool = false
const defaultBridgesCount uint32 = 1
const defaultInterNetworkingModel = "tcfilter"
const defaultDisableBlockDeviceUse bool = false

View File

@@ -114,6 +114,7 @@ type hypervisor struct {
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
Swap bool `toml:"enable_swap"`
Debug bool `toml:"enable_debug"`
@@ -623,6 +624,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
@@ -773,6 +775,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
@@ -1054,6 +1057,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
DefaultMaxVCPUs: defaultMaxVCPUCount,
MemorySize: defaultMemSize,
MemOffset: defaultMemOffset,
VirtioMem: defaultVirtioMem,
DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
DefaultBridges: defaultBridgesCount,
MemPrealloc: defaultEnableMemPrealloc,

View File

@@ -1,4 +1,4 @@
// +build !s390x,!s390x_test
// +build !s390x
/*
// Copyright contributors to the Virtual Machine Manager for Go project

View File

@@ -1,4 +1,4 @@
// +build s390x s390x_test
// +build s390x
/*
// Copyright contributors to the Virtual Machine Manager for Go project

View File

@@ -946,6 +946,17 @@ func (q *QMP) ExecuteBlockdevDel(ctx context.Context, blockdevID string) error {
return q.executeCommand(ctx, "x-blockdev-del", args, nil)
}
// ExecuteChardevDel removes a character device from the QEMU instance by
// issuing the QMP chardev-remove command.
//
// chardevID is the id of the char device to remove and must be a valid QMP
// identifier. Typically it is the same id that was given to
// ExecuteCharDevUnixSocketAdd when the device was created.
func (q *QMP) ExecuteChardevDel(ctx context.Context, chardevID string) error {
	return q.executeCommand(ctx, "chardev-remove", map[string]interface{}{"id": chardevID}, nil)
}
// ExecuteNetdevAdd adds a Net device to a QEMU instance
// using the netdev_add command. netdevID is the id of the device to add.
// Must be valid QMP identifier.
@@ -1124,6 +1135,27 @@ func (q *QMP) ExecutePCIDeviceAdd(ctx context.Context, blockdevID, devID, driver
return q.executeCommand(ctx, "device_add", args, nil)
}
// ExecutePCIVhostUserDevAdd hot plugs a vhost-user device into a QEMU instance
// through the device_add command. It can be used to attach vhost-user devices
// to PCI(E) bridges.
//
// driver is the name of the vhost-user driver, e.g. vhost-user-blk-pci.
// devID is the id of the device to add and must be a valid QMP identifier.
// chardevID is the QMP identifier of the character device that uses a unix
// socket as its backend.
// addr is the device address on its parent bus.
// bus names the parent bus; it is optional and is only sent when non-empty.
func (q *QMP) ExecutePCIVhostUserDevAdd(ctx context.Context, driver, devID, chardevID, addr, bus string) error {
	devArgs := make(map[string]interface{}, 5)
	devArgs["driver"] = driver
	devArgs["id"] = devID
	devArgs["chardev"] = chardevID
	devArgs["addr"] = addr

	// The bus key is omitted entirely when no bus was requested.
	if len(bus) > 0 {
		devArgs["bus"] = bus
	}

	return q.executeCommand(ctx, "device_add", devArgs, nil)
}
// ExecuteVFIODeviceAdd adds a VFIO device to a QEMU instance
// using the device_add command. devID is the id of the device to add.
// Must be valid QMP identifier. bdf is the PCI bus-device-function
@@ -1347,8 +1379,8 @@ func (q *QMP) ExecQueryCpusFast(ctx context.Context) ([]CPUInfoFast, error) {
return cpuInfoFast, nil
}
// ExecHotplugMemory adds size of MiB memory to the guest
func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string, size int, share bool) error {
// ExecMemdevAdd adds a memory device of size MiB to the guest
func (q *QMP) ExecMemdevAdd(ctx context.Context, qomtype, id, mempath string, size int, share bool, driver, driverID string) error {
props := map[string]interface{}{"size": uint64(size) << 20}
args := map[string]interface{}{
"qom-type": qomtype,
@@ -1368,17 +1400,17 @@ func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string
defer func() {
if err != nil {
q.cfg.Logger.Errorf("Unable to hotplug memory device: %v", err)
q.cfg.Logger.Errorf("Unable to add memory device %s: %v", id, err)
err = q.executeCommand(ctx, "object-del", map[string]interface{}{"id": id}, nil)
if err != nil {
q.cfg.Logger.Warningf("Unable to clean up memory object: %v", err)
q.cfg.Logger.Warningf("Unable to clean up memory object %s: %v", id, err)
}
}
}()
args = map[string]interface{}{
"driver": "pc-dimm",
"id": "dimm" + id,
"driver": driver,
"id": driverID,
"memdev": id,
}
err = q.executeCommand(ctx, "device_add", args, nil)
@@ -1386,6 +1418,11 @@ func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string
return err
}
// ExecHotplugMemory hot plugs size MiB of memory into the guest. It is a
// convenience wrapper around ExecMemdevAdd that always uses the pc-dimm
// driver and derives the device id by prefixing the memory object id with
// "dimm".
func (q *QMP) ExecHotplugMemory(ctx context.Context, qomtype, id, mempath string, size int, share bool) error {
	const dimmDriver = "pc-dimm"
	dimmID := "dimm" + id

	return q.ExecMemdevAdd(ctx, qomtype, id, mempath, size, share, dimmDriver, dimmID)
}
// ExecuteNVDIMMDeviceAdd adds a block device to a QEMU instance using
// a NVDIMM driver with the device_add command.
// id is the id of the device to add. It must be a valid QMP identifier.
@@ -1572,3 +1609,14 @@ func (q *QMP) ExecuteQueryStatus(ctx context.Context) (StatusInfo, error) {
return status, nil
}
// ExecQomSet issues the QMP qom-set command, assigning value to the given
// property of the object located at path.
func (q *QMP) ExecQomSet(ctx context.Context, path, property string, value uint64) error {
	return q.executeCommand(ctx, "qom-set", map[string]interface{}{
		"path":     path,
		"property": property,
		"value":    value,
	}, nil)
}

View File

@@ -307,6 +307,9 @@ type HypervisorConfig struct {
// VirtioFSExtraArgs passes options to virtiofsd daemon
VirtioFSExtraArgs []string
// File based memory backend root directory
FileBackedMemRootDir string
// customAssets is a map of assets.
// Each value in that map takes precedence over the configured assets.
// For example, if there is a value for the "kernel" key in this map,
@@ -341,8 +344,8 @@ type HypervisorConfig struct {
// HugePages specifies if the memory should be pre-allocated from huge pages
HugePages bool
// File based memory backend root directory
FileBackedMemRootDir string
// VirtioMem is used to enable/disable virtio-mem
VirtioMem bool
// Realtime Used to enable/disable realtime
Realtime bool

View File

@@ -214,6 +214,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
Msize9p: sconfig.HypervisorConfig.Msize9p,
MemSlots: sconfig.HypervisorConfig.MemSlots,
MemOffset: sconfig.HypervisorConfig.MemOffset,
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
KernelPath: sconfig.HypervisorConfig.KernelPath,
ImagePath: sconfig.HypervisorConfig.ImagePath,
@@ -499,6 +500,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
Msize9p: hconf.Msize9p,
MemSlots: hconf.MemSlots,
MemOffset: hconf.MemOffset,
VirtioMem: hconf.VirtioMem,
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
KernelPath: hconf.KernelPath,
ImagePath: hconf.ImagePath,

View File

@@ -97,6 +97,9 @@ type HypervisorConfig struct {
// VirtioFSExtraArgs passes options to virtiofsd daemon
VirtioFSExtraArgs []string
// File based memory backend root directory
FileBackedMemRootDir string
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet bool
@@ -125,8 +128,8 @@ type HypervisorConfig struct {
// HugePages specifies if the memory should be pre-allocated from huge pages
HugePages bool
// File based memory backend root directory
FileBackedMemRootDir string
// VirtioMem is used to enable/disable virtio-mem
VirtioMem bool
// Realtime Used to enable/disable realtime
Realtime bool

View File

@@ -124,6 +124,9 @@ const (
// MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
MemOffset = kataAnnotHypervisorPrefix + "memory_offset"
// VirtioMem is a sandbox annotation that is used to enable/disable virtio-mem.
VirtioMem = kataAnnotHypervisorPrefix + "enable_virtio_mem"
// MemPrealloc is a sandbox annotation that specifies whether the memory should be pre-allocated.
MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc"

View File

@@ -492,6 +492,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
}
}
if value, ok := ocispec.Annotations[vcAnnotations.VirtioMem]; ok {
virtioMem, err := strconv.ParseBool(value)
if err != nil {
return fmt.Errorf("Error parsing annotation for enable_virtio_mem: Please specify boolean value 'true|false'")
}
sbConfig.HypervisorConfig.VirtioMem = virtioMem
}
if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok {
memPrealloc, err := strconv.ParseBool(value)
if err != nil {

View File

@@ -741,6 +741,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024"
ocispec.Annotations[vcAnnotations.MemSlots] = "20"
ocispec.Annotations[vcAnnotations.MemOffset] = "512"
ocispec.Annotations[vcAnnotations.VirtioMem] = "true"
ocispec.Annotations[vcAnnotations.MemPrealloc] = "true"
ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
@@ -770,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(config.HypervisorConfig.MemOffset, uint32(512))
assert.Equal(config.HypervisorConfig.VirtioMem, true)
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
assert.Equal(config.HypervisorConfig.Mlock, false)
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")

View File

@@ -668,6 +668,56 @@ func (q *qemu) setupVirtiofsd() (err error) {
return err
}
// getMemArgs works out how a memory backend object should be created for this
// VM. It returns, in order: whether the backend must be shared, the backing
// path (empty when none is used), and the QEMU memory backend type.
func (q *qemu) getMemArgs() (bool, string, string) {
	var (
		shared  bool
		path    string
		backend = "memory-backend-ram"
	)

	switch {
	case q.qemuConfig.Knobs.HugePages:
		// we are setting all the bits that govmm sets when hugepages are enabled.
		// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
		path = "/dev/hugepages"
		backend = "memory-backend-file"
		shared = true
	case q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "":
		path = q.qemuConfig.Memory.Path
		backend = "memory-backend-file"
	}

	// The MemShared knob forces sharing regardless of the backend chosen above.
	shared = shared || q.qemuConfig.Knobs.MemShared

	return shared, path, backend
}
// setupVirtioMem adds a virtio-mem-pci device (id "virtiomem0", backed by the
// memory object "virtiomem") to the running VM over QMP.
//
// The device is sized to the host memory reported by hostMemMB minus the
// sandbox's configured memory size. The backend type, path and sharing mode
// come from getMemArgs. On success q.config.VirtioMem is set to true.
//
// An error is returned when the host memory cannot be read, when there is no
// room left for the device, when QMP setup fails, or when QEMU rejects the
// device. In the last case, if QEMU reported "Cannot allocate memory", the
// error carries a hint about /proc/sys/vm/overcommit_memory.
func (q *qemu) setupVirtioMem() error {
	maxMem, err := q.hostMemMB()
	if err != nil {
		return err
	}

	// The device covers whatever host memory lies beyond the boot memory.
	sizeMB := int(maxMem) - int(q.config.MemorySize)
	if sizeMB <= 0 {
		// A zero or negative size would be converted to a bogus unsigned
		// byte count further down, so refuse it here with a clear message.
		return fmt.Errorf("Add virtio-mem-pci fail: host memory %dMB is not larger than sandbox memory %dMB", maxMem, q.config.MemorySize)
	}

	share, target, memoryBack := q.getMemArgs()

	if err = q.qmpSetup(); err != nil {
		return err
	}

	err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0")
	if err != nil {
		help := ""
		if strings.Contains(err.Error(), "Cannot allocate memory") {
			help = ". Please use command \"echo 1 > /proc/sys/vm/overcommit_memory\" handle it."
		}
		return fmt.Errorf("Add %dMB virtio-mem-pci fail %s%s", sizeMB, err.Error(), help)
	}

	q.config.VirtioMem = true
	q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)

	return nil
}
// startSandbox will start the Sandbox's VM.
func (q *qemu) startSandbox(timeout int) error {
span, _ := q.trace("startSandbox")
@@ -744,6 +794,10 @@ func (q *qemu) startSandbox(timeout int) error {
}
}
if q.config.VirtioMem {
err = q.setupVirtioMem()
}
return err
}
@@ -1449,9 +1503,6 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {
func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx)
share := false
target := ""
memoryBack := "memory-backend-ram"
if err != nil {
return 0, fmt.Errorf("failed to query memory devices: %v", err)
}
@@ -1465,19 +1516,8 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
}
memDev.slot = maxSlot + 1
}
if q.qemuConfig.Knobs.HugePages {
// we are setting all the bits that govmm sets when hugepages are enabled.
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
target = "/dev/hugepages"
memoryBack = "memory-backend-file"
share = true
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
target = q.qemuConfig.Memory.Path
memoryBack = "memory-backend-file"
}
if q.qemuConfig.Knobs.MemShared {
share = true
}
share, target, memoryBack := q.getMemArgs()
err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share)
if err != nil {
q.Logger().WithError(err).Error("hotplug memory")
@@ -1661,6 +1701,17 @@ func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe boo
return 0, memoryDevice{}, err
}
var addMemDevice memoryDevice
// With virtio-mem the guest memory is resized by changing the requested size
// of the already plugged virtiomem0 device instead of hot (un)plugging DIMMs.
if q.config.VirtioMem && currentMemory != reqMemMB {
	q.Logger().WithField("hotplug", "memory").Debugf("resize memory from %dMB to %dMB", currentMemory, reqMemMB)
	// Widen to uint64 before scaling MiB to bytes: done in uint32, the
	// multiplication wraps as soon as the delta reaches 4096 MiB.
	// NOTE(review): reqMemMB below q.config.MemorySize still underflows the
	// uint32 subtraction; confirm callers never request less than boot memory.
	sizeByte := uint64(reqMemMB-q.config.MemorySize) * 1024 * 1024
	err = q.qmpMonitorCh.qmp.ExecQomSet(q.qmpMonitorCh.ctx, "virtiomem0", "requested-size", sizeByte)
	if err != nil {
		return 0, memoryDevice{}, err
	}
	q.state.HotpluggedMemory = int(sizeByte / 1024 / 1024)
	return reqMemMB, memoryDevice{}, nil
}
switch {
case currentMemory < reqMemMB:
//hotplug