mirror of
https://github.com/aljazceru/kata-containers.git
synced 2026-02-23 15:34:28 +01:00
Merge pull request #360 from devimc/2020-06-29/forwardport/fixVFIOHotplug
[forwardport 2.0] create cgroup manager after creating the network
This commit is contained in:
@@ -100,6 +100,10 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
|
||||
|
||||
// Move runtime to sandbox cgroup so all process are created there.
|
||||
if s.config.SandboxCgroupOnly {
|
||||
if err := s.createCgroupManager(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := s.setupSandboxCgroup(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -86,7 +86,8 @@ func (endpoint *BridgedMacvlanEndpoint) NetworkPair() *NetworkInterfacePair {
|
||||
|
||||
// Attach for virtual endpoint bridges the network pair and adds the
|
||||
// tap interface of the network pair to the hypervisor.
|
||||
func (endpoint *BridgedMacvlanEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *BridgedMacvlanEndpoint) Attach(s *Sandbox) error {
|
||||
h := s.hypervisor
|
||||
if err := xConnectVMNetwork(endpoint, h); err != nil {
|
||||
networkLogger().WithError(err).Error("Error bridging virtual ep")
|
||||
return err
|
||||
|
||||
@@ -117,6 +117,10 @@ type DeviceInfo struct {
|
||||
// for a nvdimm device in the guest.
|
||||
Pmem bool
|
||||
|
||||
// ColdPlug specifies whether the device must be cold plugged (true)
|
||||
// or hot plugged (false).
|
||||
ColdPlug bool
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode
|
||||
|
||||
|
||||
@@ -140,6 +140,7 @@ func (device *GenericDevice) Save() persistapi.DeviceState {
|
||||
dss.Major = info.Major
|
||||
dss.Minor = info.Minor
|
||||
dss.DriverOptions = info.DriverOptions
|
||||
dss.ColdPlug = info.ColdPlug
|
||||
}
|
||||
return dss
|
||||
}
|
||||
@@ -155,5 +156,6 @@ func (device *GenericDevice) Load(ds persistapi.DeviceState) {
|
||||
Major: ds.Major,
|
||||
Minor: ds.Minor,
|
||||
DriverOptions: ds.DriverOptions,
|
||||
ColdPlug: ds.ColdPlug,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ package drivers
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -27,6 +28,8 @@ const (
|
||||
pciDriverBindPath = "/sys/bus/pci/drivers/%s/bind"
|
||||
vfioNewIDPath = "/sys/bus/pci/drivers/vfio-pci/new_id"
|
||||
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
|
||||
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
||||
vfioDevPath = "/dev/vfio/%s"
|
||||
pcieRootPortPrefix = "rp"
|
||||
)
|
||||
|
||||
@@ -98,10 +101,20 @@ func (device *VFIODevice) Attach(devReceiver api.DeviceReceiver) (retErr error)
|
||||
}
|
||||
}
|
||||
|
||||
// hotplug a VFIO device is actually hotplugging a group of iommu devices
|
||||
if err := devReceiver.HotplugAddDevice(device, config.DeviceVFIO); err != nil {
|
||||
deviceLogger().WithError(err).Error("Failed to add device")
|
||||
return err
|
||||
coldPlug := device.DeviceInfo.ColdPlug
|
||||
deviceLogger().WithField("cold-plug", coldPlug).Info("Attaching VFIO device")
|
||||
|
||||
if coldPlug {
|
||||
if err := devReceiver.AppendDevice(device); err != nil {
|
||||
deviceLogger().WithError(err).Error("Failed to append device")
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// hotplug a VFIO device is actually hotplugging a group of iommu devices
|
||||
if err := devReceiver.HotplugAddDevice(device, config.DeviceVFIO); err != nil {
|
||||
deviceLogger().WithError(err).Error("Failed to add device")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
deviceLogger().WithFields(logrus.Fields{
|
||||
@@ -128,6 +141,15 @@ func (device *VFIODevice) Detach(devReceiver api.DeviceReceiver) (retErr error)
|
||||
}
|
||||
}()
|
||||
|
||||
if device.GenericDevice.DeviceInfo.ColdPlug {
|
||||
// nothing to detach, device was cold plugged
|
||||
deviceLogger().WithFields(logrus.Fields{
|
||||
"device-group": device.DeviceInfo.HostPath,
|
||||
"device-type": "vfio-passthrough",
|
||||
}).Info("Nothing to detach. VFIO device was cold plugged")
|
||||
return nil
|
||||
}
|
||||
|
||||
// hotplug a VFIO device is actually hotplugging a group of iommu devices
|
||||
if err := devReceiver.HotplugRemoveDevice(device, config.DeviceVFIO); err != nil {
|
||||
deviceLogger().WithError(err).Error("Failed to remove device")
|
||||
@@ -223,7 +245,7 @@ func getSysfsDev(sysfsDevStr string) (string, error) {
|
||||
|
||||
// BindDevicetoVFIO binds the device to vfio driver after unbinding from host.
|
||||
// Will be called by a network interface or a generic pcie device.
|
||||
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) error {
|
||||
func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) (string, error) {
|
||||
|
||||
// Unbind from the host driver
|
||||
unbindDriverPath := fmt.Sprintf(pciDriverUnbindPath, bdf)
|
||||
@@ -233,7 +255,7 @@ func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) error {
|
||||
}).Info("Unbinding device from driver")
|
||||
|
||||
if err := utils.WriteToFile(unbindDriverPath, []byte(bdf)); err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Add device id to vfio driver.
|
||||
@@ -243,7 +265,7 @@ func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) error {
|
||||
}).Info("Writing vendor-device-id to vfio new-id path")
|
||||
|
||||
if err := utils.WriteToFile(vfioNewIDPath, []byte(vendorDeviceID)); err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Bind to vfio-pci driver.
|
||||
@@ -257,7 +279,12 @@ func BindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) error {
|
||||
// Device may be already bound at this time because of earlier write to new_id, ignore error
|
||||
utils.WriteToFile(bindDriverPath, []byte(bdf))
|
||||
|
||||
return nil
|
||||
groupPath, err := os.Readlink(fmt.Sprintf(iommuGroupPath, bdf))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fmt.Sprintf(vfioDevPath, filepath.Base(groupPath)), nil
|
||||
}
|
||||
|
||||
// BindDevicetoHost binds the device to the host driver driver after unbinding from vfio-pci.
|
||||
|
||||
@@ -22,7 +22,7 @@ type Endpoint interface {
|
||||
|
||||
SetProperties(NetworkInfo)
|
||||
SetPciAddr(string)
|
||||
Attach(hypervisor) error
|
||||
Attach(*Sandbox) error
|
||||
Detach(netNsCreated bool, netNsPath string) error
|
||||
HotAttach(h hypervisor) error
|
||||
HotDetach(h hypervisor, netNsCreated bool, netNsPath string) error
|
||||
|
||||
@@ -89,7 +89,8 @@ func (endpoint *IPVlanEndpoint) NetworkPair() *NetworkInterfacePair {
|
||||
|
||||
// Attach for virtual endpoint bridges the network pair and adds the
|
||||
// tap interface of the network pair to the hypervisor.
|
||||
func (endpoint *IPVlanEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *IPVlanEndpoint) Attach(s *Sandbox) error {
|
||||
h := s.hypervisor
|
||||
if err := xConnectVMNetwork(endpoint, h); err != nil {
|
||||
networkLogger().WithError(err).Error("Error bridging virtual ep")
|
||||
return err
|
||||
|
||||
@@ -58,8 +58,9 @@ func (endpoint *MacvtapEndpoint) SetProperties(properties NetworkInfo) {
|
||||
}
|
||||
|
||||
// Attach for macvtap endpoint passes macvtap device to the hypervisor.
|
||||
func (endpoint *MacvtapEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *MacvtapEndpoint) Attach(s *Sandbox) error {
|
||||
var err error
|
||||
h := s.hypervisor
|
||||
|
||||
endpoint.VMFds, err = createMacvtapFds(endpoint.EndpointProperties.Iface.Index, int(h.hypervisorConfig().NumVCPUs))
|
||||
if err != nil {
|
||||
|
||||
@@ -1271,7 +1271,7 @@ func (n *Network) Run(networkNSPath string, cb func() error) error {
|
||||
}
|
||||
|
||||
// Add adds all needed interfaces inside the network namespace.
|
||||
func (n *Network) Add(ctx context.Context, config *NetworkConfig, hypervisor hypervisor, hotplug bool) ([]Endpoint, error) {
|
||||
func (n *Network) Add(ctx context.Context, config *NetworkConfig, s *Sandbox, hotplug bool) ([]Endpoint, error) {
|
||||
span, _ := n.trace(ctx, "add")
|
||||
defer span.Finish()
|
||||
|
||||
@@ -1284,24 +1284,24 @@ func (n *Network) Add(ctx context.Context, config *NetworkConfig, hypervisor hyp
|
||||
for _, endpoint := range endpoints {
|
||||
networkLogger().WithField("endpoint-type", endpoint.Type()).WithField("hotplug", hotplug).Info("Attaching endpoint")
|
||||
if hotplug {
|
||||
if err := endpoint.HotAttach(hypervisor); err != nil {
|
||||
if err := endpoint.HotAttach(s.hypervisor); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := endpoint.Attach(hypervisor); err != nil {
|
||||
if err := endpoint.Attach(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if !hypervisor.isRateLimiterBuiltin() {
|
||||
rxRateLimiterMaxRate := hypervisor.hypervisorConfig().RxRateLimiterMaxRate
|
||||
if !s.hypervisor.isRateLimiterBuiltin() {
|
||||
rxRateLimiterMaxRate := s.hypervisor.hypervisorConfig().RxRateLimiterMaxRate
|
||||
if rxRateLimiterMaxRate > 0 {
|
||||
networkLogger().Info("Add Rx Rate Limiter")
|
||||
if err := addRxRateLimiter(endpoint, rxRateLimiterMaxRate); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
txRateLimiterMaxRate := hypervisor.hypervisorConfig().TxRateLimiterMaxRate
|
||||
txRateLimiterMaxRate := s.hypervisor.hypervisorConfig().TxRateLimiterMaxRate
|
||||
if txRateLimiterMaxRate > 0 {
|
||||
networkLogger().Info("Add Tx Rate Limiter")
|
||||
if err := addTxRateLimiter(endpoint, txRateLimiterMaxRate); err != nil {
|
||||
|
||||
@@ -103,6 +103,10 @@ type DeviceState struct {
|
||||
Major int64
|
||||
Minor int64
|
||||
|
||||
// ColdPlug specifies whether the device must be cold plugged (true)
|
||||
// or hot plugged (false).
|
||||
ColdPlug bool
|
||||
|
||||
// DriverOptions is specific options for each device driver
|
||||
// for example, for BlockDevice, we can set DriverOptions["blockDriver"]="virtio-blk"
|
||||
DriverOptions map[string]string
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers"
|
||||
persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api"
|
||||
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
|
||||
"github.com/safchain/ethtool"
|
||||
)
|
||||
|
||||
@@ -72,27 +73,29 @@ func (endpoint *PhysicalEndpoint) NetworkPair() *NetworkInterfacePair {
|
||||
|
||||
// Attach for physical endpoint binds the physical network interface to
|
||||
// vfio-pci and adds device to the hypervisor with vfio-passthrough.
|
||||
func (endpoint *PhysicalEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *PhysicalEndpoint) Attach(s *Sandbox) error {
|
||||
// Unbind physical interface from host driver and bind to vfio
|
||||
// so that it can be passed to qemu.
|
||||
if err := bindNICToVFIO(endpoint); err != nil {
|
||||
vfioPath, err := bindNICToVFIO(endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: use device manager as general device management entrance
|
||||
var vendorID, deviceID string
|
||||
if splits := strings.Split(endpoint.VendorDeviceID, " "); len(splits) == 2 {
|
||||
vendorID = splits[0]
|
||||
deviceID = splits[1]
|
||||
c, err := cgroups.DeviceToCgroupDevice(vfioPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
d := config.VFIODev{
|
||||
BDF: endpoint.BDF,
|
||||
VendorID: vendorID,
|
||||
DeviceID: deviceID,
|
||||
d := config.DeviceInfo{
|
||||
ContainerPath: c.Path,
|
||||
DevType: string(c.Type),
|
||||
Major: c.Major,
|
||||
Minor: c.Minor,
|
||||
ColdPlug: true,
|
||||
}
|
||||
|
||||
return h.addDevice(d, vfioDev)
|
||||
_, err = s.AddDevice(d)
|
||||
return err
|
||||
}
|
||||
|
||||
// Detach for physical endpoint unbinds the physical network interface from vfio-pci
|
||||
@@ -202,7 +205,7 @@ func createPhysicalEndpoint(netInfo NetworkInfo) (*PhysicalEndpoint, error) {
|
||||
return physicalEndpoint, nil
|
||||
}
|
||||
|
||||
func bindNICToVFIO(endpoint *PhysicalEndpoint) error {
|
||||
func bindNICToVFIO(endpoint *PhysicalEndpoint) (string, error) {
|
||||
return drivers.BindDevicetoVFIO(endpoint.BDF, endpoint.Driver, endpoint.VendorDeviceID)
|
||||
}
|
||||
|
||||
|
||||
@@ -1761,7 +1761,7 @@ func (q *qemu) addDevice(devInfo interface{}, devType deviceType) error {
|
||||
case config.VFIODev:
|
||||
q.qemuConfig.Devices = q.arch.appendVFIODevice(q.qemuConfig.Devices, v)
|
||||
default:
|
||||
break
|
||||
q.Logger().WithField("dev-type", v).Warn("Could not append device: unsupported device type")
|
||||
}
|
||||
|
||||
return err
|
||||
|
||||
@@ -566,10 +566,6 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := s.createCgroupManager(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if s.disableVMShutdown, err = s.agent.init(ctx, s, sandboxConfig.AgentConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -687,6 +683,12 @@ func fetchSandbox(ctx context.Context, sandboxID string) (sandbox *Sandbox, err
|
||||
return nil, fmt.Errorf("failed to create sandbox with config %+v: %v", config, err)
|
||||
}
|
||||
|
||||
if sandbox.config.SandboxCgroupOnly {
|
||||
if err := sandbox.createCgroupManager(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// This sandbox already exists, we don't need to recreate the containers in the guest.
|
||||
// We only need to fetch the containers from storage and create the container structs.
|
||||
if err := sandbox.fetchContainers(); err != nil {
|
||||
@@ -824,7 +826,7 @@ func (s *Sandbox) createNetwork() error {
|
||||
// after vm is started.
|
||||
if s.factory == nil {
|
||||
// Add the network
|
||||
endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s.hypervisor, false)
|
||||
endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -991,7 +993,7 @@ func (s *Sandbox) startVM() (err error) {
|
||||
// In case of vm factory, network interfaces are hotplugged
|
||||
// after vm is started.
|
||||
if s.factory != nil {
|
||||
endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s.hypervisor, true)
|
||||
endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1713,7 +1715,16 @@ func (s *Sandbox) AppendDevice(device api.Device) error {
|
||||
switch device.DeviceType() {
|
||||
case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk, config.VhostUserFS:
|
||||
return s.hypervisor.addDevice(device.GetDeviceInfo().(*config.VhostUserDeviceAttrs), vhostuserDev)
|
||||
case config.DeviceVFIO:
|
||||
vfioDevs := device.GetDeviceInfo().([]*config.VFIODev)
|
||||
for _, d := range vfioDevs {
|
||||
return s.hypervisor.addDevice(*d, vfioDev)
|
||||
}
|
||||
default:
|
||||
s.Logger().WithField("device-type", device.DeviceType()).
|
||||
Warn("Could not append device: unsupported device type")
|
||||
}
|
||||
|
||||
return fmt.Errorf("unsupported device type")
|
||||
}
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ func (endpoint *TapEndpoint) SetProperties(properties NetworkInfo) {
|
||||
}
|
||||
|
||||
// Attach for tap endpoint adds the tap interface to the hypervisor.
|
||||
func (endpoint *TapEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *TapEndpoint) Attach(s *Sandbox) error {
|
||||
return fmt.Errorf("TapEndpoint does not support Attach, if you're using docker please use --net none")
|
||||
}
|
||||
|
||||
|
||||
@@ -68,7 +68,8 @@ func (endpoint *TuntapEndpoint) SetProperties(properties NetworkInfo) {
|
||||
}
|
||||
|
||||
// Attach for tap endpoint adds the tap interface to the hypervisor.
|
||||
func (endpoint *TuntapEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *TuntapEndpoint) Attach(s *Sandbox) error {
|
||||
h := s.hypervisor
|
||||
if err := xConnectVMNetwork(endpoint, h); err != nil {
|
||||
networkLogger().WithError(err).Error("Error bridging virtual endpoint")
|
||||
return err
|
||||
|
||||
@@ -89,7 +89,8 @@ func (endpoint *VethEndpoint) SetProperties(properties NetworkInfo) {
|
||||
|
||||
// Attach for veth endpoint bridges the network pair and adds the
|
||||
// tap interface of the network pair to the hypervisor.
|
||||
func (endpoint *VethEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *VethEndpoint) Attach(s *Sandbox) error {
|
||||
h := s.hypervisor
|
||||
if err := xConnectVMNetwork(endpoint, h); err != nil {
|
||||
networkLogger().WithError(err).Error("Error bridging virtual endpoint")
|
||||
return err
|
||||
|
||||
@@ -74,7 +74,7 @@ func (endpoint *VhostUserEndpoint) NetworkPair() *NetworkInterfacePair {
|
||||
}
|
||||
|
||||
// Attach for vhostuser endpoint
|
||||
func (endpoint *VhostUserEndpoint) Attach(h hypervisor) error {
|
||||
func (endpoint *VhostUserEndpoint) Attach(s *Sandbox) error {
|
||||
// Generate a unique ID to be used for hypervisor commandline fields
|
||||
randBytes, err := utils.GenerateRandomBytes(8)
|
||||
if err != nil {
|
||||
@@ -89,7 +89,7 @@ func (endpoint *VhostUserEndpoint) Attach(h hypervisor) error {
|
||||
Type: config.VhostUserNet,
|
||||
}
|
||||
|
||||
return h.addDevice(d, vhostuserDev)
|
||||
return s.hypervisor.addDevice(d, vhostuserDev)
|
||||
}
|
||||
|
||||
// Detach for vhostuser endpoint
|
||||
|
||||
@@ -78,9 +78,11 @@ func TestVhostUserEndpointAttach(t *testing.T) {
|
||||
EndpointType: VhostUserEndpointType,
|
||||
}
|
||||
|
||||
h := &mockHypervisor{}
|
||||
s := &Sandbox{
|
||||
hypervisor: &mockHypervisor{},
|
||||
}
|
||||
|
||||
err := v.Attach(h)
|
||||
err := v.Attach(s)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user