Files
kata-containers/virtcontainers/pkg/cgroups/manager.go
Julio Montes ea82922a54 virtcontainers/pkg/cgroups: implement cgroup manager
The cgroup manager is in charge of creating and setting up cgroups for
virtual containers. For example, it adds /dev/kvm and
/dev/vhost-net to the list of cgroup devices so that
virtual containers can work.

fixes #2438
fixes #2419

Signed-off-by: Julio Montes <julio.montes@intel.com>
2020-02-21 02:54:34 +00:00

322 lines
7.6 KiB
Go

// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package cgroups
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Config carries the inputs that New() uses to build a cgroup Manager.
type Config struct {
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available.
// If nil, New() will create one from CgroupPath and Resources.
Cgroups *configs.Cgroup
// CgroupPaths contains paths to all the cgroups set up for a container. Key is the cgroup
// subsystem name with the value as the path. New() hands it to the underlying manager and
// treats an empty map the same as nil.
CgroupPaths map[string]string
// Resources represents the runtime resource constraints.
// New() rewrites Resources.Devices so that it includes the devices a hypervisor needs.
Resources specs.LinuxResources
// CgroupPath is the OCI spec cgroup path. It is only consulted (and validated with
// ValidCgroupPath) when Cgroups is nil.
CgroupPath string
}
// Manager wraps a libcontainer cgroup manager. Its methods take the embedded
// mutex around their calls into mgr.
type Manager struct {
sync.Mutex
// mgr is the libcontainer manager (cgroupfs or systemd flavour) chosen by New().
mgr libcontcgroups.Manager
}
const (
// cgroupProcs is the name of the file inside a cgroup directory that
// contains the pids; readPids reads it and writePids writes to it.
cgroupProcs = "cgroup.procs"
)
var (
// systemdCgroup, if set to true, expects cgroupsPath to be of form "slice:prefix:name",
// otherwise cgroups creation will fail. It stays nil until EnableSystemdCgroup() is
// called; UseSystemdCgroup() reports a nil value as false.
systemdCgroup *bool
// cgroupsLogger is the package-level log entry, tagged with the package as its source.
cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)
// EnableSystemdCgroup switches the package to systemd-managed cgroups by
// pointing systemdCgroup at a true value.
func EnableSystemdCgroup() {
	enabled := true
	systemdCgroup = &enabled
}
// UseSystemdCgroup reports whether systemd-managed cgroups were enabled.
// An unset (nil) flag counts as false.
func UseSystemdCgroup() bool {
	return systemdCgroup != nil && *systemdCgroup
}
// hypervisorDevices returns the list of device cgroup rules that a
// hypervisor may need: a wildcard rule for devicemapper block devices plus
// one rule per host device node listed below.
//
// Host nodes that cannot be stat'ed, or that are neither character nor block
// devices, are logged and skipped rather than failing the whole call.
func hypervisorDevices() []specs.LinuxDeviceCgroup {
	wildcard := int64(-1)
	// NOTE(review): 253 is hard-coded as the devicemapper major; confirm this
	// holds on all supported hosts.
	devicemapperMajor := int64(253)

	devices := []specs.LinuxDeviceCgroup{
		// hypervisor needs access to all devicemapper devices,
		// since they can be hotplugged in the VM.
		{
			Allow:  true,
			Type:   "b",
			Major:  &devicemapperMajor,
			Minor:  &wildcard,
			Access: "rwm",
		},
	}

	// Processes running in a device-cgroup are constrained, they have access
	// only to the devices listed in the devices.list file.
	// In order to run Virtual Machines and create virtqueues, hypervisors
	// need access to certain character devices in the host, like kvm and vhost-net.
	devicePaths := []string{
		"/dev/kvm",       // To run virtual machines
		"/dev/vhost-net", // To create virtqueues
	}

	for _, device := range devicePaths {
		var st unix.Stat_t
		if err := unix.Stat(device, &st); err != nil {
			cgroupsLogger.WithError(err).WithField("device", device).Warn("Could not get device information")
			continue
		}

		var deviceType string
		switch st.Mode & unix.S_IFMT {
		case unix.S_IFCHR:
			deviceType = "c"
		case unix.S_IFBLK:
			deviceType = "b"
		default:
			// Never emit a rule with an empty Type: per the OCI runtime
			// spec an unset type means "all devices", which would grant
			// far more than the single node intended here.
			cgroupsLogger.WithField("device", device).Warn("Not a character or block device, skipping")
			continue
		}

		// Fresh variables per iteration: the rule stores their addresses.
		major := int64(unix.Major(st.Rdev))
		minor := int64(unix.Minor(st.Rdev))

		devices = append(devices, specs.LinuxDeviceCgroup{
			Allow:  true,
			Type:   deviceType,
			Major:  &major,
			Minor:  &minor,
			Access: "rwm",
		})
	}

	return devices
}
// New creates a new cgroup Manager from config.
//
// The device rules in config.Resources.Devices are extended with the devices
// returned by hypervisorDevices(); the combined list is stored back into
// config.Resources.Devices. When config.Cgroups is nil, a cgroup
// configuration is generated from config.CgroupPath and config.Resources.
// Depending on UseSystemdCgroup(), the returned Manager is backed by the
// systemd or the cgroupfs libcontainer manager.
//
// An error is returned when config is nil, the cgroup path is invalid, the
// cgroup configuration cannot be generated, or the systemd manager cannot be
// created.
func New(config *Config) (*Manager, error) {
	if config == nil {
		return nil, fmt.Errorf("Invalid cgroup config: nil")
	}

	var err error
	useSystemdCgroup := UseSystemdCgroup()

	// Build the device list in a freshly allocated slice so the backing
	// array of the caller's slice is never written to. The caller's rules
	// come first, then the hypervisor ones.
	// (copy() into an empty destination copies nothing, hence append.)
	hvDevices := hypervisorDevices()
	devices := make([]specs.LinuxDeviceCgroup, 0, len(config.Resources.Devices)+len(hvDevices))
	devices = append(devices, config.Resources.Devices...)
	devices = append(devices, hvDevices...)
	config.Resources.Devices = devices

	newSpec := specs.Spec{
		Linux: &specs.Linux{
			Resources: &config.Resources,
		},
	}

	isRootless := rootless.IsRootless()

	cgroups := config.Cgroups
	cgroupPaths := config.CgroupPaths

	// Create a new cgroup if the current one is nil
	// this cgroups must be saved later
	if cgroups == nil {
		if config.CgroupPath == "" && !isRootless {
			cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
		}

		newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
		if err != nil {
			return nil, fmt.Errorf("Invalid cgroup path: %v", err)
		}

		if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
			// cgroup name is taken from spec
			CgroupName:       "",
			UseSystemdCgroup: useSystemdCgroup,
			Spec:             &newSpec,
			RootlessCgroups:  isRootless,
		}); err != nil {
			return nil, fmt.Errorf("Could not create cgroup config: %v", err)
		}
	}

	// Set cgroupPaths to nil when the map is empty, it can and will be
	// populated by `Manager.Apply()` when the runtime or any other process
	// is moved to the cgroup.
	if len(cgroupPaths) == 0 {
		cgroupPaths = nil
	}

	if useSystemdCgroup {
		systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager()
		if err != nil {
			return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
		}
		libcontcgroupssystemd.UseSystemd()
		return &Manager{
			mgr: systemdCgroupFunc(cgroups, cgroupPaths),
		}, nil
	}

	return &Manager{
		mgr: &libcontcgroupsfs.Manager{
			Cgroups:  cgroups,
			Rootless: isRootless,
			Paths:    cgroupPaths,
		},
	}, nil
}
// readPids reads all the pids listed in the cgroup.procs file of cgroupPath.
//
// Empty lines are ignored. It returns a non-nil, possibly empty slice on
// success. It returns an error when the file cannot be opened, a line is not
// a decimal integer, or reading the file fails part-way through.
func readPids(cgroupPath string) ([]int, error) {
	f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	pids := []int{}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			continue
		}

		pid, err := strconv.Atoi(line)
		if err != nil {
			return nil, err
		}
		pids = append(pids, pid)
	}

	// Scan() returns false both at EOF and on failure; without this check a
	// read error would be reported as a truncated but "successful" list.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return pids, nil
}
// writePids writes each pid, one write per pid, into the cgroup.procs file
// of cgroupPath. It stops at, and returns, the first write error.
func writePids(pids []int, cgroupPath string) error {
	target := filepath.Join(cgroupPath, cgroupProcs)

	for i := range pids {
		payload := []byte(strconv.Itoa(pids[i]))
		err := ioutil.WriteFile(target, payload, os.FileMode(0))
		if err != nil {
			return err
		}
	}

	return nil
}
// logger returns the package log entry with its "source" field set to
// "cgroup-manager".
func (m *Manager) logger() *logrus.Entry {
	fields := logrus.Fields{"source": "cgroup-manager"}
	return cgroupsLogger.WithFields(fields)
}
// moveToParent moves all the processes found in each of the manager's
// cgroup paths into the parent directory of that path.
//
// Pids are written one at a time, so a process that exits between being
// read and being moved ("no such process") is skipped without preventing
// the remaining pids from being moved. Any other error aborts the operation
// and is returned.
func (m *Manager) moveToParent() error {
	m.Lock()
	defer m.Unlock()

	for _, cgroupPath := range m.mgr.GetPaths() {
		pids, err := readPids(cgroupPath)
		if err != nil {
			return err
		}

		cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))

		for _, pid := range pids {
			err := writePids([]int{pid}, cgroupParentPath)
			if err == nil {
				continue
			}
			// The process is already gone; there is nothing left to move.
			if strings.Contains(err.Error(), "no such process") {
				continue
			}
			return err
		}
	}

	return nil
}
// Add places pid into the manager's cgroups by calling Apply(pid) on the
// underlying manager. When running rootless it only logs a debug message
// and returns nil.
func (m *Manager) Add(pid int) error {
	if !rootless.IsRootless() {
		m.Lock()
		defer m.Unlock()
		return m.mgr.Apply(pid)
	}

	m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
	return nil
}
// Apply pushes the manager's current cgroup configuration to the underlying
// manager via Set. When running rootless it only logs a debug message and
// returns nil.
func (m *Manager) Apply() error {
	if rootless.IsRootless() {
		m.logger().Debug("Unable to apply constraints: running rootless")
		return nil
	}

	// GetCgroups takes the lock itself, so it must run before Lock() below.
	current, err := m.GetCgroups()
	if err != nil {
		return err
	}
	cfg := configs.Config{Cgroups: current}

	m.Lock()
	defer m.Unlock()
	return m.mgr.Set(&cfg)
}
// GetCgroups returns the cgroup configuration reported by the underlying
// manager, holding the lock for the duration of the call.
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
	m.Lock()
	defer m.Unlock()

	cgroups, err := m.mgr.GetCgroups()
	return cgroups, err
}
// GetPaths returns the cgroup paths reported by the underlying manager,
// holding the lock for the duration of the call.
func (m *Manager) GetPaths() map[string]string {
	m.Lock()
	defer m.Unlock()

	paths := m.mgr.GetPaths()
	return paths
}
// Destroy first moves the processes of the cgroups into their parents and
// then asks the underlying manager to destroy the cgroups.
func (m *Manager) Destroy() error {
	// cgroup can't be destroyed if it contains running processes
	err := m.moveToParent()
	if err != nil {
		return fmt.Errorf("Could not move processes into parent cgroup: %v", err)
	}

	// moveToParent released the lock on return; take it again for Destroy.
	m.Lock()
	defer m.Unlock()
	return m.mgr.Destroy()
}