mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-28 11:34:40 +01:00
The cgroup manager is in charge of creating and setting up cgroups for virtual containers; for example, it adds /dev/kvm and /dev/vhost-net to the list of cgroup devices so that virtual containers can work. fixes #2438 fixes #2419 Signed-off-by: Julio Montes <julio.montes@intel.com>
322 lines
7.6 KiB
Go
322 lines
7.6 KiB
Go
// Copyright (c) 2020 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
package cgroups
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
|
|
libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
|
libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
|
libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
type Config struct {
|
|
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
|
// placed into to limit the resources the container has available
|
|
// If nil, New() will create one.
|
|
Cgroups *configs.Cgroup
|
|
|
|
// CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name
|
|
// with the value as the path.
|
|
CgroupPaths map[string]string
|
|
|
|
// Resources represents the runtime resource constraints
|
|
Resources specs.LinuxResources
|
|
|
|
// CgroupPath is the OCI spec cgroup path
|
|
CgroupPath string
|
|
}
|
|
|
|
type Manager struct {
|
|
sync.Mutex
|
|
mgr libcontcgroups.Manager
|
|
}
|
|
|
|
// cgroupProcs is the name of the file, inside a cgroup directory, that
// contains the pids.
const cgroupProcs = "cgroup.procs"
|
|
|
|
var (
|
|
// If set to true, expects cgroupsPath to be of form "slice:prefix:name", otherwise cgroups creation will fail
|
|
systemdCgroup *bool
|
|
|
|
cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
|
|
)
|
|
|
|
func EnableSystemdCgroup() {
|
|
systemd := true
|
|
systemdCgroup = &systemd
|
|
}
|
|
|
|
func UseSystemdCgroup() bool {
|
|
if systemdCgroup != nil {
|
|
return *systemdCgroup
|
|
}
|
|
return false
|
|
}
|
|
|
|
// returns the list of devices that a hypervisor may need
|
|
func hypervisorDevices() []specs.LinuxDeviceCgroup {
|
|
wildcard := int64(-1)
|
|
devicemapperMajor := int64(253)
|
|
|
|
devices := []specs.LinuxDeviceCgroup{}
|
|
|
|
devices = append(devices,
|
|
// hypervisor needs access to all devicemapper devices,
|
|
// since they can be hotplugged in the VM.
|
|
specs.LinuxDeviceCgroup{
|
|
Allow: true,
|
|
Type: "b",
|
|
Major: &devicemapperMajor,
|
|
Minor: &wildcard,
|
|
Access: "rwm",
|
|
})
|
|
|
|
// Processes running in a device-cgroup are constrained, they have acccess
|
|
// only to the devices listed in the devices.list file.
|
|
// In order to run Virtual Machines and create virtqueues, hypervisors
|
|
// need access to certain character devices in the host, like kvm and vhost-net.
|
|
hypervisorDevices := []string{
|
|
"/dev/kvm", // To run virtual machines
|
|
"/dev/vhost-net", // To create virtqueues
|
|
}
|
|
|
|
for _, device := range hypervisorDevices {
|
|
var st unix.Stat_t
|
|
linuxDevice := specs.LinuxDeviceCgroup{
|
|
Allow: true,
|
|
Access: "rwm",
|
|
}
|
|
|
|
if err := unix.Stat(device, &st); err != nil {
|
|
cgroupsLogger.WithError(err).WithField("device", device).Warn("Could not get device information")
|
|
continue
|
|
}
|
|
|
|
switch st.Mode & unix.S_IFMT {
|
|
case unix.S_IFCHR:
|
|
linuxDevice.Type = "c"
|
|
case unix.S_IFBLK:
|
|
linuxDevice.Type = "b"
|
|
}
|
|
|
|
major := int64(unix.Major(st.Rdev))
|
|
minor := int64(unix.Minor(st.Rdev))
|
|
linuxDevice.Major = &major
|
|
linuxDevice.Minor = &minor
|
|
|
|
devices = append(devices, linuxDevice)
|
|
}
|
|
|
|
return devices
|
|
}
|
|
|
|
// New creates a new CgroupManager
|
|
func New(config *Config) (*Manager, error) {
|
|
var err error
|
|
useSystemdCgroup := UseSystemdCgroup()
|
|
|
|
devices := []specs.LinuxDeviceCgroup{}
|
|
copy(devices, config.Resources.Devices)
|
|
devices = append(devices, hypervisorDevices()...)
|
|
// Do not modify original devices
|
|
config.Resources.Devices = devices
|
|
|
|
newSpec := specs.Spec{
|
|
Linux: &specs.Linux{
|
|
Resources: &config.Resources,
|
|
},
|
|
}
|
|
|
|
rootless := rootless.IsRootless()
|
|
|
|
cgroups := config.Cgroups
|
|
cgroupPaths := config.CgroupPaths
|
|
|
|
// Create a new cgroup if the current one is nil
|
|
// this cgroups must be saved later
|
|
if cgroups == nil {
|
|
if config.CgroupPath == "" && !rootless {
|
|
cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
|
|
}
|
|
|
|
newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Invalid cgroup path: %v", err)
|
|
}
|
|
|
|
if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
|
|
// cgroup name is taken from spec
|
|
CgroupName: "",
|
|
UseSystemdCgroup: useSystemdCgroup,
|
|
Spec: &newSpec,
|
|
RootlessCgroups: rootless,
|
|
}); err != nil {
|
|
return nil, fmt.Errorf("Could not create cgroup config: %v", err)
|
|
}
|
|
}
|
|
|
|
// Set cgroupPaths to nil when the map is empty, it can and will be
|
|
// populated by `Manager.Apply()` when the runtime or any other process
|
|
// is moved to the cgroup.
|
|
if len(cgroupPaths) == 0 {
|
|
cgroupPaths = nil
|
|
}
|
|
|
|
if useSystemdCgroup {
|
|
systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
|
|
}
|
|
libcontcgroupssystemd.UseSystemd()
|
|
return &Manager{
|
|
mgr: systemdCgroupFunc(cgroups, cgroupPaths),
|
|
}, nil
|
|
}
|
|
|
|
return &Manager{
|
|
mgr: &libcontcgroupsfs.Manager{
|
|
Cgroups: cgroups,
|
|
Rootless: rootless,
|
|
Paths: cgroupPaths,
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// read all the pids in cgroupPath
|
|
func readPids(cgroupPath string) ([]int, error) {
|
|
pids := []int{}
|
|
f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer f.Close()
|
|
buf := bufio.NewScanner(f)
|
|
|
|
for buf.Scan() {
|
|
if t := buf.Text(); t != "" {
|
|
pid, err := strconv.Atoi(t)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pids = append(pids, pid)
|
|
}
|
|
}
|
|
return pids, nil
|
|
}
|
|
|
|
// write the pids into cgroup.procs
|
|
func writePids(pids []int, cgroupPath string) error {
|
|
cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs)
|
|
for _, pid := range pids {
|
|
if err := ioutil.WriteFile(cgroupProcsPath,
|
|
[]byte(strconv.Itoa(pid)),
|
|
os.FileMode(0),
|
|
); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (m *Manager) logger() *logrus.Entry {
|
|
return cgroupsLogger.WithField("source", "cgroup-manager")
|
|
}
|
|
|
|
// move all the processes in the current cgroup to the parent
|
|
func (m *Manager) moveToParent() error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
for _, cgroupPath := range m.mgr.GetPaths() {
|
|
pids, err := readPids(cgroupPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(pids) == 0 {
|
|
// no pids in this cgroup
|
|
continue
|
|
}
|
|
|
|
cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))
|
|
if err = writePids(pids, cgroupParentPath); err != nil {
|
|
if !strings.Contains(err.Error(), "no such process") {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Add pid to cgroups
|
|
func (m *Manager) Add(pid int) error {
|
|
if rootless.IsRootless() {
|
|
m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
|
|
return nil
|
|
}
|
|
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
return m.mgr.Apply(pid)
|
|
}
|
|
|
|
// Apply constraints
|
|
func (m *Manager) Apply() error {
|
|
if rootless.IsRootless() {
|
|
m.logger().Debug("Unable to apply constraints: running rootless")
|
|
return nil
|
|
}
|
|
|
|
cgroups, err := m.GetCgroups()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
return m.mgr.Set(&configs.Config{
|
|
Cgroups: cgroups,
|
|
})
|
|
}
|
|
|
|
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
return m.mgr.GetCgroups()
|
|
}
|
|
|
|
func (m *Manager) GetPaths() map[string]string {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
return m.mgr.GetPaths()
|
|
}
|
|
|
|
func (m *Manager) Destroy() error {
|
|
// cgroup can't be destroyed if it contains running processes
|
|
if err := m.moveToParent(); err != nil {
|
|
return fmt.Errorf("Could not move processes into parent cgroup: %v", err)
|
|
}
|
|
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
return m.mgr.Destroy()
|
|
}
|