Files
kata-containers/virtcontainers/cgroups.go
Wei Zhang 34fe3b9d6d cgroups: add host cgroup support
Fixes #344

Add host cgroup support for kata.

This commits only adds cpu.cfs_period and cpu.cfs_quota support.

It will create 3-level hierarchy, take "cpu" cgroup as an example:

```
/sys/fs/cgroup
|---cpu
   |---kata
      |---<sandbox-id>
         |--vcpu
      |---<sandbox-id>
```

* `vc` cgroup is common parent for all kata-container sandbox, it won't be removed
after sandbox removed. This cgroup has no limitation.
* `<sandbox-id>` cgroup is the layer for each sandbox, it contains all other qemu
threads except for vcpu threads. In future, we can consider putting all shim
processes and proxy process here. This cgroup has no limitation yet.
* `vcpu` cgroup contains vcpu threads from qemu. Currently cpu quota and period
constraint applies to this cgroup.

Signed-off-by: Wei Zhang <zhangwei555@huawei.com>
Signed-off-by: Jingxiao Lu <lujingxiao@huawei.com>
2018-10-27 09:41:35 +08:00

192 lines
5.1 KiB
Go

// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"encoding/json"
"fmt"
"github.com/containerd/cgroups"
"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
const (
vcpuGroupName = "vcpu"
defaultCgroupParent = "/kata"
)
type sandboxCgroups struct {
commonParent cgroups.Cgroup
sandboxSub cgroups.Cgroup
vcpuSub cgroups.Cgroup
}
func (s *Sandbox) newCgroups() error {
// New will still succeed when cgroup exists
// create common parent for all kata-containers
// e.g. /sys/fs/cgroup/cpu/vc
parent, err := cgroups.New(cgroups.V1,
cgroups.StaticPath(defaultCgroupParent), &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %q", defaultCgroupParent)
}
// create sub-cgroup for each sandbox
// e.g. /sys/fs/cgroup/cpu/vc/<sandbox>
sandboxSub, err := parent.New(s.id, &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %s/%s", defaultCgroupParent, s.id)
}
// create sub-cgroup for vcpu threads
vcpuSub, err := sandboxSub.New(vcpuGroupName, &specs.LinuxResources{})
if err != nil {
return fmt.Errorf("failed to create cgroup for %s/%s/%s", defaultCgroupParent, s.id, vcpuGroupName)
}
s.cgroup = &sandboxCgroups{
commonParent: parent,
sandboxSub: sandboxSub,
vcpuSub: vcpuSub,
}
return nil
}
func (s *Sandbox) destroyCgroups() error {
if s.cgroup == nil {
s.Logger().Warningf("cgroup is not initialized, no need to destroy")
return nil
}
// first move all processes in subgroup to parent in case live process blocks
// deletion of cgroup
if err := s.cgroup.sandboxSub.MoveTo(s.cgroup.commonParent); err != nil {
return fmt.Errorf("failed to clear cgroup processes")
}
return s.cgroup.sandboxSub.Delete()
}
func (s *Sandbox) setupCgroups() error {
if s.cgroup == nil {
return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
}
resource, err := s.mergeSpecResource()
if err != nil {
return err
}
if err := s.applyCPUCgroup(resource); err != nil {
return err
}
return nil
}
func (s *Sandbox) applyCPUCgroup(rc *specs.LinuxResources) error {
if s.cgroup == nil {
return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
}
// apply cpu constraint to vcpu cgroup
if err := s.cgroup.vcpuSub.Update(rc); err != nil {
return err
}
// when new container joins, new CPU could be hotplugged, so we
// have to query fresh vcpu info from hypervisor for every time.
tids, err := s.hypervisor.getThreadIDs()
if err != nil || tids == nil {
return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
}
// use Add() to add vcpu thread to s.cgroup, it will write thread id to
// `cgroup.procs` which will move all threads in qemu process to this cgroup
// immediately as default behaviour.
if len(tids.vcpus) > 0 {
if err := s.cgroup.sandboxSub.Add(cgroups.Process{
Pid: tids.vcpus[0],
}); err != nil {
return err
}
}
for _, i := range tids.vcpus {
if i <= 0 {
continue
}
// In contrast, AddTask will write thread id to `tasks`
// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
// qemu will be left in parent cgroup untouched.
if err := s.cgroup.vcpuSub.AddTask(cgroups.Process{
Pid: i,
}); err != nil {
return err
}
}
return nil
}
func (s *Sandbox) mergeSpecResource() (*specs.LinuxResources, error) {
if s.config == nil {
return nil, fmt.Errorf("sandbox config is nil")
}
resource := &specs.LinuxResources{
CPU: &specs.LinuxCPU{},
}
for _, c := range s.config.Containers {
config, ok := c.Annotations[annotations.ConfigJSONKey]
if !ok {
s.Logger().WithField("container", c.ID).Warningf("failed to find config from container annotations")
continue
}
var spec specs.Spec
if err := json.Unmarshal([]byte(config), &spec); err != nil {
return nil, err
}
// TODO: how to handle empty/unlimited resource?
// maybe we should add a default CPU/Memory delta when no
// resource limit is given. -- @WeiZhang555
if spec.Linux == nil || spec.Linux.Resources == nil {
continue
}
// calculate cpu quota and period
s.mergeCPUResource(resource, spec.Linux.Resources)
}
return resource, nil
}
func (s *Sandbox) mergeCPUResource(orig, rc *specs.LinuxResources) {
if orig.CPU == nil {
orig.CPU = &specs.LinuxCPU{}
}
if rc.CPU != nil && rc.CPU.Quota != nil && rc.CPU.Period != nil &&
*rc.CPU.Quota > 0 && *rc.CPU.Period > 0 {
if orig.CPU.Period == nil {
orig.CPU.Period = rc.CPU.Period
orig.CPU.Quota = rc.CPU.Quota
} else {
// this is an example to show how it works:
// container A and `orig` has quota: 5000 and period 10000
// here comes container B with quota 40 and period 100,
// so use previous period 10000 as a baseline, container B
// has proportional resource of quota 4000 and period 10000, calculated as
// delta := 40 / 100 * 10000 = 4000
// and final `*orig.CPU.Quota` = 5000 + 4000 = 9000
delta := float64(*rc.CPU.Quota) / float64(*rc.CPU.Period) * float64(*orig.CPU.Period)
*orig.CPU.Quota += int64(delta)
}
}
}