CCv0: Merge main into CCv0 branch
Merge remote-tracking branch 'upstream/main' into CCv0

Fixes: #3807

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
src/runtime/pkg/containerd-shim-v2/event_forwarder.go (new file, 88 lines)
@@ -0,0 +1,88 @@
// Copyright (c) 2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

package containerdshim

import (
    "context"
    "os"
    "time"

    "github.com/containerd/containerd/events"
)

type forwarderType string

const (
    forwarderTypeLog        forwarderType = "log"
    forwarderTypeContainerd forwarderType = "containerd"

    // The time span allowed for publishing a containerd event;
    // if publishing takes longer than timeOut, it is canceled.
    timeOut = 5 * time.Second

    // ttrpc address passed from the container runtime.
    // For now containerd passes the address and CRI-O does not.
    ttrpcAddressEnv = "TTRPC_ADDRESS"
)

type eventsForwarder interface {
    forward()
    forwarderType() forwarderType
}

type logForwarder struct {
    s *service
}

func (lf *logForwarder) forward() {
    for e := range lf.s.events {
        shimLog.WithField("topic", getTopic(e)).Infof("post event: %+v", e)
    }
}

func (lf *logForwarder) forwarderType() forwarderType {
    return forwarderTypeLog
}

type containerdForwarder struct {
    s         *service
    ctx       context.Context
    publisher events.Publisher
}

func (cf *containerdForwarder) forward() {
    for e := range cf.s.events {
        ctx, cancel := context.WithTimeout(cf.ctx, timeOut)
        err := cf.publisher.Publish(ctx, getTopic(e), e)
        cancel()
        if err != nil {
            shimLog.WithError(err).Error("post event")
        }
    }
}

func (cf *containerdForwarder) forwarderType() forwarderType {
    return forwarderTypeContainerd
}

func (s *service) newEventsForwarder(ctx context.Context, publisher events.Publisher) eventsForwarder {
    var forwarder eventsForwarder
    ttrpcAddress := os.Getenv(ttrpcAddressEnv)
    if ttrpcAddress == "" {
        // Non-containerd runtimes use the log forwarder to write events to the shim log.
        forwarder = &logForwarder{
            s: s,
        }
    } else {
        forwarder = &containerdForwarder{
            s:         s,
            ctx:       ctx,
            publisher: publisher,
        }
    }

    return forwarder
}
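The forwarder selection above hinges entirely on the TTRPC_ADDRESS environment variable. As a rough standalone sketch (not the shim code itself; the socket value below is hypothetical), the rule reduces to:

package main

import (
    "fmt"
    "os"
)

// forwarderTypeFor mirrors the decision made by newEventsForwarder():
// containerd passes TTRPC_ADDRESS to the shim and CRI-O does not, so the
// presence of that variable picks the forwarder implementation.
func forwarderTypeFor() string {
    if os.Getenv("TTRPC_ADDRESS") == "" {
        return "log" // events are only written to the shim log
    }
    return "containerd" // events are published back over ttrpc
}

func main() {
    fmt.Println(forwarderTypeFor()) // "log" when the variable is unset

    os.Setenv("TTRPC_ADDRESS", "/foo/bar.sock") // hypothetical socket path
    fmt.Println(forwarderTypeFor())             // "containerd"
}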
src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go (new file, 45 lines)
@@ -0,0 +1,45 @@
// Copyright (c) 2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

package containerdshim

import (
    "context"
    "os"
    "testing"

    "github.com/containerd/containerd/events"
    "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock"

    "github.com/stretchr/testify/assert"
)

func TestNewEventsForwarder(t *testing.T) {
    assert := assert.New(t)

    sandbox := &vcmock.Sandbox{
        MockID: testSandboxID,
    }

    s := &service{
        id:         testSandboxID,
        sandbox:    sandbox,
        containers: make(map[string]*container),
    }

    // newEventsForwarder will not call publisher to publish events,
    // so here we can use a nil pointer to test newEventsForwarder.
    var publisher events.Publisher

    // check log forwarder
    forwarder := s.newEventsForwarder(context.Background(), publisher)
    assert.Equal(forwarderTypeLog, forwarder.forwarderType())

    // check containerd forwarder
    os.Setenv(ttrpcAddressEnv, "/foo/bar.sock")
    defer os.Setenv(ttrpcAddressEnv, "")
    forwarder = s.newEventsForwarder(context.Background(), publisher)
    assert.Equal(forwarderTypeContainerd, forwarder.forwarderType())
}
@@ -17,7 +17,6 @@ import (
 	eventstypes "github.com/containerd/containerd/api/events"
 	"github.com/containerd/containerd/api/types/task"
 	"github.com/containerd/containerd/errdefs"
-	"github.com/containerd/containerd/events"
 	"github.com/containerd/containerd/namespaces"
 	cdruntime "github.com/containerd/containerd/runtime"
 	cdshim "github.com/containerd/containerd/runtime/v2/shim"
@@ -51,10 +50,6 @@ const (
 	chSize      = 128
 	exitCode255 = 255
-
-	// A time span used to wait for publish a containerd event,
-	// once it costs a longer time than timeOut, it will be canceld.
-	timeOut = 5 * time.Second
 )

 var (
@@ -100,7 +95,8 @@ func New(ctx context.Context, id string, publisher cdshim.Publisher, shutdown fu
 	go s.processExits()

-	go s.forward(ctx, publisher)
+	forwarder := s.newEventsForwarder(ctx, publisher)
+	go forwarder.forward()

 	return s, nil
 }
@@ -256,17 +252,6 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin
 	return address, nil
 }

-func (s *service) forward(ctx context.Context, publisher events.Publisher) {
-	for e := range s.events {
-		ctx, cancel := context.WithTimeout(ctx, timeOut)
-		err := publisher.Publish(ctx, getTopic(e), e)
-		cancel()
-		if err != nil {
-			shimLog.WithError(err).Error("post event")
-		}
-	}
-}
-
 func (s *service) send(evt interface{}) {
 	// for unit test, it will not initialize s.events
 	if s.events != nil {
@@ -1,3 +1,4 @@
+//go:build !s390x
 // +build !s390x

 // Copyright contributors to the Virtual Machine Manager for Go project
src/runtime/pkg/govmm/vmm_amd64.go (new file, 12 lines)
@@ -0,0 +1,12 @@
//
// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package govmm

// MaxVCPUs returns the maximum number of vCPUs supported
func MaxVCPUs() uint32 {
    return uint32(240)
}
src/runtime/pkg/govmm/vmm_arm64.go (new file, 25 lines)
@@ -0,0 +1,25 @@
//
// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package govmm

// In qemu, the maximum number of vCPUs depends on the GIC version, or on how
// many redistributors can fit into the memory map.
// The related code is under github.com/qemu/qemu/hw/arm/virt.c (lines 135 and 1306 in stable-2.11).
// For now qemu only supports v2 and v3; we treat v4 as v3 for backward
// compatibility.
var gicList = map[uint32]uint32{
    uint32(2): uint32(8),
    uint32(3): uint32(123),
    uint32(4): uint32(123),
}

var defaultGICVersion = uint32(3)

// MaxVCPUs returns the maximum number of vCPUs supported
func MaxVCPUs() uint32 {
    return gicList[defaultGICVersion]
}
src/runtime/pkg/govmm/vmm_ppc64le.go (new file, 12 lines)
@@ -0,0 +1,12 @@
//
// Copyright (c) 2018 IBM
//
// SPDX-License-Identifier: Apache-2.0
//

package govmm

// MaxVCPUs returns the maximum number of vCPUs supported
func MaxVCPUs() uint32 {
    return uint32(128)
}
src/runtime/pkg/govmm/vmm_s390x.go (new file, 15 lines)
@@ -0,0 +1,15 @@
//
// Copyright (c) 2018 IBM
//
// SPDX-License-Identifier: Apache-2.0
//

package govmm

// MaxVCPUs returns the maximum number of vCPUs supported
func MaxVCPUs() uint32 {
    // Maximum number of virtual CPUs defined in qemu. See
    // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55
    // #define S390_MAX_CPUS 248
    return uint32(248)
}
@@ -142,7 +142,7 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) {
 	for _, pod := range r.Items {
 		for _, sandbox := range sandboxList {
 			if pod.Id == sandbox {
-				km.sandboxCache.setMetadata(sandbox, sandboxKubeData{
+				km.sandboxCache.setCRIMetadata(sandbox, sandboxCRIMetadata{
 					uid:       pod.Metadata.Uid,
 					name:      pod.Metadata.Name,
 					namespace: pod.Metadata.Namespace,
@@ -151,9 +151,9 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) {
 				sandboxList = removeFromSandboxList(sandboxList, sandbox)

 				monitorLog.WithFields(logrus.Fields{
-					"Pod Name":      pod.Metadata.Name,
-					"Pod Namespace": pod.Metadata.Namespace,
-					"Pod UID":       pod.Metadata.Uid,
+					"cri-name":      pod.Metadata.Name,
+					"cri-namespace": pod.Metadata.Namespace,
+					"cri-uid":       pod.Metadata.Uid,
 				}).Debugf("Synced KATA POD %s", pod.Id)

 				break
@@ -160,12 +160,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {

 	// get metrics from sandbox's shim
 	for _, sandboxID := range sandboxes {
-		sandboxMetadata, ok := km.sandboxCache.getMetadata(sandboxID)
+		sandboxMetadata, ok := km.sandboxCache.getCRIMetadata(sandboxID)
 		if !ok { // likely the sandbox has been just removed
 			continue
 		}
 		wg.Add(1)
-		go func(sandboxID string, sandboxMetadata sandboxKubeData, results chan<- []*dto.MetricFamily) {
+		go func(sandboxID string, sandboxMetadata sandboxCRIMetadata, results chan<- []*dto.MetricFamily) {
 			sandboxMetrics, err := getParsedMetrics(sandboxID, sandboxMetadata)
 			if err != nil {
 				monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox")
@@ -223,7 +223,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {

 }

-func getParsedMetrics(sandboxID string, sandboxMetadata sandboxKubeData) ([]*dto.MetricFamily, error) {
+func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*dto.MetricFamily, error) {
 	body, err := doGet(sandboxID, defaultTimeout, "metrics")
 	if err != nil {
 		return nil, err
@@ -244,7 +244,7 @@ func GetSandboxMetrics(sandboxID string) (string, error) {

 // parsePrometheusMetrics will decode metrics from Prometheus text format
 // and return array of *dto.MetricFamily with an ASC order
-func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, body []byte) ([]*dto.MetricFamily, error) {
+func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata, body []byte) ([]*dto.MetricFamily, error) {
 	reader := bytes.NewReader(body)
 	decoder := expfmt.NewDecoder(reader, expfmt.FmtText)

@@ -268,15 +268,15 @@ func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, b
 			Value: mutils.String2Pointer(sandboxID),
 		},
 		&dto.LabelPair{
-			Name:  mutils.String2Pointer("kube_uid"),
+			Name:  mutils.String2Pointer("cri_uid"),
 			Value: mutils.String2Pointer(sandboxMetadata.uid),
 		},
 		&dto.LabelPair{
-			Name:  mutils.String2Pointer("kube_name"),
+			Name:  mutils.String2Pointer("cri_name"),
 			Value: mutils.String2Pointer(sandboxMetadata.name),
 		},
 		&dto.LabelPair{
-			Name:  mutils.String2Pointer("kube_namespace"),
+			Name:  mutils.String2Pointer("cri_namespace"),
 			Value: mutils.String2Pointer(sandboxMetadata.namespace),
 		},
 	)
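For illustration, after this relabeling a per-sandbox metric carries its CRI metadata under cri_* label names instead of kube_*. A minimal, self-contained sketch of the resulting label set (hypothetical sandbox values mirroring the test below; it uses the standard protobuf helper rather than the internal mutils package):

package main

import (
    "fmt"

    dto "github.com/prometheus/client_model/go"
    "google.golang.org/protobuf/proto"
)

func main() {
    // Hypothetical sandbox metadata, shown with the label names introduced above.
    labels := []*dto.LabelPair{
        {Name: proto.String("sandbox_id"), Value: proto.String("sandboxID-abc")},
        {Name: proto.String("cri_uid"), Value: proto.String("123")},
        {Name: proto.String("cri_name"), Value: proto.String("pod-name")},
        {Name: proto.String("cri_namespace"), Value: proto.String("pod-namespace")},
    }

    for _, l := range labels {
        fmt.Printf("%s=%q\n", l.GetName(), l.GetValue())
    }
}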
@@ -40,7 +40,7 @@ ttt 999
 func TestParsePrometheusMetrics(t *testing.T) {
 	assert := assert.New(t)
 	sandboxID := "sandboxID-abc"
-	sandboxMetadata := sandboxKubeData{"123", "pod-name", "pod-namespace"}
+	sandboxMetadata := sandboxCRIMetadata{"123", "pod-name", "pod-namespace"}

 	// parse metrics
 	list, err := parsePrometheusMetrics(sandboxID, sandboxMetadata, []byte(shimMetricBody))
@@ -60,12 +60,12 @@ func TestParsePrometheusMetrics(t *testing.T) {
 	assert.Equal(4, len(m.Label), "should have 4 labels")
 	assert.Equal("sandbox_id", *m.Label[0].Name, "label name should be sandbox_id")
 	assert.Equal(sandboxID, *m.Label[0].Value, "label value should be", sandboxID)
-	assert.Equal("kube_uid", *m.Label[1].Name, "label name should be kube_uid")
+	assert.Equal("cri_uid", *m.Label[1].Name, "label name should be cri_uid")
 	assert.Equal(sandboxMetadata.uid, *m.Label[1].Value, "label value should be", sandboxMetadata.uid)

-	assert.Equal("kube_name", *m.Label[2].Name, "label name should be kube_name")
+	assert.Equal("cri_name", *m.Label[2].Name, "label name should be cri_name")
 	assert.Equal(sandboxMetadata.name, *m.Label[2].Value, "label value should be", sandboxMetadata.name)
-	assert.Equal("kube_namespace", *m.Label[3].Name, "label name should be kube_namespace")
+	assert.Equal("cri_namespace", *m.Label[3].Name, "label name should be cri_namespace")
 	assert.Equal(sandboxMetadata.namespace, *m.Label[3].Value, "label value should be", sandboxMetadata.namespace)

 	summary := m.Summary
@@ -53,7 +53,7 @@ func NewKataMonitor(runtimeEndpoint string) (*KataMonitor, error) {
 		runtimeEndpoint: runtimeEndpoint,
 		sandboxCache: &sandboxCache{
 			Mutex:     &sync.Mutex{},
-			sandboxes: make(map[string]sandboxKubeData),
+			sandboxes: make(map[string]sandboxCRIMetadata),
 		},
 	}

@@ -104,10 +104,14 @@ func (km *KataMonitor) startPodCacheUpdater() {
 		monitorLog.WithError(err).Fatal("cannot read sandboxes fs")
 		os.Exit(1)
 	}
+	for _, sandbox := range sandboxList {
+		km.sandboxCache.putIfNotExists(sandbox, sandboxCRIMetadata{})
+	}
+
 	monitorLog.Debug("initial sync of sbs directory completed")
 	monitorLog.Tracef("pod list from sbs: %v", sandboxList)

-	// We should get kubernetes metadata from the container manager for each new kata sandbox we detect.
+	// We try to get CRI (kubernetes) metadata from the container manager for each new kata sandbox we detect.
 	// It may take a while for data to be available, so we always wait podCacheRefreshDelaySeconds before checking.
 	cacheUpdateTimer := time.NewTimer(podCacheRefreshDelaySeconds * time.Second)
 	cacheUpdateTimerIsSet := true
@@ -123,7 +127,7 @@ func (km *KataMonitor) startPodCacheUpdater() {
 			case fsnotify.Create:
 				splitPath := strings.Split(event.Name, string(os.PathSeparator))
 				id := splitPath[len(splitPath)-1]
-				if !km.sandboxCache.putIfNotExists(id, sandboxKubeData{}) {
+				if !km.sandboxCache.putIfNotExists(id, sandboxCRIMetadata{}) {
 					monitorLog.WithField("pod", id).Warn(
 						"CREATE event but pod already present in the sandbox cache")
 				}
@@ -9,15 +9,15 @@ import (
 	"sync"
 )

-type sandboxKubeData struct {
+type sandboxCRIMetadata struct {
 	uid       string
 	name      string
 	namespace string
 }
 type sandboxCache struct {
 	*sync.Mutex
-	// the sandboxKubeData links the sandbox id from the container manager to the pod metadata of kubernetes
-	sandboxes map[string]sandboxKubeData
+	// the sandboxCRIMetadata links the sandbox id from the container manager to the pod metadata of kubernetes
+	sandboxes map[string]sandboxCRIMetadata
 }

 func (sc *sandboxCache) getSandboxList() []string {
@@ -43,7 +43,7 @@ func (sc *sandboxCache) deleteIfExists(id string) bool {
 	return false
 }

-func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool {
+func (sc *sandboxCache) putIfNotExists(id string, value sandboxCRIMetadata) bool {
 	sc.Lock()
 	defer sc.Unlock()

@@ -56,14 +56,14 @@ func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool {
 	return false
 }

-func (sc *sandboxCache) setMetadata(id string, value sandboxKubeData) {
+func (sc *sandboxCache) setCRIMetadata(id string, value sandboxCRIMetadata) {
 	sc.Lock()
 	defer sc.Unlock()

 	sc.sandboxes[id] = value
 }

-func (sc *sandboxCache) getMetadata(id string) (sandboxKubeData, bool) {
+func (sc *sandboxCache) getCRIMetadata(id string) (sandboxCRIMetadata, bool) {
 	sc.Lock()
 	defer sc.Unlock()

@@ -16,19 +16,19 @@ func TestSandboxCache(t *testing.T) {
 	assert := assert.New(t)
 	sc := &sandboxCache{
 		Mutex:     &sync.Mutex{},
-		sandboxes: map[string]sandboxKubeData{"111": {"1-2-3", "test-name", "test-namespace"}},
+		sandboxes: map[string]sandboxCRIMetadata{"111": {"1-2-3", "test-name", "test-namespace"}},
 	}

 	assert.Equal(1, len(sc.getSandboxList()))

 	// put new item
 	id := "new-id"
-	b := sc.putIfNotExists(id, sandboxKubeData{})
+	b := sc.putIfNotExists(id, sandboxCRIMetadata{})
 	assert.Equal(true, b)
 	assert.Equal(2, len(sc.getSandboxList()))

 	// put key that already exists
-	b = sc.putIfNotExists(id, sandboxKubeData{})
+	b = sc.putIfNotExists(id, sandboxCRIMetadata{})
 	assert.Equal(false, b)

 	b = sc.deleteIfExists(id)
@@ -15,6 +15,7 @@ import (
 	"strings"

 	"github.com/BurntSushi/toml"
+	"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
 	govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
@@ -135,6 +136,7 @@ type hypervisor struct {
 	GuestSwap      bool `toml:"enable_guest_swap"`
 	Rootless       bool `toml:"rootless"`
 	DisableSeccomp bool `toml:"disable_seccomp"`
+	DisableSeLinux bool `toml:"disable_selinux"`
 }

 type runtime struct {
@@ -343,7 +345,7 @@ func (h hypervisor) defaultVCPUs() uint32 {

 func (h hypervisor) defaultMaxVCPUs() uint32 {
 	numcpus := uint32(goruntime.NumCPU())
-	maxvcpus := vc.MaxQemuVCPUs()
+	maxvcpus := govmm.MaxVCPUs()
 	reqVCPUs := h.DefaultMaxVCPUs

 	//don't exceed the number of physical CPUs. If a default is not provided, use the
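The truncated comment above describes a clamping rule: the effective maximum vCPU count never exceeds the number of physical CPUs and never exceeds what the hypervisor supports, now queried through govmm.MaxVCPUs() instead of vc.MaxQemuVCPUs(). A rough standalone sketch of that idea (hypervisorMaxVCPUs is a stand-in for the architecture-dependent govmm value, not the real defaultMaxVCPUs implementation):

package main

import (
    "fmt"
    "runtime"
)

// Stand-in for govmm.MaxVCPUs(); the real value depends on the
// architecture (see the vmm_*.go files added above).
const hypervisorMaxVCPUs = uint32(240)

// clampMaxVCPUs sketches the clamping behind defaultMaxVCPUs(): fall back
// to the host CPU count when no value is requested, and never exceed
// either the host CPU count or the hypervisor limit.
func clampMaxVCPUs(requested uint32) uint32 {
    numCPUs := uint32(runtime.NumCPU())
    max := requested
    if max == 0 || max > numCPUs {
        max = numCPUs
    }
    if max > hypervisorMaxVCPUs {
        max = hypervisorMaxVCPUs
    }
    return max
}

func main() {
    fmt.Println(clampMaxVCPUs(0))   // defaults to the host CPU count
    fmt.Println(clampMaxVCPUs(512)) // clamped to the host/hypervisor limit
}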
@@ -876,6 +878,8 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
 		SGXEPCSize:        defaultSGXEPCSize,
 		EnableAnnotations: h.EnableAnnotations,
 		DisableSeccomp:    h.DisableSeccomp,
+		ConfidentialGuest: h.ConfidentialGuest,
+		DisableSeLinux:    h.DisableSeLinux,
 	}, nil
 }

@@ -17,6 +17,7 @@ import (
 	"syscall"
 	"testing"

+	"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
 	ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
 	"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
@@ -919,7 +920,7 @@ func TestHypervisorDefaults(t *testing.T) {
 	h.DefaultMaxVCPUs = numCPUs + 1
 	assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong")

-	maxvcpus := vc.MaxQemuVCPUs()
+	maxvcpus := govmm.MaxVCPUs()
 	h.DefaultMaxVCPUs = maxvcpus + 1
 	assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong")

@@ -155,11 +155,19 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
 		}
 	}()

-	// Run pre-start OCI hooks.
-	err = EnterNetNS(sandboxConfig.NetworkConfig.NetworkID, func() error {
-		return PreStartHooks(ctx, ociSpec, containerID, bundlePath)
-	})
-	if err != nil {
+	if ociSpec.Annotations == nil {
+		ociSpec.Annotations = make(map[string]string)
+	}
+	ociSpec.Annotations["nerdctl/network-namespace"] = sandboxConfig.NetworkConfig.NetworkID
+	sandboxConfig.Annotations["nerdctl/network-namespace"] = ociSpec.Annotations["nerdctl/network-namespace"]
+
+	// Run pre-start OCI hooks, in the runtime namespace.
+	if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
+		return nil, vc.Process{}, err
+	}
+
+	// Run create runtime OCI hooks, in the runtime namespace.
+	if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
+		return nil, vc.Process{}, err
+	}

@@ -264,6 +264,46 @@ func TestCreateSandboxFail(t *testing.T) {
 	assert.True(vcmock.IsMockError(err))
 }

+func TestCreateSandboxAnnotations(t *testing.T) {
+	if tc.NotValid(ktu.NeedRoot()) {
+		t.Skip(ktu.TestDisabledNeedRoot)
+	}
+
+	assert := assert.New(t)
+
+	tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t)
+	defer os.RemoveAll(tmpdir)
+
+	runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true)
+	assert.NoError(err)
+
+	spec, err := compatoci.ParseConfigJSON(bundlePath)
+	assert.NoError(err)
+
+	rootFs := vc.RootFs{Mounted: true}
+
+	testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) {
+		return &vcmock.Sandbox{
+			MockID: testSandboxID,
+			MockContainers: []*vcmock.Container{
+				{MockID: testContainerID},
+			},
+			MockAnnotations: sandboxConfig.Annotations,
+		}, nil
+	}
+
+	defer func() {
+		testingImpl.CreateSandboxFunc = nil
+	}()
+
+	sandbox, _, err := CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true)
+	assert.NoError(err)
+
+	netNsPath, err := sandbox.Annotations("nerdctl/network-namespace")
+	assert.NoError(err)
+	assert.Equal(path.Dir(netNsPath), "/var/run/netns")
+}
+
 func TestCheckForFips(t *testing.T) {
 	assert := assert.New(t)

@@ -16,6 +16,7 @@ import (
 	"time"

 	"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
+	syscallWrapper "github.com/kata-containers/kata-containers/src/runtime/pkg/syscall"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 )
@@ -32,15 +33,16 @@ func hookLogger() *logrus.Entry {
 	return kataUtilsLogger.WithField("subsystem", "hook")
 }

-func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error {
+func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundlePath string) error {
 	span, _ := katatrace.Trace(ctx, hookLogger(), "runHook", hookTracingTags)
 	defer span.End()
 	katatrace.AddTags(span, "path", hook.Path, "args", hook.Args)

 	state := specs.State{
-		Pid:    syscall.Gettid(),
-		Bundle: bundlePath,
-		ID:     cid,
+		Pid:         syscallWrapper.Gettid(),
+		Bundle:      bundlePath,
+		ID:          cid,
+		Annotations: spec.Annotations,
 	}

 	stateJSON, err := json.Marshal(state)
@@ -90,13 +92,13 @@ func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error
 	return nil
 }

-func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType string) error {
+func runHooks(ctx context.Context, spec specs.Spec, hooks []specs.Hook, cid, bundlePath, hookType string) error {
 	span, ctx := katatrace.Trace(ctx, hookLogger(), "runHooks", hookTracingTags)
 	katatrace.AddTags(span, "type", hookType)
 	defer span.End()

 	for _, hook := range hooks {
-		if err := runHook(ctx, hook, cid, bundlePath); err != nil {
+		if err := runHook(ctx, spec, hook, cid, bundlePath); err != nil {
 			hookLogger().WithFields(logrus.Fields{
 				"hook-type": hookType,
 				"error":     err,
@@ -109,6 +111,15 @@ func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType
 	return nil
 }

+func CreateRuntimeHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error {
+	// If no hook available, nothing needs to be done.
+	if spec.Hooks == nil {
+		return nil
+	}
+
+	return runHooks(ctx, spec, spec.Hooks.CreateRuntime, cid, bundlePath, "createRuntime")
+}
+
 // PreStartHooks run the hooks before start container
 func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error {
 	// If no hook available, nothing needs to be done.
@@ -116,7 +127,7 @@ func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string)
 		return nil
 	}

-	return runHooks(ctx, spec.Hooks.Prestart, cid, bundlePath, "pre-start")
+	return runHooks(ctx, spec, spec.Hooks.Prestart, cid, bundlePath, "pre-start")
 }

 // PostStartHooks run the hooks just after start container
@@ -126,7 +137,7 @@ func PostStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string
 		return nil
 	}

-	return runHooks(ctx, spec.Hooks.Poststart, cid, bundlePath, "post-start")
+	return runHooks(ctx, spec, spec.Hooks.Poststart, cid, bundlePath, "post-start")
 }

 // PostStopHooks run the hooks after stop container
@@ -136,5 +147,5 @@ func PostStopHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string)
 		return nil
 	}

-	return runHooks(ctx, spec.Hooks.Poststop, cid, bundlePath, "post-stop")
+	return runHooks(ctx, spec, spec.Hooks.Poststop, cid, bundlePath, "post-stop")
 }
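With this change, runHook now forwards the OCI spec annotations to the hook binary through the specs.State payload written to the hook's stdin. A standalone sketch of the JSON a hook would now receive (all values below are hypothetical, only the annotation key comes from the change above):

package main

import (
    "encoding/json"
    "fmt"
    "os"

    "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
    // Hypothetical values; in the runtime, cid, bundlePath and the spec
    // annotations come from the container being created.
    state := specs.State{
        Pid:         os.Getpid(),
        Bundle:      "/run/containerd/io.containerd.runtime.v2.task/default/abc/bundle",
        ID:          "abc",
        Annotations: map[string]string{"nerdctl/network-namespace": "/var/run/netns/nerdctl-1"},
    }

    stateJSON, err := json.Marshal(state)
    if err != nil {
        panic(err)
    }
    // The hook binary reads this JSON from stdin and can now see the
    // annotations as well.
    fmt.Println(string(stateJSON))
}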
@@ -57,26 +57,27 @@ func TestRunHook(t *testing.T) {
 	assert := assert.New(t)

 	ctx := context.Background()
+	spec := specs.Spec{}

 	// Run with timeout 0
 	hook := createHook(0)
-	err := runHook(ctx, hook, testSandboxID, testBundlePath)
+	err := runHook(ctx, spec, hook, testSandboxID, testBundlePath)
 	assert.NoError(err)

 	// Run with timeout 1
 	hook = createHook(1)
-	err = runHook(ctx, hook, testSandboxID, testBundlePath)
+	err = runHook(ctx, spec, hook, testSandboxID, testBundlePath)
 	assert.NoError(err)

 	// Run timeout failure
 	hook = createHook(1)
 	hook.Args = append(hook.Args, "2")
-	err = runHook(ctx, hook, testSandboxID, testBundlePath)
+	err = runHook(ctx, spec, hook, testSandboxID, testBundlePath)
 	assert.Error(err)

 	// Failure due to wrong hook
 	hook = createWrongHook()
-	err = runHook(ctx, hook, testSandboxID, testBundlePath)
+	err = runHook(ctx, spec, hook, testSandboxID, testBundlePath)
 	assert.Error(err)
 }

src/runtime/pkg/katautils/network_darwin.go (new file, 22 lines)
@@ -0,0 +1,22 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//

package katautils

import (
    vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
)

func EnterNetNS(networkID string, cb func() error) error {
    return nil
}

func SetupNetworkNamespace(config *vc.NetworkConfig) error {
    return nil
}

func cleanupNetNS(netNSPath string) error {
    return nil
}
@@ -23,6 +23,7 @@ import (
 	"github.com/sirupsen/logrus"
 	"k8s.io/apimachinery/pkg/api/resource"

+	"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
 	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"

 	"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
@@ -640,8 +641,8 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e
 		return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than the number of CPUs %d on the system", max, numCPUs)
 	}

-	if sbConfig.HypervisorType == vc.QemuHypervisor && max > vc.MaxQemuVCPUs() {
-		return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, vc.MaxQemuVCPUs())
+	if sbConfig.HypervisorType == vc.QemuHypervisor && max > govmm.MaxVCPUs() {
+		return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, govmm.MaxVCPUs())
 	}
 	sbConfig.HypervisorConfig.DefaultMaxVCPUs = max
 	return nil
src/runtime/pkg/resourcecontrol/cgroups.go (new file, 333 lines)
@@ -0,0 +1,333 @@
//go:build linux
// +build linux

// Copyright (c) 2021-2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//

package resourcecontrol

import (
    "fmt"
    "os"
    "path/filepath"
    "sync"

    "github.com/containerd/cgroups"
    v1 "github.com/containerd/cgroups/stats/v1"
    "github.com/opencontainers/runtime-spec/specs-go"
    "github.com/sirupsen/logrus"
)

// CgroupKataPrefix is prepended to the OCI cgroup path to form a different
// cgroup path, so that cAdvisor cannot find the kata containers cgroup path
// on the host and is prevented from grabbing the stats data.
const CgroupKataPrefix = "kata"

func RenameCgroupPath(path string) (string, error) {
    if path == "" {
        path = DefaultResourceControllerID
    }

    cgroupPathDir := filepath.Dir(path)
    cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path))
    return filepath.Join(cgroupPathDir, cgroupPathName), nil
}

type LinuxCgroup struct {
    cgroup  cgroups.Cgroup
    path    string
    cpusets *specs.LinuxCPU
    devices []specs.LinuxDeviceCgroup

    sync.Mutex
}

func sandboxDevices() []specs.LinuxDeviceCgroup {
    devices := []specs.LinuxDeviceCgroup{}

    defaultDevices := []string{
        "/dev/null",
        "/dev/random",
        "/dev/full",
        "/dev/tty",
        "/dev/zero",
        "/dev/urandom",
        "/dev/console",
    }

    // Processes running in a device cgroup are constrained: they have access
    // only to the devices listed in the devices.list file.
    // In order to run virtual machines and create virtqueues, hypervisors
    // need access to certain character devices in the host, like kvm and vhost-net.
    hypervisorDevices := []string{
        "/dev/kvm",         // To run virtual machines
        "/dev/vhost-net",   // To create virtqueues
        "/dev/vfio/vfio",   // To access VFIO devices
        "/dev/vhost-vsock", // To interact with vsock if
    }

    defaultDevices = append(defaultDevices, hypervisorDevices...)

    for _, device := range defaultDevices {
        ldevice, err := DeviceToLinuxDevice(device)
        if err != nil {
            controllerLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device)
            continue
        }
        devices = append(devices, ldevice)
    }

    wildcardMajor := int64(-1)
    wildcardMinor := int64(-1)
    ptsMajor := int64(136)
    tunMajor := int64(10)
    tunMinor := int64(200)

    wildcardDevices := []specs.LinuxDeviceCgroup{
        // allow mknod for any device
        {
            Allow:  true,
            Type:   "c",
            Major:  &wildcardMajor,
            Minor:  &wildcardMinor,
            Access: "m",
        },
        {
            Allow:  true,
            Type:   "b",
            Major:  &wildcardMajor,
            Minor:  &wildcardMinor,
            Access: "m",
        },
        // /dev/pts/ - pts namespaces are "coming soon"
        {
            Allow:  true,
            Type:   "c",
            Major:  &ptsMajor,
            Minor:  &wildcardMinor,
            Access: "rwm",
        },
        // tuntap
        {
            Allow:  true,
            Type:   "c",
            Major:  &tunMajor,
            Minor:  &tunMinor,
            Access: "rwm",
        },
    }

    devices = append(devices, wildcardDevices...)

    return devices
}

func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) {
    var err error

    cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path))
    if err != nil {
        return nil, err
    }

    cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources)
    if err != nil {
        return nil, err
    }

    return &LinuxCgroup{
        path:    cgroupPath,
        devices: resources.Devices,
        cpusets: resources.CPU,
        cgroup:  cgroup,
    }, nil
}

func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) {
    var cgroup cgroups.Cgroup
    sandboxResources := *resources
    sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...)

    // Currently we only know how to handle a systemd cgroup path when it is the only cgroup (no overhead group).
    // Hence, if sandboxCgroupOnly is not true, we treat it as a cgroupfs path as before, although it may be incorrect.
    if !IsSystemdCgroup(path) || !sandboxCgroupOnly {
        return NewResourceController(path, &sandboxResources)
    }

    slice, unit, err := getSliceAndUnit(path)
    if err != nil {
        return nil, err
    }

    // github.com/containerd/cgroups doesn't support creating a scope unit with
    // v1 cgroups against systemd; the following interacts directly with systemd
    // to create the cgroup and then loads it using containerd's API.
    err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding the runtime process makes calling setupCgroups redundant
    if err != nil {
        return nil, err
    }

    cgHierarchy, cgPath, err := cgroupHierarchy(path)
    if err != nil {
        return nil, err
    }

    // load created cgroup and update it with the resources
    if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil {
        err = cgroup.Update(&sandboxResources)
    }

    if err != nil {
        return nil, err
    }

    return &LinuxCgroup{
        path:    path,
        devices: sandboxResources.Devices,
        cpusets: sandboxResources.CPU,
        cgroup:  cgroup,
    }, nil
}

func LoadResourceController(path string) (ResourceController, error) {
    cgHierarchy, cgPath, err := cgroupHierarchy(path)
    if err != nil {
        return nil, err
    }

    cgroup, err := cgroups.Load(cgHierarchy, cgPath)
    if err != nil {
        return nil, err
    }

    return &LinuxCgroup{
        path:   path,
        cgroup: cgroup,
    }, nil
}

func (c *LinuxCgroup) Logger() *logrus.Entry {
    return controllerLogger.WithField("source", "cgroups")
}

func (c *LinuxCgroup) Delete() error {
    return c.cgroup.Delete()
}

func (c *LinuxCgroup) Stat() (*v1.Metrics, error) {
    return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
}

func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error {
    return c.cgroup.Add(cgroups.Process{Pid: pid})
}

func (c *LinuxCgroup) AddThread(pid int, subsystems ...string) error {
    return c.cgroup.AddTask(cgroups.Process{Pid: pid})
}

func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error {
    return c.cgroup.Update(resources)
}

func (c *LinuxCgroup) MoveTo(path string) error {
    cgHierarchy, cgPath, err := cgroupHierarchy(path)
    if err != nil {
        return err
    }

    newCgroup, err := cgroups.Load(cgHierarchy, cgPath)
    if err != nil {
        return err
    }

    return c.cgroup.MoveTo(newCgroup)
}

func (c *LinuxCgroup) AddDevice(deviceHostPath string) error {
    deviceResource, err := DeviceToLinuxDevice(deviceHostPath)
    if err != nil {
        return err
    }

    c.Lock()
    defer c.Unlock()

    c.devices = append(c.devices, deviceResource)

    if err := c.cgroup.Update(&specs.LinuxResources{
        Devices: c.devices,
    }); err != nil {
        return err
    }

    return nil
}

func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error {
    deviceResource, err := DeviceToLinuxDevice(deviceHostPath)
    if err != nil {
        return err
    }

    c.Lock()
    defer c.Unlock()

    for i, d := range c.devices {
        if d.Type == deviceResource.Type &&
            d.Major == deviceResource.Major &&
            d.Minor == deviceResource.Minor {
            c.devices = append(c.devices[:i], c.devices[i+1:]...)
        }
    }

    if err := c.cgroup.Update(&specs.LinuxResources{
        Devices: c.devices,
    }); err != nil {
        return err
    }

    return nil
}

func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error {
    c.Lock()
    defer c.Unlock()

    if len(cpuset) > 0 {
        // If we didn't have a cpuset defined, create one now:
        if c.cpusets == nil {
            c.cpusets = &specs.LinuxCPU{}
        }

        c.cpusets.Cpus = cpuset
    }

    if len(memset) > 0 {
        // If we didn't have a cpuset defined, create one now:
        if c.cpusets == nil {
            c.cpusets = &specs.LinuxCPU{}
        }

        c.cpusets.Mems = memset
    }

    return c.cgroup.Update(&specs.LinuxResources{
        CPU: c.cpusets,
    })
}

func (c *LinuxCgroup) Type() ResourceControllerType {
    return LinuxCgroups
}

func (c *LinuxCgroup) ID() string {
    return c.path
}

func (c *LinuxCgroup) Parent() string {
    return filepath.Dir(c.path)
}
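RenameCgroupPath only rewrites the last path element. A minimal standalone sketch of the renaming (the sandbox path below is hypothetical; this is not the package function itself, which also handles the empty-path default):

package main

import (
    "fmt"
    "path/filepath"
)

// renameCgroupPath sketches the prefixing done by RenameCgroupPath: keep
// the parent directory and prefix the leaf with "kata_", so tools scanning
// the original path (e.g. cAdvisor) do not pick up the kata cgroup.
func renameCgroupPath(path string) string {
    dir := filepath.Dir(path)
    name := fmt.Sprintf("kata_%s", filepath.Base(path))
    return filepath.Join(dir, name)
}

func main() {
    // Hypothetical sandbox cgroup path.
    fmt.Println(renameCgroupPath("/kubepods/besteffort/pod1234/sandbox-abc"))
    // Output: /kubepods/besteffort/pod1234/kata_sandbox-abc
}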
src/runtime/pkg/resourcecontrol/controller.go (new file, 82 lines)
@@ -0,0 +1,82 @@
// Copyright (c) 2021 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//

package resourcecontrol

import (
    v1 "github.com/containerd/cgroups/stats/v1"
    "github.com/opencontainers/runtime-spec/specs-go"
    "github.com/sirupsen/logrus"
)

var (
    controllerLogger = logrus.WithField("source", "virtcontainers/pkg/resourcecontrol")
)

// SetLogger sets up a logger for this pkg
func SetLogger(logger *logrus.Entry) {
    fields := controllerLogger.Data

    controllerLogger = logger.WithFields(fields)
}

// ResourceControllerType describes a resource controller type.
type ResourceControllerType string

const (
    LinuxCgroups ResourceControllerType = "cgroups"
)

// String converts a resource controller type to a string.
func (rType *ResourceControllerType) String() string {
    switch *rType {
    case LinuxCgroups:
        return string(LinuxCgroups)
    default:
        return "Unknown controller type"
    }
}

// ResourceController represents a system resources controller.
// On Linux this interface is implemented through the cgroups API.
type ResourceController interface {
    // Type returns the resource controller implementation type.
    Type() ResourceControllerType

    // ID returns the controller identifier, e.g. a Linux cgroups path.
    ID() string

    // Parent returns the parent controller, for hierarchically
    // defined resources (e.g. Linux cgroups).
    Parent() string

    // Delete deletes the controller.
    Delete() error

    // Stat returns the statistics for the controller.
    Stat() (*v1.Metrics, error)

    // AddProcess adds a process to a set of controllers.
    AddProcess(int, ...string) error

    // AddThread adds a process thread to a set of controllers.
    AddThread(int, ...string) error

    // Update updates the set of resources controlled, based on
    // an OCI resources description.
    Update(*specs.LinuxResources) error

    // MoveTo moves a controller to another one.
    MoveTo(string) error

    // AddDevice adds a device resource to the controller.
    AddDevice(string) error

    // RemoveDevice removes a device resource from the controller.
    RemoveDevice(string) error

    // UpdateCpuSet updates the set of controlled CPUs and memory nodes.
    UpdateCpuSet(string, string) error
}
src/runtime/pkg/resourcecontrol/utils.go (new file, 77 lines)
@@ -0,0 +1,77 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package resourcecontrol

import (
    "fmt"
    "strings"

    "github.com/opencontainers/runc/libcontainer/devices"
    "github.com/opencontainers/runtime-spec/specs-go"
    "golang.org/x/sys/unix"
)

func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) {
    var st unix.Stat_t
    deviceRule := devices.Rule{
        Allow:       true,
        Permissions: "rwm",
    }

    if err := unix.Stat(device, &st); err != nil {
        return nil, err
    }

    devType := st.Mode & unix.S_IFMT

    switch devType {
    case unix.S_IFCHR:
        deviceRule.Type = 'c'
    case unix.S_IFBLK:
        deviceRule.Type = 'b'
    default:
        return nil, fmt.Errorf("unsupported device type: %v", devType)
    }

    major := int64(unix.Major(uint64(st.Rdev)))
    minor := int64(unix.Minor(uint64(st.Rdev)))
    deviceRule.Major = major
    deviceRule.Minor = minor

    return &deviceRule, nil
}

func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) {
    dev, err := DeviceToCgroupDeviceRule(device)
    if err != nil {
        return specs.LinuxDeviceCgroup{}, err
    }

    return specs.LinuxDeviceCgroup{
        Allow:  dev.Allow,
        Type:   string(dev.Type),
        Major:  &dev.Major,
        Minor:  &dev.Minor,
        Access: string(dev.Permissions),
    }, nil
}

func IsSystemdCgroup(cgroupPath string) bool {
    // If we are utilizing systemd to manage cgroups, we expect to receive a path
    // in the format slice:scopeprefix:name. A typical example would be:
    //
    // system.slice:docker:6b4c4a4d0cc2a12c529dcb13a2b8e438dfb3b2a6af34d548d7d
    //
    // Based on this, let's split by the ':' delimiter and verify that the first
    // section has .slice as a suffix.
    parts := strings.Split(cgroupPath, ":")
    if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") {
        return true
    }

    return false
}
src/runtime/pkg/resourcecontrol/utils_linux.go (new file, 114 lines)
@@ -0,0 +1,114 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package resourcecontrol

import (
    "context"
    "fmt"
    "path/filepath"
    "strings"

    "github.com/containerd/cgroups"
    systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    "github.com/godbus/dbus/v5"
    "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
)

// DefaultResourceControllerID is the runtime-determined location in the cgroups hierarchy.
const DefaultResourceControllerID = "/vc"

// ValidCgroupPath returns a valid cgroup path.
// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
func ValidCgroupPath(path string, systemdCgroup bool) (string, error) {
    if IsSystemdCgroup(path) {
        return path, nil
    }

    if systemdCgroup {
        return "", fmt.Errorf("malformed systemd path '%v': expected to be of form 'slice:prefix:name'", path)
    }

    // In the case of an absolute path (starting with /), the runtime MUST
    // take the path to be relative to the cgroups mount point.
    if filepath.IsAbs(path) {
        return filepath.Clean(path), nil
    }

    // In the case of a relative path (not starting with /), the runtime MAY
    // interpret the path relative to a runtime-determined location in the cgroups hierarchy.
    // Clean up the path and return a new path relative to DefaultResourceControllerID.
    return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil
}

func newProperty(name string, units interface{}) systemdDbus.Property {
    return systemdDbus.Property{
        Name:  name,
        Value: dbus.MakeVariant(units),
    }
}

func cgroupHierarchy(path string) (cgroups.Hierarchy, cgroups.Path, error) {
    if !IsSystemdCgroup(path) {
        return cgroups.V1, cgroups.StaticPath(path), nil
    } else {
        slice, unit, err := getSliceAndUnit(path)
        if err != nil {
            return nil, nil, err
        }

        cgroupSlicePath, err := systemd.ExpandSlice(slice)
        if err != nil {
            return nil, nil, err
        }

        return cgroups.Systemd, cgroups.Slice(cgroupSlicePath, unit), nil
    }
}

func createCgroupsSystemd(slice string, unit string, pid uint32) error {
    ctx := context.TODO()
    conn, err := systemdDbus.NewWithContext(ctx)
    if err != nil {
        return err
    }
    defer conn.Close()

    properties := []systemdDbus.Property{
        systemdDbus.PropDescription("cgroup " + unit),
        newProperty("DefaultDependencies", false),
        newProperty("MemoryAccounting", true),
        newProperty("CPUAccounting", true),
        newProperty("IOAccounting", true),
    }

    // https://github.com/opencontainers/runc/blob/master/docs/systemd.md
    if strings.HasSuffix(unit, ".scope") {
        // It's a scope, which we put into a Slice=.
        properties = append(properties, systemdDbus.PropSlice(slice))
        properties = append(properties, newProperty("Delegate", true))
        properties = append(properties, systemdDbus.PropPids(pid))
    } else {
        return fmt.Errorf("Failed to create cgroups with systemd: unit %s is not a scope", unit)
    }

    ch := make(chan string)
    // https://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/
    _, err = conn.StartTransientUnitContext(ctx, unit, "replace", properties, ch)
    if err != nil {
        return err
    }
    <-ch
    return nil
}

func getSliceAndUnit(cgroupPath string) (string, string, error) {
    parts := strings.Split(cgroupPath, ":")
    if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") {
        return parts[0], fmt.Sprintf("%s-%s.scope", parts[1], parts[2]), nil
    }

    return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath)
}
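getSliceAndUnit turns the slice:scopeprefix:name form into a systemd slice plus a scope unit name. A standalone sketch of that mapping (the path below is hypothetical):

package main

import (
    "fmt"
    "strings"
)

func main() {
    // Hypothetical systemd cgroup path, as passed by a container manager.
    path := "system.slice:kata:afhts2e5d4g5s"

    parts := strings.Split(path, ":")
    if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") {
        slice := parts[0]
        unit := fmt.Sprintf("%s-%s.scope", parts[1], parts[2])
        fmt.Println(slice, unit) // system.slice kata-afhts2e5d4g5s.scope
    }
}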
src/runtime/pkg/resourcecontrol/utils_linux_test.go (new file, 160 lines)
@@ -0,0 +1,160 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package resourcecontrol

import (
    "os"
    "path/filepath"
    "strings"
    "testing"

    "github.com/stretchr/testify/assert"
)

func TestIsSystemdCgroup(t *testing.T) {
    assert := assert.New(t)

    tests := []struct {
        path     string
        expected bool
    }{
        {"foo.slice:kata:afhts2e5d4g5s", true},
        {"system.slice:kata:afhts2e5d4g5s", true},
        {"/kata/afhts2e5d4g5s", false},
        {"a:b:c:d", false},
        {":::", false},
        {"", false},
        {":", false},
        {"::", false},
        {":::", false},
        {"a:b", false},
        {"a:b:", false},
        {":a:b", false},
        {"@:@:@", false},
    }

    for _, t := range tests {
        assert.Equal(t.expected, IsSystemdCgroup(t.path), "invalid systemd cgroup path: %v", t.path)
    }
}

func TestValidCgroupPath(t *testing.T) {
    assert := assert.New(t)

    for _, t := range []struct {
        path          string
        systemdCgroup bool
        error         bool
    }{
        // empty paths
        {"../../../", false, false},
        {"../", false, false},
        {".", false, false},
        {"../../../", false, false},
        {"./../", false, false},

        // valid no-systemd paths
        {"../../../foo", false, false},
        {"/../hi", false, false},
        {"/../hi/foo", false, false},
        {"o / m /../ g", false, false},
        {"/overhead/foobar", false, false},
        {"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false},

        // invalid systemd paths
        {"o / m /../ g", true, true},
        {"slice:kata", true, true},
        {"/kata/afhts2e5d4g5s", true, true},
        {"a:b:c:d", true, true},
        {":::", true, true},
        {"", true, true},
        {":", true, true},
        {"::", true, true},
        {":::", true, true},
        {"a:b", true, true},
        {"a:b:", true, true},
        {":a:b", true, true},
        {"@:@:@", true, true},

        // valid systemd paths
        {"x.slice:kata:55555", true, false},
        {"system.slice:kata:afhts2e5d4g5s", true, false},
    } {
        path, err := ValidCgroupPath(t.path, t.systemdCgroup)
        if t.error {
            assert.Error(err)
            continue
        } else {
            assert.NoError(err)
        }

        if filepath.IsAbs(t.path) {
            cleanPath := filepath.Dir(filepath.Clean(t.path))
            assert.True(strings.HasPrefix(path, cleanPath),
                "%v should have prefix %v", path, cleanPath)
        } else if t.systemdCgroup {
            assert.Equal(t.path, path)
        } else {
            assert.True(
                strings.HasPrefix(path, DefaultResourceControllerID),
                "%v should have prefix /%v", path, DefaultResourceControllerID)
        }
    }

}

func TestDeviceToCgroupDeviceRule(t *testing.T) {
    assert := assert.New(t)

    f, err := os.CreateTemp("", "device")
    assert.NoError(err)
    f.Close()

    // fail: regular file to device
    dev, err := DeviceToCgroupDeviceRule(f.Name())
    assert.Error(err)
    assert.Nil(dev)

    // fail: no such file
    os.Remove(f.Name())
    dev, err = DeviceToCgroupDeviceRule(f.Name())
    assert.Error(err)
    assert.Nil(dev)

    devPath := "/dev/null"
    if _, err := os.Stat(devPath); os.IsNotExist(err) {
        t.Skipf("no such device: %v", devPath)
        return
    }
    dev, err = DeviceToCgroupDeviceRule(devPath)
    assert.NoError(err)
    assert.NotNil(dev)
    assert.Equal(rune(dev.Type), 'c')
    assert.NotZero(dev.Major)
    assert.NotZero(dev.Minor)
    assert.NotEmpty(dev.Permissions)
    assert.True(dev.Allow)
}

func TestDeviceToLinuxDevice(t *testing.T) {
    assert := assert.New(t)

    devPath := "/dev/null"
    if _, err := os.Stat(devPath); os.IsNotExist(err) {
        t.Skipf("no such device: %v", devPath)
        return
    }
    dev, err := DeviceToLinuxDevice(devPath)
    assert.NoError(err)
    assert.NotNil(dev)
    assert.Equal(dev.Type, "c")
    assert.NotNil(dev.Major)
    assert.NotZero(*dev.Major)
    assert.NotNil(dev.Minor)
    assert.NotZero(*dev.Minor)
    assert.NotEmpty(dev.Access)
    assert.True(dev.Allow)
}
src/runtime/pkg/syscall/syscall_darwin.go (new file, 16 lines)
@@ -0,0 +1,16 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//

package syscall

import (
    "syscall"
)

func Gettid() int {
    // There is no equivalent to a thread ID on Darwin.
    // We use the PID instead.
    return syscall.Getpid()
}
src/runtime/pkg/syscall/syscall_linux.go (new file, 14 lines)
@@ -0,0 +1,14 @@
// Copyright (c) 2022 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//

package syscall

import (
    "syscall"
)

func Gettid() int {
    return syscall.Gettid()
}