runtime: Pass SELinux policy for containers to the agent

Pass SELinux policy for containers to the agent if `disable_guest_selinux`
is set to `false` in the runtime configuration. The `container_t` type
is applied to the container process inside the guest by default.
Users can also set a custom SELinux policy to the container process using
`guest_selinux_label` in the runtime configuration. This will be an
alternative configuration of Kubernetes' security context for SELinux
because users cannot specify the policy in Kata through Kubernetes's security
context. To apply SELinux policy to the container, the guest rootfs must
be CentOS that is created and built with `SELINUX=yes`.

Fixes: #4812

Signed-off-by: Manabu Sugimoto <Manabu.Sugimoto@sony.com>
This commit is contained in:
Manabu Sugimoto
2022-08-07 19:46:07 +09:00
parent 9354769286
commit c617bbe70d
18 changed files with 196 additions and 70 deletions

View File

@@ -74,6 +74,8 @@ const (
MinHypervisorMemory = 256
defaultMsize9p = 8192
defaultDisableGuestSeLinux = true
)
var (
@@ -560,6 +562,9 @@ type HypervisorConfig struct {
// Disable selinux from the hypervisor process
DisableSeLinux bool
// Disable selinux from the container process
DisableGuestSeLinux bool
// Use legacy serial for the guest console
LegacySerial bool

View File

@@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) {
func TestHypervisorConfigDefaults(t *testing.T) {
assert := assert.New(t)
hypervisorConfig := &HypervisorConfig{
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "",
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "",
DisableGuestSeLinux: defaultDisableGuestSeLinux,
}
testHypervisorConfigValid(t, hypervisorConfig, true)
hypervisorConfigDefaultsExpected := &HypervisorConfig{
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "",
NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p,
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
HypervisorPath: "",
NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
}
assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected)

View File

@@ -36,6 +36,7 @@ import (
"context"
"github.com/gogo/protobuf/proto"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
@@ -69,6 +70,9 @@ const (
kernelParamDebugConsole = "agent.debug_console"
kernelParamDebugConsoleVPort = "agent.debug_console_vport"
kernelParamDebugConsoleVPortValue = "1026"
// Default SELinux type applied to the container process inside guest
defaultSeLinuxContainerType = "container_t"
)
var (
@@ -895,7 +899,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st
return nil
}
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) {
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error {
// Disable Hooks since they have been handled on the host and there is
// no reason to send them to the agent. It would make no sense to try
// to apply them on the guest.
@@ -907,11 +911,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
grpcSpec.Linux.Seccomp = nil
}
// Disable SELinux inside of the virtual machine, the label will apply
// to the KVM process
// Pass SELinux label for the container process to the agent.
if grpcSpec.Process.SelinuxLabel != "" {
k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload")
grpcSpec.Process.SelinuxLabel = ""
if !disableGuestSeLinux {
k.Logger().Info("SELinux label will be applied to the container process inside guest")
var label string
if guestSeLinuxLabel != "" {
label = guestSeLinuxLabel
} else {
label = grpcSpec.Process.SelinuxLabel
}
processContext, err := selinux.NewContext(label)
if err != nil {
return err
}
// Change the type from KVM to container because the type passed from the high-level
// runtime is for KVM process.
if guestSeLinuxLabel == "" {
processContext["type"] = defaultSeLinuxContainerType
}
grpcSpec.Process.SelinuxLabel = processContext.Get()
} else {
k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled")
grpcSpec.Process.SelinuxLabel = ""
grpcSpec.Linux.MountLabel = ""
}
}
// By now only CPU constraints are supported
@@ -973,6 +1000,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
}
grpcSpec.Linux.Devices = linuxDevices
}
return nil
}
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
@@ -1256,9 +1285,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
// Currently, guest SELinux can be enabled only when SELinux is enabled on the host side.
if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() {
return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side")
}
if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" {
return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled")
}
// We need to constrain the spec to make sure we're not
// passing irrelevant information to the agent.
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
if err != nil {
return nil, err
}
req := &grpc.CreateContainerRequest{
ContainerId: c.id,

View File

@@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) {
}
k := kataAgent{}
k.constrainGRPCSpec(g, true, true)
k.constrainGRPCSpec(g, true, true, "", true)
// Check nil fields
assert.Nil(g.Hooks)

View File

@@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
SystemdCgroup: sconfig.SystemdCgroup,
SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel,
}
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
SystemdCgroup: savedConf.SystemdCgroup,
SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel,
}
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)

View File

@@ -243,19 +243,6 @@ type ContainerConfig struct {
// SandboxConfig is a sandbox configuration.
// Refs: virtcontainers/sandbox.go:SandboxConfig
type SandboxConfig struct {
// Information for fields not saved:
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
// if you know this data needs to persist, please gives it
// a specific field
ContainerConfigs []ContainerConfig
// SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string
// Experimental enables experimental features
Experimental []string
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *configs.Cgroup `json:"cgroups"`
@@ -265,8 +252,24 @@ type SandboxConfig struct {
KataShimConfig *ShimConfig
HypervisorType string
NetworkConfig NetworkConfig
// Custom SELinux security policy to the container process inside the VM
GuestSeLinuxLabel string
HypervisorType string
// SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string
// Experimental enables experimental features
Experimental []string
// Information for fields not saved:
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
// if you know this data needs to persist, please gives it a specific field
ContainerConfigs []ContainerConfig
NetworkConfig NetworkConfig
HypervisorConfig HypervisorConfig
ShmSize uint64

View File

@@ -247,6 +247,9 @@ const (
// DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest.
DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp"
// GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest.
GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label"
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"

View File

@@ -181,6 +181,15 @@ func (q *qemu) kernelParameters() string {
// set the maximum number of vCPUs
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
if q.config.DisableGuestSeLinux {
q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled")
params = append(params, Param{"selinux", "0"})
} else {
q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled")
params = append(params, Param{"selinux", "1"})
}
// add the params specified by the provided config. As the kernel
// honours the last parameter value set and since the config-provided
// params are added here, they will take priority over the defaults.
@@ -476,6 +485,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
return nd, nil
}
// Set the xattr option for virtiofsd daemon to enable extended attributes
// in virtiofs if SELinux on the guest side is enabled.
if !q.config.DisableGuestSeLinux {
q.Logger().Info("Set the xattr option for virtiofsd")
q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr")
}
// default use virtiofsd
return &virtiofsd{
path: q.config.VirtioFSDaemon,
@@ -846,7 +862,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error {
// the SELinux label. If these processes require privileged, we do
// notwant to run them under confinement.
if !q.config.DisableSeLinux {
if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil {
return err
}

View File

@@ -27,15 +27,16 @@ import (
func newQemuConfig() HypervisorConfig {
return HypervisorConfig{
KernelPath: testQemuKernelPath,
InitrdPath: testQemuInitrdPath,
HypervisorPath: testQemuPath,
NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p,
KernelPath: testQemuKernelPath,
InitrdPath: testQemuInitrdPath,
HypervisorPath: testQemuPath,
NumVCPUs: defaultVCPUs,
MemorySize: defaultMemSzMiB,
DefaultBridges: defaultBridges,
BlockDeviceDriver: defaultBlockDriver,
DefaultMaxVCPUs: defaultMaxVCPUs,
Msize9p: defaultMsize9p,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
}
}
@@ -58,7 +59,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
}
func TestQemuKernelParameters(t *testing.T) {
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs())
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
params := []Param{
{
Key: "foo",

View File

@@ -126,14 +126,17 @@ type SandboxResourceSizing struct {
// SandboxConfig is a Sandbox configuration.
type SandboxConfig struct {
// Volumes is a list of shared volumes between the host and the Sandbox.
Volumes []types.Volume
// Annotations keys must be unique strings and must be name-spaced
Annotations map[string]string
// Containers describe the list of containers within a Sandbox.
// This list can be empty and populated by adding containers
// to the Sandbox a posteriori.
//TODO: this should be a map to avoid duplicated containers
Containers []ContainerConfig
// Custom SELinux security policy to the container process inside the VM
GuestSeLinuxLabel string
HypervisorType HypervisorType
ID string
Hostname string
// SandboxBindMounts - list of paths to mount into guest
SandboxBindMounts []string
@@ -141,31 +144,29 @@ type SandboxConfig struct {
// Experimental features enabled
Experimental []exp.Feature
// Annotations keys must be unique strings and must be name-spaced
// with e.g. reverse domain notation (org.clearlinux.key).
Annotations map[string]string
// Containers describe the list of containers within a Sandbox.
// This list can be empty and populated by adding containers
// to the Sandbox a posteriori.
// TODO: this should be a map to avoid duplicated containers
Containers []ContainerConfig
ID string
Hostname string
HypervisorType HypervisorType
AgentConfig KataAgentConfig
Volumes []types.Volume
NetworkConfig NetworkConfig
AgentConfig KataAgentConfig
HypervisorConfig HypervisorConfig
SandboxResources SandboxResourceSizing
// StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM)
StaticResourceMgmt bool
ShmSize uint64
SandboxResources SandboxResourceSizing
VfioMode config.VFIOModeType
// StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM)
StaticResourceMgmt bool
// SharePidNs sets all containers to share the same sandbox level pid namespace.
SharePidNs bool