diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 8e9530755..6d645ea2a 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -216,6 +216,11 @@ DEFBINDMOUNTS := [] # Image Service Offload DEFSERVICEOFFLOAD ?= false +# SEV Guest Pre-Attestation +DEFGUESTPREATTESTATIONSECRETGUID ?= 0a46e24d-478c-4eb1-8696-113eeec3aa99 +DEFGUESTPREATTESTATIONSECRETTYPE ?= bundle +DEFSEVCERTCHAIN ?= /opt/sev/cert_chain.cert + SED = sed CLI_DIR = cmd @@ -486,6 +491,9 @@ USER_VARS += DEFBINDMOUNTS USER_VARS += DEFVFIOMODE USER_VARS += BUILDFLAGS USER_VARS += DEFSERVICEOFFLOAD +USER_VARS += DEFGUESTPREATTESTATIONSECRETGUID +USER_VARS += DEFGUESTPREATTESTATIONSECRETTYPE +USER_VARS += DEFSEVCERTCHAIN V = @ diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index a08719d96..ecc992bb0 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -33,6 +33,31 @@ machine_type = "@MACHINETYPE@" # Default false # confidential_guest = true +# Enable pre-attestation for AMD SEV and SEV-ES confidential guests. +# Applies only if confidential_guest is true. +# (default: false) +#guest_pre_attestation = true +# +# Guest owner proxy that handles remote attestation +#guest_pre_attestation_proxy="localhost:50051" +# +# Keyset ID for injected secrets +#guest_pre_attestation_keyset="KEYSET-1" +# +# GUID of injected secret +# Key Broker Server for SEV(-ES) expects secrets with this GUID +# See https://github.com/confidential-containers/simple-kbs +#guest_pre_attestation_secret_guid = "@DEFGUESTPREATTESTATIONSECRETGUID@" +# +# Type of injected secret +#guest_pre_attestation_secret_type = "@DEFGUESTPREATTESTATIONSECRETTYPE@" +# +# SEV guest policy +#sev_guest_policy=0 + +# SEV certificate chain path +#sev_cert_chain="@DEFSEVCERTCHAIN@" + # Enable running QEMU VMM as a non-root user. # By default QEMU VMM run as root. 
When this is set to true, QEMU VMM process runs as # a non-root random user. See documentation for the limitations of this mode. diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 100316dd9..0e18f5c98 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -283,6 +283,22 @@ type Object struct { // Prealloc enables memory preallocation Prealloc bool + + // SevPolicy is the policy for the SEV instance. For more info, see AMD document 55766 + // This is only relevant for sev-guest objects + SevPolicy uint32 + + // SevCertFilePath is the path to the guest owner's public Diffie–Hellman key + // This is only relevant for sev-guest objects + SevCertFilePath string + + // SevSessionFilePath is the path to the launch blob + // This is only relevant for sev-guest objects + SevSessionFilePath string + + // SevKernelHashes specifies whether the hashes of the kernel, initrd, & cmdline are included in the measurement + // This is only relevant for sev-guest objects + SevKernelHashes bool } // Valid returns true if the Object structure is valid and complete. 
@@ -353,7 +369,17 @@ func (object Object) QemuParams(config *Config) []string { objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID)) objectParams = append(objectParams, fmt.Sprintf("cbitpos=%d", object.CBitPos)) objectParams = append(objectParams, fmt.Sprintf("reduced-phys-bits=%d", object.ReducedPhysBits)) - + objectParams = append(objectParams, fmt.Sprintf("policy=%d", object.SevPolicy)) + if object.SevCertFilePath != "" { + objectParams = append(objectParams, fmt.Sprintf("dh-cert-file=%s", object.SevCertFilePath)) + } + if object.SevSessionFilePath != "" { + objectParams = append(objectParams, fmt.Sprintf("session-file=%s", object.SevSessionFilePath)) + } + if object.SevKernelHashes { + objectParams = append(objectParams, "kernel-hashes=on") + } + // Add OVMF firmware as pflash drive driveParams = append(driveParams, "if=pflash,format=raw,readonly=on") driveParams = append(driveParams, fmt.Sprintf("file=%s", object.File)) case SecExecGuest: diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 9bf091af8..fe0e3f97a 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -257,6 +257,22 @@ type StatusInfo struct { Status string `json:"status"` } +// SEVInfo represents the SEV guest information +type SEVInfo struct { + State string `json:"state"` + Enabled bool `json:"enabled"` + APIMajor uint32 `json:"api-major"` + APIMinor uint32 `json:"api-minor"` + BuildId uint32 `json:"build-id"` + Policy uint32 `json:"policy"` + Handle uint32 `json:"handle"` +} + +// SEVLaunchMeasurement represents the SEV prelaunch measurement +type SEVLaunchMeasurement struct { + Measurement string `json:"data"` +} + func (q *QMP) readLoop(fromVMCh chan<- []byte) { scanner := bufio.NewScanner(q.conn) if q.cfg.MaxCapacity > 0 { @@ -1661,3 +1677,53 @@ func (q *QMP) ExecuteDumpGuestMemory(ctx context.Context, protocol string, pagin return q.executeCommand(ctx, "dump-guest-memory", args, nil) } + +// 
ExecuteQuerySEV queries SEV hardware details +func (q *QMP) ExecuteQuerySEV(ctx context.Context) (SEVInfo, error) { + response, err := q.executeCommandWithResponse(ctx, "query-sev", nil, nil, nil) + if err != nil { + return SEVInfo{}, err + } + + data, err := json.Marshal(response) + if err != nil { + return SEVInfo{}, fmt.Errorf("unable to extract SEV information: %v", err) + } + + var info SEVInfo + if err = json.Unmarshal(data, &info); err != nil { + return SEVInfo{}, fmt.Errorf("unable to convert SEV information: %v", err) + } + + return info, nil +} + +// ExecuteQuerySEVLaunchMeasure queries SEV launch measurement +func (q *QMP) ExecuteQuerySEVLaunchMeasure(ctx context.Context) (SEVLaunchMeasurement, error) { + response, err := q.executeCommandWithResponse(ctx, "query-sev-launch-measure", nil, nil, nil) + if err != nil { + return SEVLaunchMeasurement{}, err + } + + data, err := json.Marshal(response) + if err != nil { + return SEVLaunchMeasurement{}, fmt.Errorf("unable to extract launch measurement: %v", err) + } + + var measurement SEVLaunchMeasurement + if err = json.Unmarshal(data, &measurement); err != nil { + return SEVLaunchMeasurement{}, fmt.Errorf("unable to convert launch measurement: %v", err) + } + + return measurement, nil +} + +// ExecuteSEVInjectLaunchSecret injects launch secret bundle into SEV guest +func (q *QMP) ExecuteSEVInjectLaunchSecret(ctx context.Context, packetHeader string, secret string) error { + args := map[string]interface{}{ + "packet-header": packetHeader, + "secret": secret, + } + + return q.executeCommand(ctx, "sev-inject-launch-secret", args, nil) +} diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 2aad22bd8..d0a64afe6 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -90,6 +90,13 @@ const defaultRootlessHypervisor = false const defaultDisableSeccomp = false const defaultVfioMode = 
"guest-kernel" const defaultLegacySerial = false +const defaultGuestPreAttestation = false +const defaultGuestPreAttestationProxy string = "" +const defaultGuestPreAttestationKeyset string = "" +const defaultGuestPreAttestationSecretGuid string = "" +const defaultGuestPreAttestationSecretType string = "" +const defaultSEVCertChainPath string = "" +const defaultSEVGuestPolicy uint32 = 0 var defaultSGXEPCSize = int64(0) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index e7f8a916c..5f1454ec0 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -99,6 +99,11 @@ type hypervisor struct { GuestHookPath string `toml:"guest_hook_path"` GuestMemoryDumpPath string `toml:"guest_memory_dump_path"` SeccompSandbox string `toml:"seccompsandbox"` + GuestPreAttestationProxy string `toml:"guest_pre_attestation_proxy"` + GuestPreAttestationKeyset string `toml:"guest_pre_attestation_keyset"` + GuestPreAttestationSecretGuid string `toml:"guest_pre_attestation_secret_guid"` + GuestPreAttestationSecretType string `toml:"guest_pre_attestation_secret_type"` + SEVCertChainPath string `toml:"sev_cert_chain"` HypervisorPathList []string `toml:"valid_hypervisor_paths"` JailerPathList []string `toml:"valid_jailer_paths"` CtlPathList []string `toml:"valid_ctlpaths"` @@ -128,6 +133,8 @@ type hypervisor struct { DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` PCIeRootPort uint32 `toml:"pcie_root_port"` + GuestPreAttestationGRPCTimeout uint32 `toml:"guest_pre_attestation_grpc_timeout"` + SEVGuestPolicy uint32 `toml:"sev_guest_policy"` NumVCPUs int32 `toml:"default_vcpus"` BlockDeviceCacheSet bool `toml:"block_device_cache_set"` BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` @@ -152,6 +159,7 @@ type hypervisor struct { DisableSeccomp bool `toml:"disable_seccomp"` DisableSeLinux bool `toml:"disable_selinux"` LegacySerial bool `toml:"use_legacy_serial"` + GuestPreAttestation 
bool `toml:"guest_pre_attestation"` } type runtime struct { @@ -745,68 +753,75 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { txRateLimiterMaxRate := h.getTxRateLimiterCfg() return vc.HypervisorConfig{ - HypervisorPath: hypervisor, - HypervisorPathList: h.HypervisorPathList, - KernelPath: kernel, - InitrdPath: initrd, - ImagePath: image, - FirmwarePath: firmware, - FirmwareVolumePath: firmwareVolume, - PFlash: pflashes, - MachineAccelerators: machineAccelerators, - CPUFeatures: cpuFeatures, - KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), - HypervisorMachineType: machineType, - NumVCPUs: h.defaultVCPUs(), - DefaultMaxVCPUs: h.defaultMaxVCPUs(), - MemorySize: h.defaultMemSz(), - MemSlots: h.defaultMemSlots(), - MemOffset: h.defaultMemOffset(), - DefaultMaxMemorySize: h.defaultMaxMemSz(), - VirtioMem: h.VirtioMem, - EntropySource: h.GetEntropySource(), - EntropySourceList: h.EntropySourceList, - DefaultBridges: h.defaultBridges(), - DisableBlockDeviceUse: h.DisableBlockDeviceUse, - SharedFS: sharedFS, - VirtioFSDaemon: h.VirtioFSDaemon, - VirtioFSDaemonList: h.VirtioFSDaemonList, - VirtioFSCacheSize: h.VirtioFSCacheSize, - VirtioFSCache: h.defaultVirtioFSCache(), - VirtioFSExtraArgs: h.VirtioFSExtraArgs, - MemPrealloc: h.MemPrealloc, - HugePages: h.HugePages, - IOMMU: h.IOMMU, - IOMMUPlatform: h.getIOMMUPlatform(), - FileBackedMemRootDir: h.FileBackedMemRootDir, - FileBackedMemRootList: h.FileBackedMemRootList, - Debug: h.Debug, - DisableNestingChecks: h.DisableNestingChecks, - BlockDeviceDriver: blockDriver, - BlockDeviceCacheSet: h.BlockDeviceCacheSet, - BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, - BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, - EnableIOThreads: h.EnableIOThreads, - Msize9p: h.msize9p(), - DisableImageNvdimm: h.DisableImageNvdimm, - HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, - PCIeRootPort: h.PCIeRootPort, - DisableVhostNet: h.DisableVhostNet, - EnableVhostUserStore: 
h.EnableVhostUserStore, - VhostUserStorePath: h.vhostUserStorePath(), - VhostUserStorePathList: h.VhostUserStorePathList, - SeccompSandbox: h.SeccompSandbox, - GuestHookPath: h.guestHookPath(), - RxRateLimiterMaxRate: rxRateLimiterMaxRate, - TxRateLimiterMaxRate: txRateLimiterMaxRate, - EnableAnnotations: h.EnableAnnotations, - GuestMemoryDumpPath: h.GuestMemoryDumpPath, - GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, - ConfidentialGuest: h.ConfidentialGuest, - GuestSwap: h.GuestSwap, - Rootless: h.Rootless, - LegacySerial: h.LegacySerial, - DisableSeLinux: h.DisableSeLinux, + HypervisorPath: hypervisor, + HypervisorPathList: h.HypervisorPathList, + KernelPath: kernel, + InitrdPath: initrd, + ImagePath: image, + FirmwarePath: firmware, + FirmwareVolumePath: firmwareVolume, + PFlash: pflashes, + MachineAccelerators: machineAccelerators, + CPUFeatures: cpuFeatures, + KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), + HypervisorMachineType: machineType, + NumVCPUs: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), + MemorySize: h.defaultMemSz(), + MemSlots: h.defaultMemSlots(), + MemOffset: h.defaultMemOffset(), + DefaultMaxMemorySize: h.defaultMaxMemSz(), + VirtioMem: h.VirtioMem, + EntropySource: h.GetEntropySource(), + EntropySourceList: h.EntropySourceList, + DefaultBridges: h.defaultBridges(), + DisableBlockDeviceUse: h.DisableBlockDeviceUse, + SharedFS: sharedFS, + VirtioFSDaemon: h.VirtioFSDaemon, + VirtioFSDaemonList: h.VirtioFSDaemonList, + VirtioFSCacheSize: h.VirtioFSCacheSize, + VirtioFSCache: h.defaultVirtioFSCache(), + VirtioFSExtraArgs: h.VirtioFSExtraArgs, + MemPrealloc: h.MemPrealloc, + HugePages: h.HugePages, + IOMMU: h.IOMMU, + IOMMUPlatform: h.getIOMMUPlatform(), + FileBackedMemRootDir: h.FileBackedMemRootDir, + FileBackedMemRootList: h.FileBackedMemRootList, + Debug: h.Debug, + DisableNestingChecks: h.DisableNestingChecks, + BlockDeviceDriver: blockDriver, + BlockDeviceCacheSet: h.BlockDeviceCacheSet, + 
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect, + BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush, + EnableIOThreads: h.EnableIOThreads, + Msize9p: h.msize9p(), + DisableImageNvdimm: h.DisableImageNvdimm, + HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus, + PCIeRootPort: h.PCIeRootPort, + DisableVhostNet: h.DisableVhostNet, + EnableVhostUserStore: h.EnableVhostUserStore, + VhostUserStorePath: h.vhostUserStorePath(), + VhostUserStorePathList: h.VhostUserStorePathList, + SeccompSandbox: h.SeccompSandbox, + GuestHookPath: h.guestHookPath(), + RxRateLimiterMaxRate: rxRateLimiterMaxRate, + TxRateLimiterMaxRate: txRateLimiterMaxRate, + EnableAnnotations: h.EnableAnnotations, + GuestMemoryDumpPath: h.GuestMemoryDumpPath, + GuestMemoryDumpPaging: h.GuestMemoryDumpPaging, + ConfidentialGuest: h.ConfidentialGuest, + GuestSwap: h.GuestSwap, + Rootless: h.Rootless, + LegacySerial: h.LegacySerial, + DisableSeLinux: h.DisableSeLinux, + GuestPreAttestation: h.GuestPreAttestation, + GuestPreAttestationProxy: h.GuestPreAttestationProxy, + GuestPreAttestationKeyset: h.GuestPreAttestationKeyset, + GuestPreAttestationSecretGuid: h.GuestPreAttestationSecretGuid, + GuestPreAttestationSecretType: h.GuestPreAttestationSecretType, + SEVGuestPolicy: h.SEVGuestPolicy, + SEVCertChainPath: h.SEVCertChainPath, }, nil } @@ -1129,50 +1144,57 @@ func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.Run func GetDefaultHypervisorConfig() vc.HypervisorConfig { return vc.HypervisorConfig{ - HypervisorPath: defaultHypervisorPath, - JailerPath: defaultJailerPath, - KernelPath: defaultKernelPath, - ImagePath: defaultImagePath, - InitrdPath: defaultInitrdPath, - FirmwarePath: defaultFirmwarePath, - FirmwareVolumePath: defaultFirmwareVolumePath, - MachineAccelerators: defaultMachineAccelerators, - CPUFeatures: defaultCPUFeatures, - HypervisorMachineType: defaultMachineType, - NumVCPUs: defaultVCPUCount, - DefaultMaxVCPUs: defaultMaxVCPUCount, - MemorySize: defaultMemSize, - 
MemOffset: defaultMemOffset, - VirtioMem: defaultVirtioMem, - DisableBlockDeviceUse: defaultDisableBlockDeviceUse, - DefaultBridges: defaultBridgesCount, - MemPrealloc: defaultEnableMemPrealloc, - HugePages: defaultEnableHugePages, - IOMMU: defaultEnableIOMMU, - IOMMUPlatform: defaultEnableIOMMUPlatform, - FileBackedMemRootDir: defaultFileBackedMemRootDir, - Debug: defaultEnableDebug, - DisableNestingChecks: defaultDisableNestingChecks, - BlockDeviceDriver: defaultBlockDeviceDriver, - BlockDeviceCacheSet: defaultBlockDeviceCacheSet, - BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, - BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, - EnableIOThreads: defaultEnableIOThreads, - Msize9p: defaultMsize9p, - HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, - PCIeRootPort: defaultPCIeRootPort, - GuestHookPath: defaultGuestHookPath, - VhostUserStorePath: defaultVhostUserStorePath, - VirtioFSCache: defaultVirtioFSCacheMode, - DisableImageNvdimm: defaultDisableImageNvdimm, - RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, - TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, - SGXEPCSize: defaultSGXEPCSize, - ConfidentialGuest: defaultConfidentialGuest, - GuestSwap: defaultGuestSwap, - Rootless: defaultRootlessHypervisor, - DisableSeccomp: defaultDisableSeccomp, - LegacySerial: defaultLegacySerial, + HypervisorPath: defaultHypervisorPath, + JailerPath: defaultJailerPath, + KernelPath: defaultKernelPath, + ImagePath: defaultImagePath, + InitrdPath: defaultInitrdPath, + FirmwarePath: defaultFirmwarePath, + FirmwareVolumePath: defaultFirmwareVolumePath, + MachineAccelerators: defaultMachineAccelerators, + CPUFeatures: defaultCPUFeatures, + HypervisorMachineType: defaultMachineType, + NumVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, + MemorySize: defaultMemSize, + MemOffset: defaultMemOffset, + VirtioMem: defaultVirtioMem, + DisableBlockDeviceUse: defaultDisableBlockDeviceUse, + DefaultBridges: defaultBridgesCount, + MemPrealloc: 
defaultEnableMemPrealloc, + HugePages: defaultEnableHugePages, + IOMMU: defaultEnableIOMMU, + IOMMUPlatform: defaultEnableIOMMUPlatform, + FileBackedMemRootDir: defaultFileBackedMemRootDir, + Debug: defaultEnableDebug, + DisableNestingChecks: defaultDisableNestingChecks, + BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceCacheSet: defaultBlockDeviceCacheSet, + BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, + BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, + EnableIOThreads: defaultEnableIOThreads, + Msize9p: defaultMsize9p, + HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, + PCIeRootPort: defaultPCIeRootPort, + GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, + VirtioFSCache: defaultVirtioFSCacheMode, + DisableImageNvdimm: defaultDisableImageNvdimm, + RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, + TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, + SGXEPCSize: defaultSGXEPCSize, + ConfidentialGuest: defaultConfidentialGuest, + GuestSwap: defaultGuestSwap, + Rootless: defaultRootlessHypervisor, + DisableSeccomp: defaultDisableSeccomp, + LegacySerial: defaultLegacySerial, + GuestPreAttestation: defaultGuestPreAttestation, + GuestPreAttestationProxy: defaultGuestPreAttestationProxy, + GuestPreAttestationKeyset: defaultGuestPreAttestationKeyset, + GuestPreAttestationSecretGuid: defaultGuestPreAttestationSecretGuid, + GuestPreAttestationSecretType: defaultGuestPreAttestationSecretType, + SEVGuestPolicy: defaultSEVGuestPolicy, + SEVCertChainPath: defaultSEVCertChainPath, } } diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index b5e7440b0..59aa21bad 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -1205,8 +1205,8 @@ func TestNewMount(t *testing.T) { assert := assert.New(t) testCases := []struct { - out vc.Mount in specs.Mount + out vc.Mount }{ { in: specs.Mount{ diff --git a/src/runtime/pkg/sev/sev.go 
b/src/runtime/pkg/sev/sev.go index 2667c4b8b..d95d2012b 100644 --- a/src/runtime/pkg/sev/sev.go +++ b/src/runtime/pkg/sev/sev.go @@ -16,14 +16,17 @@ import ( ) type GuestPreAttestationConfig struct { - Proxy string - Policy uint32 - Keyset string - LaunchId string - KernelPath string - InitrdPath string - FwPath string - KernelParameters string + Proxy string + Keyset string + LaunchId string + KernelPath string + InitrdPath string + FwPath string + KernelParameters string + CertChainPath string + KeyBrokerSecretType string + KeyBrokerSecretGuid string + Policy uint32 } type guidLE [16]byte diff --git a/src/runtime/protocols/simple-kbs/keybroker.proto b/src/runtime/protocols/simple-kbs/keybroker.proto index 28a90041e..2e723df7e 100644 --- a/src/runtime/protocols/simple-kbs/keybroker.proto +++ b/src/runtime/protocols/simple-kbs/keybroker.proto @@ -1,3 +1,7 @@ +// Copyright (c) 2022 IBM +// +// SPDX-License-Identifier: Apache-2.0 +// syntax = "proto3"; package keybroker; diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 1c3ebc147..c3d54a0dc 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -586,6 +586,15 @@ func (a *Acrn) PauseVM(ctx context.Context) error { return nil } +func (a *Acrn) AttestVM(ctx context.Context) error { + span, _ := katatrace.Trace(ctx, a.Logger(), "AttestVM", acrnTracingTags, map[string]string{"sandbox_id": a.id}) + defer span.End() + + a.Logger().Warning("AttestVM: unimplemented") + + return nil +} + func (a *Acrn) ResumeVM(ctx context.Context) error { span, _ := katatrace.Trace(ctx, a.Logger(), "ResumeVM", acrnTracingTags, map[string]string{"sandbox_id": a.id}) defer span.End() diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index aaa8e2886..f3ef290e0 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -242,15 +242,15 @@ func (s *CloudHypervisorState) reset() { } type cloudHypervisor struct 
{ + vmconfig chclient.VmConfig console console.Console virtiofsDaemon VirtiofsDaemon - APIClient clhClient ctx context.Context - id string + APIClient clhClient netDevices *[]chclient.NetConfig devicesIds map[string]string netDevicesFiles map[string][]*os.File - vmconfig chclient.VmConfig + id string state CloudHypervisorState config HypervisorConfig } @@ -984,6 +984,11 @@ func (clh *cloudHypervisor) PauseVM(ctx context.Context) error { return nil } +func (clh *cloudHypervisor) AttestVM(ctx context.Context) error { + clh.Logger().WithField("function", "AttestVM").Info("Attest Sandbox") + return nil +} + func (clh *cloudHypervisor) SaveVM() error { clh.Logger().WithField("function", "saveSandboxC").Info("Save Sandbox") return nil diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 703e6e88b..51245318e 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -889,6 +889,10 @@ func (fc *firecracker) PauseVM(ctx context.Context) error { return nil } +func (fc *firecracker) AttestVM(ctx context.Context) error { + return nil +} + func (fc *firecracker) SaveVM() error { return nil } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 48eda0977..f80a73cb0 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -238,312 +238,103 @@ type Param struct { // HypervisorConfig is the hypervisor configuration. type HypervisorConfig struct { - // customAssets is a map of assets. - // Each value in that map takes precedence over the configured assets. - // For example, if there is a value for the "kernel" key in this map, - // it will be used for the sandbox's kernel path instead of KernelPath. - customAssets map[types.AssetType]*types.Asset - - // Supplementary group IDs. - Groups []uint32 - - // KernelPath is the guest kernel host path. - KernelPath string - - // ImagePath is the guest image host path. 
- ImagePath string - - // InitrdPath is the guest initrd image host path. - // ImagePath and InitrdPath cannot be set at the same time. - InitrdPath string - - // FirmwarePath is the bios host path - FirmwarePath string - - // FirmwareVolumePath is the configuration volume path for the firmware - FirmwareVolumePath string - - // MachineAccelerators are machine specific accelerators - MachineAccelerators string - - // CPUFeatures are cpu specific features - CPUFeatures string - - // HypervisorPath is the hypervisor executable host path. - HypervisorPath string - - // HypervisorCtlPath is the hypervisor ctl executable host path. - HypervisorCtlPath string - - // JailerPath is the jailer executable host path. - JailerPath string - - // BlockDeviceDriver specifies the driver to be used for block device - // either VirtioSCSI or VirtioBlock with the default driver being defaultBlockDriver - BlockDeviceDriver string - - // HypervisorMachineType specifies the type of machine being - // emulated. - HypervisorMachineType string - - // MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or - // BootFromTemplate is true. - MemoryPath string - - // DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or - // BootFromTemplate is true. 
- DevicesStatePath string - - // EntropySource is the path to a host source of - // entropy (/dev/random, /dev/urandom or real hardware RNG device) - EntropySource string - - // Shared file system type: - // - virtio-9p - // - virtio-fs (default) - SharedFS string - - // Path for filesystem sharing - SharedPath string - - // VirtioFSDaemon is the virtio-fs vhost-user daemon path - VirtioFSDaemon string - - // VirtioFSCache cache mode for fs version cache or "none" - VirtioFSCache string - - // File based memory backend root directory - FileBackedMemRootDir string - - // VhostUserStorePath is the directory path where vhost-user devices - // related folders, sockets and device nodes should be. - VhostUserStorePath string - - // GuestCoredumpPath is the path in host for saving guest memory dump - GuestMemoryDumpPath string - - // GuestHookPath is the path within the VM that will be used for 'drop-in' hooks - GuestHookPath string - - // VMid is the id of the VM that create the hypervisor if the VM is created by the factory. - // VMid is "" if the hypervisor is not created by the factory. 
- VMid string - - // VMStorePath is the location on disk where VM information will persist - VMStorePath string - - // VMStorePath is the location on disk where runtime information will persist - RunStorePath string - - // SELinux label for the VM - SELinuxProcessLabel string - - // HypervisorPathList is the list of hypervisor paths names allowed in annotations - HypervisorPathList []string - - // HypervisorCtlPathList is the list of hypervisor control paths names allowed in annotations - HypervisorCtlPathList []string - - // JailerPathList is the list of jailer paths names allowed in annotations - JailerPathList []string - - // EntropySourceList is the list of valid entropy sources - EntropySourceList []string - - // VirtioFSDaemonList is the list of valid virtiofs names for annotations - VirtioFSDaemonList []string - - // VirtioFSExtraArgs passes options to virtiofsd daemon - VirtioFSExtraArgs []string - - // Enable annotations by name - EnableAnnotations []string - - // FileBackedMemRootList is the list of valid root directories values for annotations - FileBackedMemRootList []string - - // PFlash image paths - PFlash []string - - // VhostUserStorePathList is the list of valid values for vhost-user paths - VhostUserStorePathList []string - - // SeccompSandbox is the qemu function which enables the seccomp feature - SeccompSandbox string - - // KernelParams are additional guest kernel parameters. - KernelParams []Param - - // HypervisorParams are additional hypervisor parameters. - HypervisorParams []Param - - // SGXEPCSize specifies the size in bytes for the EPC Section. - // Enable SGX. Hardware-based isolation and memory encryption. - SGXEPCSize int64 - - // DiskRateLimiterBwRate is used to control disk I/O bandwidth on VM level. - // The same value, defined in bits per second, is used for inbound and outbound bandwidth. - DiskRateLimiterBwMaxRate int64 - - // DiskRateLimiterBwOneTimeBurst is used to control disk I/O bandwidth on VM level. 
- // This increases the initial max rate and this initial extra credit does *NOT* replenish - // and can be used for an *initial* burst of data. - DiskRateLimiterBwOneTimeBurst int64 - - // DiskRateLimiterOpsRate is used to control disk I/O operations on VM level. - // The same value, defined in operations per second, is used for inbound and outbound bandwidth. - DiskRateLimiterOpsMaxRate int64 - - // DiskRateLimiterOpsOneTimeBurst is used to control disk I/O operations on VM level. - // This increases the initial max rate and this initial extra credit does *NOT* replenish - // and can be used for an *initial* burst of data. + customAssets map[types.AssetType]*types.Asset + SeccompSandbox string + KernelPath string + ImagePath string + InitrdPath string + FirmwarePath string + FirmwareVolumePath string + MachineAccelerators string + CPUFeatures string + HypervisorPath string + HypervisorCtlPath string + GuestPreAttestationKeyset string + BlockDeviceDriver string + HypervisorMachineType string + GuestPreAttestationProxy string + DevicesStatePath string + EntropySource string + SharedFS string + SharedPath string + VirtioFSDaemon string + VirtioFSCache string + FileBackedMemRootDir string + VhostUserStorePath string + GuestMemoryDumpPath string + GuestHookPath string + VMid string + VMStorePath string + RunStorePath string + SELinuxProcessLabel string + JailerPath string + MemoryPath string + GuestPreAttestationSecretGuid string + GuestPreAttestationSecretType string + SEVCertChainPath string + JailerPathList []string + EntropySourceList []string + VirtioFSDaemonList []string + VirtioFSExtraArgs []string + EnableAnnotations []string + FileBackedMemRootList []string + PFlash []string + VhostUserStorePathList []string + HypervisorCtlPathList []string + KernelParams []Param + Groups []uint32 + HypervisorPathList []string + HypervisorParams []Param + DiskRateLimiterBwOneTimeBurst int64 + DiskRateLimiterOpsMaxRate int64 DiskRateLimiterOpsOneTimeBurst int64 - - // 
RxRateLimiterMaxRate is used to control network I/O inbound bandwidth on VM level. - RxRateLimiterMaxRate uint64 - - // TxRateLimiterMaxRate is used to control network I/O outbound bandwidth on VM level. - TxRateLimiterMaxRate uint64 - - // NetRateLimiterBwRate is used to control network I/O bandwidth on VM level. - // The same value, defined in bits per second, is used for inbound and outbound bandwidth. - NetRateLimiterBwMaxRate int64 - - // NetRateLimiterBwOneTimeBurst is used to control network I/O bandwidth on VM level. - // This increases the initial max rate and this initial extra credit does *NOT* replenish - // and can be used for an *initial* burst of data. - NetRateLimiterBwOneTimeBurst int64 - - // NetRateLimiterOpsRate is used to control network I/O operations on VM level. - // The same value, defined in operations per second, is used for inbound and outbound bandwidth. - NetRateLimiterOpsMaxRate int64 - - // NetRateLimiterOpsOneTimeBurst is used to control network I/O operations on VM level. - // This increases the initial max rate and this initial extra credit does *NOT* replenish - // and can be used for an *initial* burst of data. - NetRateLimiterOpsOneTimeBurst int64 - - // MemOffset specifies memory space for nvdimm device - MemOffset uint64 - - // PCIeRootPort is used to indicate the number of PCIe Root Port devices - // The PCIe Root Port device is used to hot-plug the PCIe device - PCIeRootPort uint32 - - // NumVCPUs specifies default number of vCPUs for the VM. - NumVCPUs uint32 - - //DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM. - DefaultMaxVCPUs uint32 - - // DefaultMem specifies default memory size in MiB for the VM. - MemorySize uint32 - - // DefaultMaxMemorySize specifies the maximum amount of RAM in MiB for the VM. - DefaultMaxMemorySize uint64 - - // DefaultBridges specifies default number of bridges for the VM. 
- // Bridges can be used to hot plug devices - DefaultBridges uint32 - - // Msize9p is used as the msize for 9p shares - Msize9p uint32 - - // MemSlots specifies default memory slots the VM. - MemSlots uint32 - - // VirtioFSCacheSize is the DAX cache size in MiB - VirtioFSCacheSize uint32 - - // User ID. - Uid uint32 - - // Group ID. - Gid uint32 - - // BlockDeviceCacheSet specifies cache-related options will be set to block devices or not. - BlockDeviceCacheSet bool - - // BlockDeviceCacheDirect specifies cache-related options for block devices. - // Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. - BlockDeviceCacheDirect bool - - // BlockDeviceCacheNoflush specifies cache-related options for block devices. - // Denotes whether flush requests for the device are ignored. - BlockDeviceCacheNoflush bool - - // DisableBlockDeviceUse disallows a block device from being used. - DisableBlockDeviceUse bool - - // EnableIOThreads enables IO to be processed in a separate thread. - // Supported currently for virtio-scsi driver. - EnableIOThreads bool - - // Debug changes the default hypervisor and kernel parameters to - // enable debug output where available. - Debug bool - - // MemPrealloc specifies if the memory should be pre-allocated - MemPrealloc bool - - // HugePages specifies if the memory should be pre-allocated from huge pages - HugePages bool - - // VirtioMem is used to enable/disable virtio-mem - VirtioMem bool - - // IOMMU specifies if the VM should have a vIOMMU - IOMMU bool - - // IOMMUPlatform is used to indicate if IOMMU_PLATFORM is enabled for supported devices - IOMMUPlatform bool - - // DisableNestingChecks is used to override customizations performed - // when running on top of another VMM. 
- DisableNestingChecks bool - - // DisableImageNvdimm is used to disable guest rootfs image nvdimm devices - DisableImageNvdimm bool - - // HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the - // root bus instead of a bridge. - HotplugVFIOOnRootBus bool - - // GuestMemoryDumpPaging is used to indicate if enable paging - // for QEMU dump-guest-memory command - GuestMemoryDumpPaging bool - - // Enable confidential guest support. - // Enable or disable different hardware features, ranging - // from memory encryption to both memory and CPU-state encryption and integrity. - ConfidentialGuest bool - - // BootToBeTemplate used to indicate if the VM is created to be a template VM - BootToBeTemplate bool - - // BootFromTemplate used to indicate if the VM should be created from a template VM - BootFromTemplate bool - - // DisableVhostNet is used to indicate if host supports vhost_net - DisableVhostNet bool - - // EnableVhostUserStore is used to indicate if host supports vhost-user-blk/scsi - EnableVhostUserStore bool - - // GuestSwap Used to enable/disable swap in the guest - GuestSwap bool - - // Rootless is used to enable rootless VMM process - Rootless bool - - // Disable seccomp from the hypervisor process - DisableSeccomp bool - - // Disable selinux from the hypervisor process - DisableSeLinux bool - - // Use legacy serial for the guest console - LegacySerial bool + SGXEPCSize int64 + DefaultMaxMemorySize uint64 + NetRateLimiterBwMaxRate int64 + NetRateLimiterBwOneTimeBurst int64 + NetRateLimiterOpsMaxRate int64 + NetRateLimiterOpsOneTimeBurst int64 + MemOffset uint64 + TxRateLimiterMaxRate uint64 + DiskRateLimiterBwMaxRate int64 + RxRateLimiterMaxRate uint64 + MemorySize uint32 + DefaultMaxVCPUs uint32 + DefaultBridges uint32 + Msize9p uint32 + MemSlots uint32 + VirtioFSCacheSize uint32 + Uid uint32 + Gid uint32 + SEVGuestPolicy uint32 + PCIeRootPort uint32 + NumVCPUs uint32 + IOMMUPlatform bool + EnableIOThreads bool + Debug bool + 
MemPrealloc bool + HugePages bool + VirtioMem bool + IOMMU bool + DisableBlockDeviceUse bool + DisableNestingChecks bool + DisableImageNvdimm bool + HotplugVFIOOnRootBus bool + GuestMemoryDumpPaging bool + ConfidentialGuest bool + GuestPreAttestation bool + BlockDeviceCacheNoflush bool + BlockDeviceCacheDirect bool + BlockDeviceCacheSet bool + BootToBeTemplate bool + BootFromTemplate bool + DisableVhostNet bool + EnableVhostUserStore bool + GuestSwap bool + Rootless bool + DisableSeccomp bool + DisableSeLinux bool + LegacySerial bool } // vcpu mapping from vcpu number to thread number @@ -901,6 +692,7 @@ func AvailableGuestProtections() (protections []string) { type Hypervisor interface { CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error StartVM(ctx context.Context, timeout int) error + AttestVM(ctx context.Context) error // If wait is set, don't actively stop the sandbox: // just perform cleanup. diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index ce82f4cb0..0e5bccdea 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -280,23 +280,17 @@ type KataAgentState struct { // nolint: govet type kataAgent struct { - ctx context.Context - vmSocket interface{} - - client *kataclient.AgentClient - + ctx context.Context + vmSocket interface{} + client *kataclient.AgentClient + reqHandlers map[string]reqFunc + state KataAgentState + kmodules []string // lock protects the client pointer sync.Mutex - - state KataAgentState - - reqHandlers map[string]reqFunc - kmodules []string - dialTimout uint32 - - keepConn bool - dead bool + keepConn bool + dead bool } func (k *kataAgent) Logger() *logrus.Entry { diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index f4a0b934e..e66ca209a 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ 
b/src/runtime/virtcontainers/mock_hypervisor.go @@ -54,6 +54,10 @@ func (m *mockHypervisor) PauseVM(ctx context.Context) error { return nil } +func (m *mockHypervisor) AttestVM(ctx context.Context) error { + return nil +} + func (m *mockHypervisor) ResumeVM(ctx context.Context) error { return nil } diff --git a/src/runtime/virtcontainers/monitor.go b/src/runtime/virtcontainers/monitor.go index ae7843bea..3b47ebc67 100644 --- a/src/runtime/virtcontainers/monitor.go +++ b/src/runtime/virtcontainers/monitor.go @@ -22,15 +22,12 @@ var monitorLog = virtLog.WithField("subsystem", "virtcontainers/monitor") // nolint: govet type monitor struct { - watchers []chan error - sandbox *Sandbox - - wg sync.WaitGroup - sync.Mutex - + sandbox *Sandbox stopCh chan bool + watchers []chan error + wg sync.WaitGroup checkInterval time.Duration - + sync.Mutex running bool } diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 5e7582619..5c8a7d002 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -328,40 +328,32 @@ func bindMountContainerRootfs(ctx context.Context, shareDir, cid, cRootFs string // Mount describes a container mount. // nolint: govet type Mount struct { + // FSGroup a group ID that the group ownership of the files for the mounted volume + // will need to be changed when set. + FSGroup *int // Source is the source of the mount. Source string // Destination is the destination of the mount (within the container). Destination string - - // Type specifies the type of filesystem to mount. - Type string - // HostPath used to store host side bind mount path HostPath string - // GuestDeviceMount represents the path within the VM that the device // is mounted. Only relevant for block devices. This is tracked in the event // runtime wants to query the agent for mount stats. 
GuestDeviceMount string - // BlockDeviceID represents block device that is attached to the // VM in case this mount is a block device file or a directory // backed by a block device. BlockDeviceID string - - // Options list all the mount options of the filesystem. - Options []string - - // ReadOnly specifies if the mount should be read only or not - ReadOnly bool - - // FSGroup a group ID that the group ownership of the files for the mounted volume - // will need to be changed when set. - FSGroup *int - + // Type specifies the type of filesystem to mount. + Type string // FSGroupChangePolicy specifies the policy that will be used when applying // group id ownership change for a volume. FSGroupChangePolicy volume.FSGroupChangePolicy + // Options list all the mount options of the filesystem. + Options []string + // ReadOnly specifies if the mount should be read only or not + ReadOnly bool } func isSymlink(path string) bool { diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 115ff5e7d..62d298be0 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -37,6 +37,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + "github.com/kata-containers/kata-containers/src/runtime/pkg/sev" pkgUtils "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -84,32 +85,21 @@ type QemuState struct { // qemu is an Hypervisor interface implementation for the Linux qemu hypervisor. 
// nolint: govet type qemu struct { - arch qemuArch - + arch qemuArch virtiofsDaemon VirtiofsDaemon - - ctx context.Context - + ctx context.Context + id string + qemuConfig govmmQemu.Config + qmpMonitorCh qmpChannel // fds is a list of file descriptors inherited by QEMU process // they'll be closed once QEMU process is running - fds []*os.File - - id string - - state QemuState - - qmpMonitorCh qmpChannel - - qemuConfig govmmQemu.Config - - config HypervisorConfig - + fds []*os.File + state QemuState + config HypervisorConfig + nvdimmCount int // if in memory dump progress memoryDumpFlag sync.Mutex - - nvdimmCount int - - stopped bool + stopped bool } const ( @@ -634,10 +624,39 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi PFlash: pflash, PidFile: filepath.Join(q.config.VMStorePath, q.id, "pid"), } + if q.arch.guestProtection() == sevProtection { + sevConfig := sev.GuestPreAttestationConfig{ + Proxy: q.config.GuestPreAttestationProxy, + Policy: q.config.SEVGuestPolicy, + CertChainPath: q.config.SEVCertChainPath, + } + sevConfig.LaunchId = "" - qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath) - if err != nil { - return err + if q.config.GuestPreAttestation { + sevConfig.LaunchId, err = q.arch.setupSEVGuestPreAttestation(ctx, sevConfig) + if err != nil { + q.Logger().Warning("SEV attestation setup failed.") + return err + } + + // SEV(-ES) guest is started paused for launch secrets injection + qemuConfig.Knobs.Stopped = true + q.arch.setSEVPreAttestationId(sevConfig.LaunchId) + } else { + q.Logger().Infof("SEV guest attestation skipped") + } + + qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendSEVObject(qemuConfig.Devices, firmwarePath, firmwareVolumePath, sevConfig) + if err != nil { + q.Logger().Warning("Unable to append SEV object to qemu command") + return err + } + + } else { + qemuConfig.Devices, qemuConfig.Bios, err = 
q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath) + if err != nil { + return err + } } if ioThread != nil { @@ -794,6 +813,56 @@ func (q *qemu) setupVirtioMem(ctx context.Context) error { return err } +func (q *qemu) AttestVM(ctx context.Context) error { + if q.arch.guestProtection() != sevProtection { + return nil + } + + if err := q.qmpSetup(); err != nil { + return err + } + kernelPath, err := q.config.KernelAssetPath() + if err != nil { + return err + } + + initrdPath, err := q.config.InitrdAssetPath() + if err != nil { + return err + } + + firmwarePath, err := q.config.FirmwareAssetPath() + if err != nil { + return err + } + + kernelParameters := q.kernelParameters() + launchId := q.arch.getSEVPreAttestationId() + + // Guest must be paused so that secrets can be injected. + // Guest will be continued by the Attestation function + sevConfig := sev.GuestPreAttestationConfig{ + Proxy: q.config.GuestPreAttestationProxy, + Policy: q.config.SEVGuestPolicy, + Keyset: q.config.GuestPreAttestationKeyset, + KeyBrokerSecretGuid: q.config.GuestPreAttestationSecretGuid, + KeyBrokerSecretType: q.config.GuestPreAttestationSecretType, + LaunchId: launchId, + KernelPath: kernelPath, + InitrdPath: initrdPath, + FwPath: firmwarePath, + KernelParameters: kernelParameters, + } + if err := q.arch.sevGuestPreAttestation( + q.qmpMonitorCh.ctx, + q.qmpMonitorCh.qmp, + sevConfig); err != nil { + return err + } + + return nil +} + // StartVM will start the Sandbox's VM. 
func (q *qemu) StartVM(ctx context.Context, timeout int) error { span, ctx := katatrace.Trace(ctx, q.Logger(), "StartVM", qemuTracingTags, map[string]string{"sandbox_id": q.id}) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 61c18c68e..c5c5cb63e 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -10,11 +10,19 @@ package virtcontainers import ( "context" + b64 "encoding/base64" "fmt" + "log" + "os" + "path/filepath" "time" + "github.com/kata-containers/kata-containers/src/runtime/pkg/sev" + pb "github.com/kata-containers/kata-containers/src/runtime/protocols/simple-kbs" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/sirupsen/logrus" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" "github.com/intel-go/cpuid" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" @@ -39,6 +47,14 @@ const ( defaultQemuMachineOptions = "accel=kvm,kernel_irqchip=on" qmpMigrationWaitTimeout = 5 * time.Second + + sevAttestationGrpcTimeout = 10 * time.Second + + sevAttestationTempDir = "sev" + + sevAttestationGodhName = "godh.b64" + + sevAttestationSessionFileName = "session_file.b64" ) var qemuPaths = map[string]string{ @@ -270,7 +286,37 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, File: firmware, FirmwareVolume: firmwareVolume, }), "", nil - case sevProtection: + case noneProtection: + return devices, firmware, nil + + default: + return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection) + } +} + +// Add the SEV Object qemu parameters for sev guest protection +func (q *qemuAmd64) appendSEVObject(devices []govmmQemu.Device, firmware, firmwareVolume string, config sev.GuestPreAttestationConfig) ([]govmmQemu.Device, string, error) { + attestationDataPath := filepath.Join(os.TempDir(), sevAttestationTempDir, 
config.LaunchId) + sevGodhPath := filepath.Join(attestationDataPath, sevAttestationGodhName) + sevSessionFilePath := filepath.Join(attestationDataPath, sevAttestationSessionFileName) + + // If attestation is enabled, add the certfile and session file + // and the kernel hashes flag. + if len(config.LaunchId) > 0 { + return append(devices, + govmmQemu.Object{ + Type: govmmQemu.SEVGuest, + ID: "sev", + Debug: false, + File: firmware, + CBitPos: cpuid.AMDMemEncrypt.CBitPosition, + ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, + SevPolicy: config.Policy, + SevCertFilePath: sevGodhPath, + SevSessionFilePath: sevSessionFilePath, + SevKernelHashes: true, + }), "", nil + } else { return append(devices, govmmQemu.Object{ Type: govmmQemu.SEVGuest, @@ -279,11 +325,143 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, + SevPolicy: config.Policy, }), "", nil - case noneProtection: - return devices, firmware, nil - - default: - return devices, "", fmt.Errorf("Unsupported guest protection technology: %v", q.protection) } } + +// setup prelaunch attestation for AMD SEV guests +func (q *qemuAmd64) setupSEVGuestPreAttestation(ctx context.Context, config sev.GuestPreAttestationConfig) (string, error) { + + logger := virtLog.WithField("subsystem", "SEV attestation") + logger.Info("Set up prelaunch attestation") + + certChainBin, err := os.ReadFile(config.CertChainPath) + if err != nil { + return "", fmt.Errorf("Attestation certificate chain file not found: %v", err) + } + + certChain := b64.StdEncoding.EncodeToString([]byte(certChainBin)) + + conn, err := grpc.Dial(config.Proxy, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return "", fmt.Errorf("Could not connect to attestation proxy: %v", err) + } + + client := pb.NewKeyBrokerServiceClient(conn) + clientContext, cancel := 
context.WithTimeout(context.Background(), sevAttestationGrpcTimeout) + defer cancel() + + request := pb.BundleRequest{ + CertificateChain: string(certChain), + Policy: config.Policy, + } + bundleResponse, err := client.GetBundle(clientContext, &request) + if err != nil { + return "", fmt.Errorf("Error receiving launch bundle from attestation proxy: %v", err) + } + + attestationId := bundleResponse.LaunchId + if attestationId == "" { + return "", fmt.Errorf("Error receiving launch ID from attestation proxy: %v", err) + } + attestationDataPath := filepath.Join(os.TempDir(), sevAttestationTempDir, attestationId) + err = os.MkdirAll(attestationDataPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Could not create attestation directory: %v", err) + } + + sevGodhPath := filepath.Join(attestationDataPath, sevAttestationGodhName) + sevSessionFilePath := filepath.Join(attestationDataPath, sevAttestationSessionFileName) + + err = os.WriteFile(sevGodhPath, []byte(bundleResponse.GuestOwnerPublicKey), 0777) + if err != nil { + return "", fmt.Errorf("Could not write godh file: %v", err) + } + err = os.WriteFile(sevSessionFilePath, []byte(bundleResponse.LaunchBlob), 0777) + if err != nil { + return "", fmt.Errorf("Could not write session file: %v", err) + } + + return attestationId, nil +} + +// wait for prelaunch attestation to complete +func (q *qemuAmd64) sevGuestPreAttestation(ctx context.Context, + qmp *govmmQemu.QMP, config sev.GuestPreAttestationConfig) error { + + logger := virtLog.WithField("subsystem", "SEV attestation") + logger.Info("Processing prelaunch attestation") + + // Pull the launch measurement from VM + launchMeasure, err := qmp.ExecuteQuerySEVLaunchMeasure(ctx) + if err != nil { + return fmt.Errorf("ExecuteQuerySEVLaunchMeasure error: %v", err) + } + + qemuSevInfo, err := qmp.ExecuteQuerySEV(ctx) + if err != nil { + return fmt.Errorf("ExecuteQuerySEV error: %v", err) + } + + // gRPC connection + conn, err := grpc.Dial(config.Proxy, 
grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + log.Fatalf("did not connect: %v", err) + return fmt.Errorf("Could not connected to attestation proxy: %v", err) + } + + client := pb.NewKeyBrokerServiceClient(conn) + clientContext, cancel := context.WithTimeout(context.Background(), sevAttestationGrpcTimeout) + defer cancel() + + requestDetails := pb.RequestDetails{ + Guid: config.KeyBrokerSecretGuid, + Format: "JSON", + SecretType: config.KeyBrokerSecretType, + Id: config.Keyset, + } + + secrets := []*pb.RequestDetails{&requestDetails} + + launchDigest, err := sev.CalculateLaunchDigest(config.FwPath, config.KernelPath, config.InitrdPath, config.KernelParameters) + if err != nil { + return fmt.Errorf("Could not calculate SEV launch digest: %v", err) + } + launchDigestBase64 := b64.StdEncoding.EncodeToString(launchDigest[:]) + + request := pb.SecretRequest{ + LaunchMeasurement: launchMeasure.Measurement, + LaunchId: config.LaunchId, // stored from bundle request + Policy: config.Policy, // Stored from startup + ApiMajor: qemuSevInfo.APIMajor, // from qemu.SEVInfo + ApiMinor: qemuSevInfo.APIMinor, + BuildId: qemuSevInfo.BuildId, + FwDigest: launchDigestBase64, + LaunchDescription: "shim launch", + SecretRequests: secrets, + } + logger.Info("requesting secrets") + secretResponse, err := client.GetSecret(clientContext, &request) + if err != nil { + return fmt.Errorf("Unable to acquire launch secret from KBS: %v", err) + } + + secretHeader := secretResponse.LaunchSecretHeader + secret := secretResponse.LaunchSecretData + + // Inject secret into VM + if err := qmp.ExecuteSEVInjectLaunchSecret(ctx, secretHeader, secret); err != nil { + return err + } + + // Clean up attestation state + err = os.RemoveAll(filepath.Join(os.TempDir(), sevAttestationTempDir, config.LaunchId)) + if err != nil { + logger.Warning("Unable to clean up attestation directory") + } + + // Continue the VM + logger.Info("Launch secrets injected. 
Continuing the VM.") + return qmp.ExecuteCont(ctx) +} diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index e8cf9fcd5..df07e8cc4 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -14,7 +14,6 @@ import ( "os" "testing" - "github.com/intel-go/cpuid" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/stretchr/testify/assert" @@ -273,26 +272,6 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { assert.Error(err) assert.Empty(bios) - // sev protection - amd64.(*qemuAmd64).protection = sevProtection - - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") - assert.NoError(err) - assert.Empty(bios) - - expectedOut := []govmmQemu.Device{ - govmmQemu.Object{ - Type: govmmQemu.SEVGuest, - ID: "sev", - Debug: false, - File: firmware, - CBitPos: cpuid.AMDMemEncrypt.CBitPosition, - ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, - }, - } - - assert.Equal(expectedOut, devices) - // tdxProtection amd64.(*qemuAmd64).protection = tdxProtection @@ -300,7 +279,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { assert.NoError(err) assert.Empty(bios) - expectedOut = append(expectedOut, + expectedOut := []govmmQemu.Device{ govmmQemu.Object{ Driver: govmmQemu.Loader, Type: govmmQemu.TDXGuest, @@ -309,7 +288,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { Debug: false, File: firmware, }, - ) + } assert.Equal(expectedOut, devices) } diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 1d7d76bfa..a2121cd71 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -21,6 +21,7 @@ import ( "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci" 
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + "github.com/kata-containers/kata-containers/src/runtime/pkg/sev" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" ) @@ -44,6 +45,15 @@ type qemuArch interface { // machine returns the machine type machine() govmmQemu.Machine + // protection returns guest protection type + guestProtection() guestProtection + + // set attestation id for SEV guests with pre-attestation + setSEVPreAttestationId(attestationId string) + + // get attestation id for SEV guests with pre-attestation + getSEVPreAttestationId() string + // qemuPath returns the path to the QEMU binary qemuPath() string @@ -156,16 +166,27 @@ type qemuArch interface { // scans the PCIe space and returns the biggest BAR sizes for 32-bit // and 64-bit addressable memory getBARsMaxAddressableMemory() (uint64, uint64) + + // append SEV object type to the VM definition + appendSEVObject(devices []govmmQemu.Device, firmware, firmwareVolume string, config sev.GuestPreAttestationConfig) ([]govmmQemu.Device, string, error) + + // setup SEV guest prelaunch attestation + setupSEVGuestPreAttestation(ctx context.Context, config sev.GuestPreAttestationConfig) (string, error) + + // wait for prelaunch attestation to complete + sevGuestPreAttestation(ctx context.Context, + qmp *govmmQemu.QMP, config sev.GuestPreAttestationConfig) error } type qemuArchBase struct { - qemuExePath string qemuMachine govmmQemu.Machine - PFlash []string + sevAttestationId string + qemuExePath string kernelParamsNonDebug []Param kernelParamsDebug []Param kernelParams []Param Bridges []types.Bridge + PFlash []string memoryOffset uint64 networkIndex int // Exclude from lint checking for it is ultimately only used in architecture-specific code @@ -265,6 +286,18 @@ func (q *qemuArchBase) machine() govmmQemu.Machine { return q.qemuMachine } +func (q *qemuArchBase) 
guestProtection() guestProtection { + return q.protection +} + +func (q *qemuArchBase) setSEVPreAttestationId(attestation_id string) { + q.sevAttestationId = attestation_id +} + +func (q *qemuArchBase) getSEVPreAttestationId() string { + return q.sevAttestationId +} + func (q *qemuArchBase) qemuPath() string { return q.qemuExePath } @@ -863,3 +896,22 @@ func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmwa hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") return devices, firmware, nil } + +// AMD SEV methods +func (q *qemuArchBase) appendSEVObject(devices []govmmQemu.Device, firmware, firmwareVolume string, config sev.GuestPreAttestationConfig) ([]govmmQemu.Device, string, error) { + hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") + return devices, firmware, nil +} + +// Setup SEV guest attestation +func (q *qemuArchBase) setupSEVGuestPreAttestation(ctx context.Context, config sev.GuestPreAttestationConfig) (string, error) { + hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") + return "", nil +} + +// Wait for SEV prelaunch attestation to complete +func (q *qemuArchBase) sevGuestPreAttestation(ctx context.Context, + qmp *govmmQemu.QMP, config sev.GuestPreAttestationConfig) error { + hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") + return nil +} diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 4a9b56380..76eb380ed 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -118,58 +118,39 @@ type SandboxResourceSizing struct { // SandboxConfig is a Sandbox configuration. 
type SandboxConfig struct { + // Annotations keys must be unique strings and must be name-spaced + // with e.g. reverse domain notation (org.clearlinux.key). + Annotations map[string]string + Hostname string + ID string + HypervisorType HypervisorType // Volumes is a list of shared volumes between the host and the Sandbox. Volumes []types.Volume - + // SandboxBindMounts - list of paths to mount into guest + SandboxBindMounts []string + // Experimental features enabled + Experimental []exp.Feature // Containers describe the list of containers within a Sandbox. // This list can be empty and populated by adding containers // to the Sandbox a posteriori. //TODO: this should be a map to avoid duplicated containers - Containers []ContainerConfig - - // SandboxBindMounts - list of paths to mount into guest - SandboxBindMounts []string - - // Experimental features enabled - Experimental []exp.Feature - - // Annotations keys must be unique strings and must be name-spaced - // with e.g. reverse domain notation (org.clearlinux.key). - Annotations map[string]string - - ID string - - Hostname string - - HypervisorType HypervisorType - - AgentConfig KataAgentConfig - - NetworkConfig NetworkConfig - + Containers []ContainerConfig + NetworkConfig NetworkConfig + AgentConfig KataAgentConfig HypervisorConfig HypervisorConfig - + ShmSize uint64 SandboxResources SandboxResourceSizing - + VfioMode config.VFIOModeType // StaticResourceMgmt indicates if the shim should rely on statically sizing the sandbox (VM) StaticResourceMgmt bool - // Offload the CRI image management service to the Kata agent. ServiceOffload bool - - ShmSize uint64 - - VfioMode config.VFIOModeType - // SharePidNs sets all containers to share the same sandbox level pid namespace. 
SharePidNs bool - // SystemdCgroup enables systemd cgroup support SystemdCgroup bool - // SandboxCgroupOnly enables cgroup only at podlevel in the host - SandboxCgroupOnly bool - + SandboxCgroupOnly bool DisableGuestSeccomp bool } @@ -1227,6 +1208,13 @@ func (s *Sandbox) startVM(ctx context.Context) (err error) { return err } + // not sure how we know that this callback has been executed + if s.config.HypervisorConfig.ConfidentialGuest && s.config.HypervisorConfig.GuestPreAttestation { + if err := s.hypervisor.AttestVM(ctx); err != nil { + return err + } + } + // In case of vm factory, network interfaces are hotplugged // after vm is started. if s.factory != nil {