Files
kata-containers/virtcontainers/fc.go
Peng Tao c3629d37c4 Merge pull request #2239 from WeiZhang555/persist-storage
Land experimental "newstore" as formal feature
2020-01-14 13:12:05 +08:00

1215 lines
32 KiB
Go

// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/fifo"
httptransport "github.com/go-openapi/runtime/client"
"github.com/go-openapi/strfmt"
kataclient "github.com/kata-containers/agent/protocols/client"
persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
"github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client"
models "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/models"
ops "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/operations"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/blang/semver"
"github.com/containerd/console"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/kata-containers/runtime/virtcontainers/types"
"github.com/kata-containers/runtime/virtcontainers/utils"
)
type vmmState uint8
const (
notReady vmmState = iota
cfReady
vmReady
)
const (
//fcTimeout is the maximum amount of time in seconds to wait for the VMM to respond
fcTimeout = 10
fcSocket = "api.socket"
//Name of the files within jailer root
//Having predefined names helps with cleanup
fcKernel = "vmlinux"
fcRootfs = "rootfs"
fcStopSandboxTimeout = 15
// This indicates the number of block devices that can be attached to the
// firecracker guest VM.
// We attach a pool of placeholder drives before the guest has started, and then
// patch the replace placeholder drives with drives with actual contents.
fcDiskPoolSize = 8
defaultHybridVSocketName = "kata.hvsock"
// This is the first usable vsock context ID. All the vsocks can use the same
// ID, since it's only used in the guest.
defaultGuestVSockCID = int64(0x3)
// This is related to firecracker logging scheme
fcLogFifo = "logs.fifo"
fcMetricsFifo = "metrics.fifo"
defaultFcConfig = "fcConfig.json"
// storagePathSuffix mirrors persist/fs/fs.go:storagePathSuffix
storagePathSuffix = "vc"
)
// Specify the minimum version of firecracker supported
var fcMinSupportedVersion = semver.MustParse("0.19.0")
var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{
// The boot source is the first partition of the first block device added
{"pci", "off"},
{"reboot", "k"},
{"panic", "1"},
{"iommu", "off"},
{"net.ifnames", "0"},
{"random.trust_cpu", "on"},
// Firecracker doesn't support ACPI
// Fix kernel error "ACPI BIOS Error (bug)"
{"acpi", "off"},
}...)
func (s vmmState) String() string {
switch s {
case notReady:
return "FC not ready"
case cfReady:
return "FC configure ready"
case vmReady:
return "FC VM ready"
}
return ""
}
// FirecrackerInfo contains information related to the hypervisor that we
// want to store on disk
type FirecrackerInfo struct {
PID int
Version string
}
type firecrackerState struct {
sync.RWMutex
state vmmState
}
func (s *firecrackerState) set(state vmmState) {
s.Lock()
defer s.Unlock()
s.state = state
}
// firecracker is an Hypervisor interface implementation for the firecracker VMM.
type firecracker struct {
id string //Unique ID per pod. Normally maps to the sandbox id
vmPath string //All jailed VM assets need to be under this
chrootBaseDir string //chroot base for the jailer
jailerRoot string
socketPath string
netNSPath string
uid string //UID and GID to be used for the VMM
gid string
info FirecrackerInfo
firecrackerd *exec.Cmd //Tracks the firecracker process itself
connection *client.Firecracker //Tracks the current active connection
ctx context.Context
config HypervisorConfig
pendingDevices []firecrackerDevice // Devices to be added before the FC VM ready
state firecrackerState
jailed bool //Set to true if jailer is enabled
stateful bool //Set to true if running with shimv2
fcConfigPath string
fcConfig *types.FcConfig // Parameters configured before VM starts
}
type firecrackerDevice struct {
dev interface{}
devType deviceType
}
// Logger returns a logrus logger appropriate for logging firecracker messages
func (fc *firecracker) Logger() *logrus.Entry {
return virtLog.WithField("subsystem", "firecracker")
}
func (fc *firecracker) trace(name string) (opentracing.Span, context.Context) {
if fc.ctx == nil {
fc.Logger().WithField("type", "bug").Error("trace called before context set")
fc.ctx = context.Background()
}
span, ctx := opentracing.StartSpanFromContext(fc.ctx, name)
span.SetTag("subsystem", "hypervisor")
span.SetTag("type", "firecracker")
return span, ctx
}
// bindMount bind mounts a source in to a destination. This will
// do some bookkeeping:
// * evaluate all symlinks
// * ensure the source exists
// * recursively create the destination
func (fc *firecracker) bindMount(ctx context.Context, source, destination string, readonly bool) error {
span, _ := trace(ctx, "bindMount")
defer span.Finish()
if source == "" {
return fmt.Errorf("source must be specified")
}
if destination == "" {
return fmt.Errorf("destination must be specified")
}
absSource, err := filepath.EvalSymlinks(source)
if err != nil {
return fmt.Errorf("Could not resolve symlink for source %v", source)
}
if err := ensureDestinationExists(absSource, destination); err != nil {
return fmt.Errorf("Could not create destination mount point %v: %v", destination, err)
}
fc.Logger().WithFields(logrus.Fields{"src": absSource, "dst": destination}).Debug("Bind mounting resource")
if err := syscall.Mount(absSource, destination, "bind", syscall.MS_BIND|syscall.MS_SLAVE, ""); err != nil {
return fmt.Errorf("Could not bind mount %v to %v: %v", absSource, destination, err)
}
// For readonly bind mounts, we need to remount with the readonly flag.
// This is needed as only very recent versions of libmount/util-linux support "bind,ro"
if readonly {
return syscall.Mount(absSource, destination, "bind", uintptr(syscall.MS_BIND|syscall.MS_SLAVE|syscall.MS_REMOUNT|syscall.MS_RDONLY), "")
}
return nil
}
// For firecracker this call only sets the internal structure up.
// The sandbox will be created and started through startSandbox().
func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error {
fc.ctx = ctx
span, _ := fc.trace("createSandbox")
defer span.Finish()
//TODO: check validity of the hypervisor config provided
//https://github.com/kata-containers/runtime/issues/1065
fc.id = id
fc.state.set(notReady)
fc.config = *hypervisorConfig
fc.stateful = stateful
// When running with jailer all resources need to be under
// a specific location and that location needs to have
// exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run)
// Also unix domain socket names have a hard limit
// #define UNIX_PATH_MAX 108
// Keep it short and live within the jailer expected paths
// <chroot_base>/<exec_file_name>/<id>/
// Also jailer based on the id implicitly sets up cgroups under
// <cgroups_base>/<exec_file_name>/<id>/
hypervisorName := filepath.Base(hypervisorConfig.HypervisorPath)
//fs.RunStoragePath cannot be used as we need exec perms
fc.chrootBaseDir = filepath.Join("/run", storagePathSuffix)
fc.vmPath = filepath.Join(fc.chrootBaseDir, hypervisorName, fc.id)
fc.jailerRoot = filepath.Join(fc.vmPath, "root") // auto created by jailer
fc.socketPath = filepath.Join(fc.jailerRoot, fcSocket)
// So we need to repopulate this at startSandbox where it is valid
fc.netNSPath = networkNS.NetNsPath
// Till we create lower privileged kata user run as root
// https://github.com/kata-containers/runtime/issues/1869
fc.uid = "0"
fc.gid = "0"
fc.fcConfig = &types.FcConfig{}
fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig)
return nil
}
func (fc *firecracker) newFireClient() *client.Firecracker {
span, _ := fc.trace("newFireClient")
defer span.Finish()
httpClient := client.NewHTTPClient(strfmt.NewFormats())
socketTransport := &http.Transport{
DialContext: func(ctx context.Context, network, path string) (net.Conn, error) {
addr, err := net.ResolveUnixAddr("unix", fc.socketPath)
if err != nil {
return nil, err
}
return net.DialUnix("unix", nil, addr)
},
}
transport := httptransport.New(client.DefaultHost, client.DefaultBasePath, client.DefaultSchemes)
transport.SetLogger(fc.Logger())
transport.SetDebug(fc.Logger().Logger.Level == logrus.DebugLevel)
transport.Transport = socketTransport
httpClient.SetTransport(transport)
return httpClient
}
func (fc *firecracker) vmRunning() bool {
resp, err := fc.client().Operations.DescribeInstance(nil)
if err != nil {
fc.Logger().WithError(err).Error("getting vm status failed")
return false
}
// Be explicit
switch *resp.Payload.State {
case models.InstanceInfoStateStarting:
// Unsure what we should do here
fc.Logger().WithField("unexpected-state", models.InstanceInfoStateStarting).Debug("vmRunning")
return false
case models.InstanceInfoStateRunning:
return true
case models.InstanceInfoStateUninitialized:
return false
default:
return false
}
}
func (fc *firecracker) getVersionNumber() (string, error) {
args := []string{"--version"}
checkCMD := exec.Command(fc.config.HypervisorPath, args...)
data, err := checkCMD.Output()
if err != nil {
return "", fmt.Errorf("Running checking FC version command failed: %v", err)
}
var version string
fields := strings.Split(string(data), " ")
if len(fields) > 1 {
version = strings.TrimSpace(fields[1])
return version, nil
}
return "", errors.New("getting FC version failed, the output is malformed")
}
func (fc *firecracker) checkVersion(version string) error {
v, err := semver.Make(version)
if err != nil {
return fmt.Errorf("Malformed firecracker version: %v", err)
}
if v.LT(fcMinSupportedVersion) {
return fmt.Errorf("version %v is not supported. Minimum supported version of firecracker is %v", v.String(), fcMinSupportedVersion.String())
}
return nil
}
// waitVMMRunning will wait for timeout seconds for the VMM to be up and running.
func (fc *firecracker) waitVMMRunning(timeout int) error {
span, _ := fc.trace("wait VMM to be running")
defer span.Finish()
if timeout < 0 {
return fmt.Errorf("Invalid timeout %ds", timeout)
}
timeStart := time.Now()
for {
if fc.vmRunning() {
return nil
}
if int(time.Since(timeStart).Seconds()) > timeout {
return fmt.Errorf("Failed to connect to firecrackerinstance (timeout %ds)", timeout)
}
time.Sleep(time.Duration(10) * time.Millisecond)
}
}
func (fc *firecracker) fcInit(timeout int) error {
span, _ := fc.trace("fcInit")
defer span.Finish()
// Fetch sandbox network to be able to access it from the sandbox structure.
err := os.MkdirAll(fc.jailerRoot, DirMode)
if err != nil {
return err
}
defer func() {
if err != nil {
if err := os.RemoveAll(fc.vmPath); err != nil {
fc.Logger().WithError(err).Error("Fail to clean up vm directory")
}
}
}()
//FC version set and check
if fc.info.Version, err = fc.getVersionNumber(); err != nil {
return err
}
if err := fc.checkVersion(fc.info.Version); err != nil {
return err
}
var cmd *exec.Cmd
var args []string
if fc.fcConfigPath, err = fc.fcJailResource(fc.fcConfigPath, defaultFcConfig); err != nil {
return err
}
if !fc.config.Debug && fc.stateful {
args = append(args, "--daemonize")
}
//https://github.com/firecracker-microvm/firecracker/blob/master/docs/jailer.md#jailer-usage
//--seccomp-level specifies whether seccomp filters should be installed and how restrictive they should be. Possible values are:
//0 : disabled.
//1 : basic filtering. This prohibits syscalls not whitelisted by Firecracker.
//2 (default): advanced filtering. This adds further checks on some of the parameters of the allowed syscalls.
if fc.jailed {
jailedArgs := []string{
"--id", fc.id,
"--node", "0", //FIXME: Comprehend NUMA topology or explicit ignore
"--seccomp-level", "2",
"--exec-file", fc.config.HypervisorPath,
"--uid", "0", //https://github.com/kata-containers/runtime/issues/1869
"--gid", "0",
"--chroot-base-dir", fc.chrootBaseDir,
}
args = append(args, jailedArgs...)
if fc.netNSPath != "" {
args = append(args, "--netns", fc.netNSPath)
}
args = append(args, "--", "--config-file", fc.fcConfigPath)
cmd = exec.Command(fc.config.JailerPath, args...)
} else {
args = append(args,
"--api-sock", fc.socketPath,
"--config-file", fc.fcConfigPath)
cmd = exec.Command(fc.config.HypervisorPath, args...)
}
if fc.config.Debug && fc.stateful {
stdin, err := fc.watchConsole()
if err != nil {
return err
}
cmd.Stderr = stdin
cmd.Stdout = stdin
}
fc.Logger().WithField("hypervisor args", args).Debug()
fc.Logger().WithField("hypervisor cmd", cmd).Debug()
fc.Logger().Info("Starting VM")
if err := cmd.Start(); err != nil {
fc.Logger().WithField("Error starting firecracker", err).Debug()
return err
}
fc.info.PID = cmd.Process.Pid
fc.firecrackerd = cmd
fc.connection = fc.newFireClient()
if err := fc.waitVMMRunning(timeout); err != nil {
fc.Logger().WithField("fcInit failed:", err).Debug()
return err
}
return nil
}
func (fc *firecracker) fcEnd() (err error) {
span, _ := fc.trace("fcEnd")
defer span.Finish()
fc.Logger().Info("Stopping firecracker VM")
defer func() {
if err != nil {
fc.Logger().Info("fcEnd failed")
} else {
fc.Logger().Info("Firecracker VM stopped")
}
}()
pid := fc.info.PID
// Send a SIGTERM to the VM process to try to stop it properly
if err = syscall.Kill(pid, syscall.SIGTERM); err != nil {
if err == syscall.ESRCH {
return nil
}
return err
}
// Wait for the VM process to terminate
tInit := time.Now()
for {
if err = syscall.Kill(pid, syscall.Signal(0)); err != nil {
return nil
}
if time.Since(tInit).Seconds() >= fcStopSandboxTimeout {
fc.Logger().Warnf("VM still running after waiting %ds", fcStopSandboxTimeout)
break
}
// Let's avoid to run a too busy loop
time.Sleep(time.Duration(50) * time.Millisecond)
}
// Let's try with a hammer now, a SIGKILL should get rid of the
// VM process.
return syscall.Kill(pid, syscall.SIGKILL)
}
func (fc *firecracker) client() *client.Firecracker {
span, _ := fc.trace("client")
defer span.Finish()
if fc.connection == nil {
fc.connection = fc.newFireClient()
}
return fc.connection
}
func (fc *firecracker) createJailedDrive(name string) (string, error) {
// Don't bind mount the resource, just create a raw file
// that can be bind-mounted later
r := filepath.Join(fc.jailerRoot, name)
f, err := os.Create(r)
if err != nil {
return "", err
}
f.Close()
if fc.jailed {
// use path relative to the jail
r = filepath.Join("/", name)
}
return r, nil
}
func (fc *firecracker) fcJailResource(src, dst string) (string, error) {
if src == "" || dst == "" {
return "", fmt.Errorf("fcJailResource: invalid jail locations: src:%v, dst:%v",
src, dst)
}
jailedLocation := filepath.Join(fc.jailerRoot, dst)
if err := fc.bindMount(context.Background(), src, jailedLocation, false); err != nil {
fc.Logger().WithField("bindMount failed", err).Error()
return "", err
}
if !fc.jailed {
return jailedLocation, nil
}
// This is the path within the jailed root
absPath := filepath.Join("/", dst)
return absPath, nil
}
func (fc *firecracker) fcSetBootSource(path, params string) error {
span, _ := fc.trace("fcSetBootSource")
defer span.Finish()
fc.Logger().WithFields(logrus.Fields{"kernel-path": path,
"kernel-params": params}).Debug("fcSetBootSource")
kernelPath, err := fc.fcJailResource(path, fcKernel)
if err != nil {
return err
}
src := &models.BootSource{
KernelImagePath: &kernelPath,
BootArgs: params,
}
fc.fcConfig.BootSource = src
return nil
}
func (fc *firecracker) fcSetVMRootfs(path string) error {
span, _ := fc.trace("fcSetVMRootfs")
defer span.Finish()
jailedRootfs, err := fc.fcJailResource(path, fcRootfs)
if err != nil {
return err
}
driveID := "rootfs"
isReadOnly := true
//Add it as a regular block device
//This allows us to use a partitoned root block device
isRootDevice := false
// This is the path within the jailed root
drive := &models.Drive{
DriveID: &driveID,
IsReadOnly: &isReadOnly,
IsRootDevice: &isRootDevice,
PathOnHost: &jailedRootfs,
}
fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
return nil
}
func (fc *firecracker) fcSetVMBaseConfig(mem int64, vcpus int64, htEnabled bool) {
span, _ := fc.trace("fcSetVMBaseConfig")
defer span.Finish()
fc.Logger().WithFields(logrus.Fields{"mem": mem,
"vcpus": vcpus,
"htEnabled": htEnabled}).Debug("fcSetVMBaseConfig")
cfg := &models.MachineConfiguration{
HtEnabled: &htEnabled,
MemSizeMib: &mem,
VcpuCount: &vcpus,
}
fc.fcConfig.MachineConfig = cfg
}
func (fc *firecracker) fcSetLogger() error {
span, _ := fc.trace("fcSetLogger")
defer span.Finish()
fcLogLevel := "Error"
if fc.config.Debug {
fcLogLevel = "Debug"
}
// listen to log fifo file and transfer error info
jailedLogFifo, err := fc.fcListenToFifo(fcLogFifo)
if err != nil {
return fmt.Errorf("Failed setting log: %s", err)
}
// listen to metrics file and transfer error info
jailedMetricsFifo, err := fc.fcListenToFifo(fcMetricsFifo)
if err != nil {
return fmt.Errorf("Failed setting log: %s", err)
}
fc.fcConfig.Logger = &models.Logger{
Level: &fcLogLevel,
LogFifo: &jailedLogFifo,
MetricsFifo: &jailedMetricsFifo,
Options: []string{},
}
return err
}
func (fc *firecracker) fcListenToFifo(fifoName string) (string, error) {
fcFifoPath := filepath.Join(fc.vmPath, fifoName)
fcFifo, err := fifo.OpenFifo(context.Background(), fcFifoPath, syscall.O_CREAT|syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return "", fmt.Errorf("Failed to open/create fifo file %s", err)
}
jailedFifoPath, err := fc.fcJailResource(fcFifoPath, fifoName)
if err != nil {
return "", err
}
go func() {
scanner := bufio.NewScanner(fcFifo)
for scanner.Scan() {
fc.Logger().WithFields(logrus.Fields{
"fifoName": fifoName,
"contents": scanner.Text()}).Error("firecracker failed")
}
if err := scanner.Err(); err != nil {
fc.Logger().WithError(err).Errorf("Failed reading firecracker fifo file")
}
if err := fcFifo.Close(); err != nil {
fc.Logger().WithError(err).Errorf("Failed closing firecracker fifo file")
}
}()
return jailedFifoPath, nil
}
func (fc *firecracker) fcInitConfiguration() error {
if fc.config.JailerPath != "" {
fc.jailed = true
}
fc.fcSetVMBaseConfig(int64(fc.config.MemorySize),
int64(fc.config.NumVCPUs), false)
kernelPath, err := fc.config.KernelAssetPath()
if err != nil {
return err
}
if fc.config.Debug && fc.stateful {
fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"})
} else {
fcKernelParams = append(fcKernelParams, []Param{
{"8250.nr_uarts", "0"},
// Tell agent where to send the logs
{"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)},
}...)
}
kernelParams := append(fc.config.KernelParams, fcKernelParams...)
strParams := SerializeParams(kernelParams, "=")
formattedParams := strings.Join(strParams, " ")
if err := fc.fcSetBootSource(kernelPath, formattedParams); err != nil {
return err
}
image, err := fc.config.InitrdAssetPath()
if err != nil {
return err
}
if image == "" {
image, err = fc.config.ImageAssetPath()
if err != nil {
return err
}
}
if err := fc.fcSetVMRootfs(image); err != nil {
return err
}
if err := fc.createDiskPool(); err != nil {
return err
}
if err := fc.fcSetLogger(); err != nil {
return err
}
fc.state.set(cfReady)
for _, d := range fc.pendingDevices {
if err := fc.addDevice(d.dev, d.devType); err != nil {
return err
}
}
return nil
}
// startSandbox will start the hypervisor for the given sandbox.
// In the context of firecracker, this will start the hypervisor,
// for configuration, but not yet start the actual virtual machine
func (fc *firecracker) startSandbox(timeout int) error {
span, _ := fc.trace("startSandbox")
defer span.Finish()
if err := fc.fcInitConfiguration(); err != nil {
return err
}
data, errJSON := json.MarshalIndent(fc.fcConfig, "", "\t")
if errJSON != nil {
return errJSON
}
if err := ioutil.WriteFile(fc.fcConfigPath, data, 0640); err != nil {
return err
}
var err error
defer func() {
if err != nil {
fc.fcEnd()
}
}()
err = fc.fcInit(fcTimeout)
if err != nil {
return err
}
// make sure 'others' don't have access to this socket
err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640)
if err != nil {
return fmt.Errorf("Could not change socket permissions: %v", err)
}
fc.state.set(vmReady)
return nil
}
func fcDriveIndexToID(i int) string {
return "drive_" + strconv.Itoa(i)
}
func (fc *firecracker) createDiskPool() error {
span, _ := fc.trace("createDiskPool")
defer span.Finish()
for i := 0; i < fcDiskPoolSize; i++ {
driveID := fcDriveIndexToID(i)
isReadOnly := false
isRootDevice := false
// Create a temporary file as a placeholder backend for the drive
jailedDrive, err := fc.createJailedDrive(driveID)
if err != nil {
return err
}
drive := &models.Drive{
DriveID: &driveID,
IsReadOnly: &isReadOnly,
IsRootDevice: &isRootDevice,
PathOnHost: &jailedDrive,
}
fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
}
return nil
}
func (fc *firecracker) umountResource(jailedPath string) {
hostPath := filepath.Join(fc.jailerRoot, jailedPath)
fc.Logger().WithField("resource", hostPath).Debug("Unmounting resource")
err := syscall.Unmount(hostPath, syscall.MNT_DETACH)
if err != nil {
fc.Logger().WithError(err).Error("Failed to umount resource")
}
}
// cleanup all jail artifacts
func (fc *firecracker) cleanupJail() {
span, _ := fc.trace("cleanupJail")
defer span.Finish()
fc.umountResource(fcKernel)
fc.umountResource(fcRootfs)
fc.umountResource(fcLogFifo)
fc.umountResource(fcMetricsFifo)
fc.umountResource(defaultFcConfig)
fc.Logger().WithField("cleaningJail", fc.vmPath).Info()
if err := os.RemoveAll(fc.vmPath); err != nil {
fc.Logger().WithField("cleanupJail failed", err).Error()
}
}
// stopSandbox will stop the Sandbox's VM.
func (fc *firecracker) stopSandbox() (err error) {
span, _ := fc.trace("stopSandbox")
defer span.Finish()
return fc.fcEnd()
}
func (fc *firecracker) pauseSandbox() error {
return nil
}
func (fc *firecracker) saveSandbox() error {
return nil
}
func (fc *firecracker) resumeSandbox() error {
return nil
}
func (fc *firecracker) fcAddVsock(hvs types.HybridVSock) {
span, _ := fc.trace("fcAddVsock")
defer span.Finish()
udsPath := hvs.UdsPath
if fc.jailed {
udsPath = filepath.Join("/", defaultHybridVSocketName)
}
vsockID := "root"
ctxID := defaultGuestVSockCID
vsock := &models.Vsock{
GuestCid: &ctxID,
UdsPath: &udsPath,
VsockID: &vsockID,
}
fc.fcConfig.Vsock = vsock
}
func (fc *firecracker) fcAddNetDevice(endpoint Endpoint) {
span, _ := fc.trace("fcAddNetDevice")
defer span.Finish()
ifaceID := endpoint.Name()
ifaceCfg := &models.NetworkInterface{
AllowMmdsRequests: false,
GuestMac: endpoint.HardwareAddr(),
IfaceID: &ifaceID,
HostDevName: &endpoint.NetworkPair().TapInterface.TAPIface.Name,
}
fc.fcConfig.NetworkInterfaces = append(fc.fcConfig.NetworkInterfaces, ifaceCfg)
}
func (fc *firecracker) fcAddBlockDrive(drive config.BlockDrive) error {
span, _ := fc.trace("fcAddBlockDrive")
defer span.Finish()
driveID := drive.ID
isReadOnly := false
isRootDevice := false
jailedDrive, err := fc.fcJailResource(drive.File, driveID)
if err != nil {
fc.Logger().WithField("fcAddBlockDrive failed", err).Error()
return err
}
driveFc := &models.Drive{
DriveID: &driveID,
IsReadOnly: &isReadOnly,
IsRootDevice: &isRootDevice,
PathOnHost: &jailedDrive,
}
fc.fcConfig.Drives = append(fc.fcConfig.Drives, driveFc)
return nil
}
// Firecracker supports replacing the host drive used once the VM has booted up
func (fc *firecracker) fcUpdateBlockDrive(path, id string) error {
span, _ := fc.trace("fcUpdateBlockDrive")
defer span.Finish()
// Use the global block index as an index into the pool of the devices
// created for firecracker.
driveParams := ops.NewPatchGuestDriveByIDParams()
driveParams.SetDriveID(id)
driveFc := &models.PartialDrive{
DriveID: &id,
PathOnHost: &path, //This is the only property that can be modified
}
driveParams.SetBody(driveFc)
if _, err := fc.client().Operations.PatchGuestDriveByID(driveParams); err != nil {
return err
}
// Rescan needs to used only if the VM is running
if fc.vmRunning() {
actionParams := ops.NewCreateSyncActionParams()
actionType := "BlockDeviceRescan"
actionInfo := &models.InstanceActionInfo{
ActionType: &actionType,
Payload: id,
}
actionParams.SetInfo(actionInfo)
if _, err := fc.client().Operations.CreateSyncAction(actionParams); err != nil {
return err
}
}
return nil
}
// addDevice will add extra devices to firecracker. Limited to configure before the
// virtual machine starts. Devices include drivers and network interfaces only.
func (fc *firecracker) addDevice(devInfo interface{}, devType deviceType) error {
span, _ := fc.trace("addDevice")
defer span.Finish()
fc.state.RLock()
defer fc.state.RUnlock()
if fc.state.state == notReady {
dev := firecrackerDevice{
dev: devInfo,
devType: devType,
}
fc.Logger().Info("FC not ready, queueing device")
fc.pendingDevices = append(fc.pendingDevices, dev)
return nil
}
var err error
switch v := devInfo.(type) {
case Endpoint:
fc.Logger().WithField("device-type-endpoint", devInfo).Info("Adding device")
fc.fcAddNetDevice(v)
case config.BlockDrive:
fc.Logger().WithField("device-type-blockdrive", devInfo).Info("Adding device")
err = fc.fcAddBlockDrive(v)
case types.HybridVSock:
fc.Logger().WithField("device-type-hybrid-vsock", devInfo).Info("Adding device")
fc.fcAddVsock(v)
default:
fc.Logger().WithField("unknown-device-type", devInfo).Error("Adding device")
}
return err
}
// hotplugBlockDevice supported in Firecracker VMM
// hot add or remove a block device.
func (fc *firecracker) hotplugBlockDevice(drive config.BlockDrive, op operation) (interface{}, error) {
var path string
var err error
driveID := fcDriveIndexToID(drive.Index)
if op == addDevice {
//The drive placeholder has to exist prior to Update
path, err = fc.fcJailResource(drive.File, driveID)
if err != nil {
fc.Logger().WithError(err).WithField("resource", drive.File).Error("Could not jail resource")
return nil, err
}
} else {
// umount the disk, it's no longer needed.
fc.umountResource(driveID)
// use previous raw file created at createDiskPool, that way
// the resource is released by firecracker and it can be destroyed in the host
path = filepath.Join(fc.jailerRoot, driveID)
}
return nil, fc.fcUpdateBlockDrive(path, driveID)
}
// hotplugAddDevice supported in Firecracker VMM
func (fc *firecracker) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
span, _ := fc.trace("hotplugAddDevice")
defer span.Finish()
switch devType {
case blockDev:
return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), addDevice)
default:
fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
"deviceType": devType}).Warn("hotplugAddDevice: unsupported device")
return nil, fmt.Errorf("Could not hot add device: unsupported device: %v, type: %v",
devInfo, devType)
}
}
// hotplugRemoveDevice supported in Firecracker VMM
func (fc *firecracker) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
span, _ := fc.trace("hotplugRemoveDevice")
defer span.Finish()
switch devType {
case blockDev:
return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), removeDevice)
default:
fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
"deviceType": devType}).Error("hotplugRemoveDevice: unsupported device")
return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v",
devInfo, devType)
}
}
// getSandboxConsole builds the path of the console where we can read
// logs coming from the sandbox.
func (fc *firecracker) getSandboxConsole(id string) (string, error) {
return fmt.Sprintf("%s://%s:%d", kataclient.HybridVSockScheme, filepath.Join(fc.jailerRoot, defaultHybridVSocketName), vSockLogsPort), nil
}
func (fc *firecracker) disconnect() {
fc.state.set(notReady)
}
// Adds all capabilities supported by firecracker implementation of hypervisor interface
func (fc *firecracker) capabilities() types.Capabilities {
span, _ := fc.trace("capabilities")
defer span.Finish()
var caps types.Capabilities
caps.SetFsSharingUnsupported()
caps.SetBlockDeviceHotplugSupport()
return caps
}
func (fc *firecracker) hypervisorConfig() HypervisorConfig {
return fc.config
}
func (fc *firecracker) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) {
return 0, memoryDevice{}, nil
}
func (fc *firecracker) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
return 0, 0, nil
}
// This is used to apply cgroup information on the host.
//
// As suggested by https://github.com/firecracker-microvm/firecracker/issues/718,
// let's use `ps -T -p <pid>` to get fc vcpu info.
func (fc *firecracker) getThreadIDs() (vcpuThreadIDs, error) {
var vcpuInfo vcpuThreadIDs
vcpuInfo.vcpus = make(map[int]int)
parent, err := utils.NewProc(fc.info.PID)
if err != nil {
return vcpuInfo, err
}
children, err := parent.Children()
if err != nil {
return vcpuInfo, err
}
for _, child := range children {
comm, err := child.Comm()
if err != nil {
return vcpuInfo, errors.New("Invalid fc thread info")
}
if !strings.HasPrefix(comm, "fc_vcpu") {
continue
}
cpus := strings.SplitAfter(comm, "fc_vcpu")
if len(cpus) != 2 {
return vcpuInfo, errors.Errorf("Invalid fc thread info: %v", comm)
}
cpuID, err := strconv.ParseInt(cpus[1], 10, 32)
if err != nil {
return vcpuInfo, errors.Wrapf(err, "Invalid fc thread info: %v", comm)
}
vcpuInfo.vcpus[int(cpuID)] = child.PID
}
return vcpuInfo, nil
}
func (fc *firecracker) cleanup() error {
fc.cleanupJail()
return nil
}
func (fc *firecracker) getPids() []int {
return []int{fc.info.PID}
}
func (fc *firecracker) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
return errors.New("firecracker is not supported by VM cache")
}
func (fc *firecracker) toGrpc() ([]byte, error) {
return nil, errors.New("firecracker is not supported by VM cache")
}
func (fc *firecracker) save() (s persistapi.HypervisorState) {
s.Pid = fc.info.PID
s.Type = string(FirecrackerHypervisor)
return
}
func (fc *firecracker) load(s persistapi.HypervisorState) {
fc.info.PID = s.Pid
}
func (fc *firecracker) check() error {
if err := syscall.Kill(fc.info.PID, syscall.Signal(0)); err != nil {
return errors.Wrapf(err, "failed to ping fc process")
}
return nil
}
func (fc *firecracker) generateSocket(id string, useVsock bool) (interface{}, error) {
if !useVsock {
return nil, fmt.Errorf("Can't start firecracker: vsocks is disabled")
}
fc.Logger().Debug("Using hybrid-vsock endpoint")
udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName)
return types.HybridVSock{
UdsPath: udsPath,
Port: uint32(vSockPort),
}, nil
}
func (fc *firecracker) watchConsole() (*os.File, error) {
master, slave, err := console.NewPty()
if err != nil {
fc.Logger().WithField("Error create pseudo tty", err).Debug()
return nil, err
}
stdio, err := os.OpenFile(slave, syscall.O_RDWR, 0700)
if err != nil {
fc.Logger().WithError(err).Debugf("open pseudo tty %s", slave)
return nil, err
}
go func() {
scanner := bufio.NewScanner(master)
for scanner.Scan() {
fc.Logger().WithFields(logrus.Fields{
"sandbox": fc.id,
"vmconsole": scanner.Text(),
}).Infof("reading guest console")
}
if err := scanner.Err(); err != nil {
if err == io.EOF {
fc.Logger().Info("console watcher quits")
} else {
fc.Logger().WithError(err).Error("Failed to read guest console")
}
}
}()
return stdio, nil
}