CCv0: Merge main into CCv0 branch

Merge remote-tracking branch 'upstream/main' into CCv0

Fixes: #6428
Signed-off-by: Georgina Kinge <georgina.kinge@ibm.com>
Authored by Georgina Kinge on 2023-03-09 16:14:53 +00:00
112 changed files with 3535 additions and 1865 deletions


@@ -0,0 +1,96 @@
name: CI | Publish kata-deploy payload for amd64
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: ubuntu-latest
strategy:
matrix:
asset:
- cloud-hypervisor
- firecracker
- kernel
- nydus
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-amd64
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: ubuntu-latest
needs: build-asset
steps:
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-amd64
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-amd64
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: ubuntu-latest
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-amd64
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,108 @@
name: CI | Publish kata-deploy payload for arm64
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: arm64
strategy:
matrix:
asset:
- cloud-hypervisor
- firecracker
- kernel
- nydus
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-arm64
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: arm64
needs: build-asset
steps:
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-arm64
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-arm64
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: arm64
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-arm64
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,107 @@
name: CI | Publish kata-deploy payload for s390x
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: s390x
strategy:
matrix:
asset:
- kernel
- shim-v2
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
sudo chown -R $(id -u):$(id -g) "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-s390x
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: s390x
needs: build-asset
steps:
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-s390x
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-s390x
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: s390x
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-s390x
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,47 @@
name: CI | Publish Kata Containers payload
on:
push:
branches:
- main
- stable-*
jobs:
build-assets-amd64:
uses: ./.github/workflows/payload-after-push-amd64.yaml
with:
target-arch: amd64
secrets: inherit
build-assets-arm64:
uses: ./.github/workflows/payload-after-push-arm64.yaml
with:
target-arch: arm64
secrets: inherit
build-assets-s390x:
uses: ./.github/workflows/payload-after-push-s390x.yaml
with:
target-arch: s390x
secrets: inherit
publish:
runs-on: ubuntu-latest
needs: [build-assets-amd64, build-assets-arm64, build-assets-s390x]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Push multi-arch manifest
run: |
docker manifest create quay.io/kata-containers/kata-deploy-ci:kata-containers-latest \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-amd64 \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-arm64 \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-s390x
docker manifest push quay.io/kata-containers/kata-deploy-ci:kata-containers-latest


@@ -43,8 +43,7 @@ jobs:
kernel_dir="tools/packaging/kernel/"
kernel_version_file="${kernel_dir}kata_config_version"
modified_files=$(git diff --name-only origin/CCv0..HEAD)
result=$(git whatchanged origin/CCv0..HEAD "${kernel_dir}" >>"/dev/null")
if git whatchanged origin/CCv0..HEAD "${kernel_dir}" >>"/dev/null"; then
if git diff --name-only origin/CCv0..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then
echo "Kernel directory has changed, checking if $kernel_version_file has been updated"
if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then
echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1)


@@ -2,6 +2,8 @@
This document is written **specifically for developers**: it is not intended for end users.
If you want to contribute changes that you have made, please read the [community guidelines](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md) for information about our processes.
# Assumptions
- You are working on a non-critical test or development system.
@@ -654,7 +656,7 @@ section when using rootfs, or when using initrd, complete the steps in the [Buil
Install the image:
>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img`
>with the initrd image name `kata-containers-initrd.img`.
```bash
@@ -688,25 +690,25 @@ $ sudo crictl run -r kata container.yaml pod.yaml
The steps required to enable the debug console for QEMU differ slightly from
those for firecracker / cloud-hypervisor.
##### Enabling debug console for QEMU
Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console.
```bash
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console"/g' "${kata_configuration_file}"
```
Here `kata_configuration_file` could point to `/etc/kata-containers/configuration.toml`
or `/usr/share/defaults/kata-containers/configuration.toml`
or `/opt/kata/share/defaults/kata-containers/configuration-{hypervisor}.toml`, if
you installed Kata Containers using `kata-deploy`.
##### Enabling debug console for cloud-hypervisor / firecracker
A slightly different configuration is required in the case of firecracker and cloud hypervisor.
Firecracker and cloud-hypervisor don't have a UNIX socket connected to `/dev/console`.
Hence, the kernel command line option `agent.debug_console` will not work for them.
These hypervisors support `hybrid vsocks`, which can be used for communication
between the host and the guest. The kernel command line option `agent.debug_console_vport`
was added to allow developers to specify on which `vsock` port the debugging console should be connected.
@@ -719,7 +721,7 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
```
> **Note** Ports 1024 and 1025 are reserved for communication with the agent
> and gathering of agent logs respectively.
##### Connecting to the debug console


@@ -11,6 +11,7 @@ Kata Containers design documents:
- [Host cgroups](host-cgroups.md)
- [Agent systemd cgroup](agent-systemd-cgroup.md)
- [`Inotify` support](inotify.md)
- [`Hooks` support](hooks-handling.md)
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md)


@@ -0,0 +1,63 @@
# Kata Containers support for `Hooks`
## Introduction
During a container's lifecycle, different hooks can be executed to perform custom actions. Kata Containers supports two types of hooks: `OCI Hooks` and `Kata Hooks`.
### OCI Hooks
The OCI Spec stipulates six hooks that can be executed at different points in time and in different namespaces: `Prestart Hooks`, `CreateRuntime Hooks`, `CreateContainer Hooks`, `StartContainer Hooks`, `Poststart Hooks` and `Poststop Hooks`. Kata Containers supports these hooks as compatibly as possible.
The path and arguments of these hooks will be passed to Kata for execution via `bundle/config.json`. For example:
```
...
"hooks": {
"prestart": [
{
"path": "/usr/bin/prestart-hook",
"args": ["prestart-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
],
"createRuntime": [
{
"path": "/usr/bin/createRuntime-hook",
"args": ["createRuntime-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
]
}
...
```
### Kata Hooks
In Kata, we support three additional kinds of hooks executed in the guest VM: `Guest Prestart Hook`, `Guest Poststart Hook` and `Guest Poststop Hook`.
The executable files for Kata Hooks must be packaged in the *guest rootfs*. The path to those guest hooks is specified in the configuration file, and guest hooks must be stored in a subdirectory of `guest_hook_path` according to their hook type. For example:
+ In configuration file:
```
guest_hook_path="/usr/share/hooks"
```
+ In guest rootfs, prestart-hook is stored in `/usr/share/hooks/prestart/prestart-hook`.
## Execution
The table below summarizes when and where those different hooks will be executed in Kata Containers:
| Hook Name | Hook Type | Hook Path | Exec Place | Exec Time |
|---|---|---|---|---|
| `Prestart(deprecated)` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created. |
| `CreateRuntime` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created, after `Prestart` hooks. |
| `CreateContainer` | OCI hook | host runtime namespace | host vmm namespace* | After VM is started, before container is created, after `CreateRuntime` hooks. |
| `StartContainer` | OCI hook | guest container namespace | guest container namespace | After container is created, before container is started. |
| `Poststart` | OCI hook | host runtime namespace | host runtime namespace | After container is started, before start operation returns. |
| `Poststop` | OCI hook | host runtime namespace | host runtime namespace | After container is deleted, before delete operation returns. |
| `Guest Prestart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, before container command is executed. |
| `Guest Poststart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, after container command is executed, before start operation returns. |
| `Guest Poststop` | Kata hook | guest agent namespace | guest agent namespace | During delete operation, after container is deleted, before delete operation returns. |
+ `Hook Path` specifies where the hook's path is resolved.
+ `Exec Place` specifies the namespace in which those hooks are executed.
+ For `CreateContainer` hooks, the OCI spec requires them to run inside the container namespace while the hook executable path is resolved on the host runtime, which is a non-starter for VM-based containers. We therefore keep them running in the *host vmm namespace*.
+ `Exec Time` specifies at which time point those hooks can be executed.
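
To make the guest-hook flow concrete, here is a minimal, hypothetical sketch of how hooks could be discovered and executed. The `run_guest_hooks` helper, the paths, and the state payload are all illustrative assumptions, not the kata-agent implementation (which also honours per-hook args, env and timeouts):

```rust
use std::fs;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};

/// Run every executable under `<guest_hook_path>/<hook_type>/`, passing the
/// container state JSON on stdin as the OCI spec describes.
fn run_guest_hooks(guest_hook_path: &str, hook_type: &str, state_json: &str) -> std::io::Result<()> {
    let dir = Path::new(guest_hook_path).join(hook_type); // e.g. /usr/share/hooks/prestart
    if !dir.is_dir() {
        return Ok(()); // no hooks of this type packaged in the guest rootfs
    }
    for entry in fs::read_dir(&dir)? {
        let path = entry?.path();
        if !path.is_file() {
            continue;
        }
        let mut child = Command::new(&path).stdin(Stdio::piped()).spawn()?;
        if let Some(mut stdin) = child.stdin.take() {
            stdin.write_all(state_json.as_bytes())?; // pipe closes when `stdin` drops
        }
        let status = child.wait()?;
        if !status.success() {
            eprintln!("hook {} failed: {}", path.display(), status);
        }
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    // Hypothetical state payload; the real agent serializes oci::State.
    run_guest_hooks("/usr/share/hooks", "prestart", r#"{"status":"created"}"#)
}
```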


@@ -123,7 +123,7 @@ Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-l
SSH into the machine
```bash
$ ssh -i MyKeyPair.pen ubuntu@${IP}
$ ssh -i MyKeyPair.pem ubuntu@${IP}
```
Go on to the next step.


@@ -267,6 +267,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"cgroupfs"
}
}
fn set_network_resources(


@@ -66,6 +66,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"mock"
}
}
impl Manager {


@@ -52,10 +52,12 @@ pub trait Manager {
fn as_any(&self) -> Result<&dyn Any> {
Err(anyhow!("not supported!"))
}
fn name(&self) -> &str;
}
impl Debug for dyn Manager + Send + Sync {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "CgroupManager")
write!(f, "{}", self.name())
}
}
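
The point of adding `name()` to the trait is visible in the `Debug` impl above: trait objects can now report which concrete cgroup manager is in use instead of the fixed string `CgroupManager`. A self-contained sketch of the same pattern, with illustrative types:

```rust
use std::fmt;

trait Manager {
    fn name(&self) -> &str;
}

struct CgroupFs;

impl Manager for CgroupFs {
    fn name(&self) -> &str {
        "cgroupfs"
    }
}

// The blanket Debug impl delegates to the concrete manager's name().
impl fmt::Debug for dyn Manager + Send + Sync {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.name())
    }
}

fn main() {
    let m: Box<dyn Manager + Send + Sync> = Box::new(CgroupFs);
    println!("{:?}", m); // prints "cgroupfs" rather than a generic label
}
```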


@@ -101,6 +101,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"systemd"
}
}
impl Manager {


@@ -374,13 +374,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?;
let spec: oci::Spec = serde_json::from_str(spec_str)?;
log_child!(cfd_log, "notify parent to send oci process");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let process_str = std::str::from_utf8(&buf)?;
let oci_process: oci::Process = serde_json::from_str(process_str)?;
log_child!(cfd_log, "notify parent to send oci state");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let state_str = std::str::from_utf8(&buf)?;
let mut state: oci::State = serde_json::from_str(state_str)?;
log_child!(cfd_log, "notify parent to send cgroup manager");
write_sync(cwfd, SYNC_SUCCESS, "")?;
@@ -743,6 +748,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, buf)?;
}
if init {
// StartContainer Hooks:
// * should be run in container namespace
// * should be run after container is created and before container is started (before user-specific command is executed)
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#startcontainer-hooks
state.pid = std::process::id() as i32;
state.status = oci::ContainerState::Created;
if let Some(hooks) = spec.hooks.as_ref() {
let mut start_container_states = HookStates::new();
start_container_states.execute_hooks(&hooks.start_container, Some(state))?;
}
}
// With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles.
@@ -1323,7 +1341,6 @@ async fn join_namespaces(
write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?;
info!(logger, "wait child received oci spec");
read_async(pipe_r).await?;
info!(logger, "send oci process from parent to child");
@@ -1333,6 +1350,13 @@ async fn join_namespaces(
info!(logger, "wait child received oci process");
read_async(pipe_r).await?;
info!(logger, "try to send state from parent to child");
let state_str = serde_json::to_string(st)?;
write_async(pipe_w, SYNC_DATA, state_str.as_str()).await?;
info!(logger, "wait child received oci state");
read_async(pipe_r).await?;
let cm_str = if use_systemd_cgroup {
serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap())
} else {
@@ -1449,7 +1473,7 @@ impl LinuxContainer {
pub fn new<T: Into<String> + Display + Clone>(
id: T,
base: T,
mut config: Config,
config: Config,
logger: &Logger,
) -> Result<Self> {
let base = base.into();
@@ -1475,26 +1499,18 @@ impl LinuxContainer {
.context(format!("Cannot change owner of container {} root", id))?;
let spec = config.spec.as_ref().unwrap();
let linux = spec.linux.as_ref().unwrap();
// determine which cgroup driver to take and then assign to config.use_systemd_cgroup
// systemd: "[slice]:[prefix]:[name]"
// fs: "/path_a/path_b"
let cpath = if SYSTEMD_CGROUP_PATH_FORMAT.is_match(linux.cgroups_path.as_str()) {
config.use_systemd_cgroup = true;
let cpath = if config.use_systemd_cgroup {
if linux.cgroups_path.len() == 2 {
format!("system.slice:kata_agent:{}", id.as_str())
} else {
linux.cgroups_path.clone()
}
} else if linux.cgroups_path.is_empty() {
format!("/{}", id.as_str())
} else {
config.use_systemd_cgroup = false;
if linux.cgroups_path.is_empty() {
format!("/{}", id.as_str())
} else {
linux.cgroups_path.clone()
}
// if we have a systemd cgroup path we need to convert it to a fs cgroup path
linux.cgroups_path.replace(':', "/")
};
let cgroup_manager: Box<dyn Manager + Send + Sync> = if config.use_systemd_cgroup {
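
Restated as a standalone function, the new `cpath` logic in this hunk looks like the sketch below; `derive_cpath` is an illustrative name, and unlike the real code it takes `use_systemd_cgroup` as an input instead of mutating `config`:

```rust
fn derive_cpath(cgroups_path: &str, id: &str, use_systemd_cgroup: bool) -> String {
    if use_systemd_cgroup {
        if cgroups_path.len() == 2 {
            // "::" means an empty [slice]:[prefix]:[name], so use a default scope
            format!("system.slice:kata_agent:{}", id)
        } else {
            cgroups_path.to_string()
        }
    } else if cgroups_path.is_empty() {
        format!("/{}", id)
    } else {
        // A systemd-style path handed to the fs driver is converted to an fs
        // path by replacing the ':' separators (a no-op for plain fs paths).
        cgroups_path.replace(':', "/")
    }
}

fn main() {
    assert_eq!(derive_cpath("::", "c1", true), "system.slice:kata_agent:c1");
    assert_eq!(derive_cpath("", "c1", false), "/c1");
    assert_eq!(derive_cpath("x.slice:kata:c1", "c1", false), "x.slice/kata/c1");
}
```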


@@ -153,13 +153,17 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
let prestart = hook_grpc_to_oci(h.Prestart.as_ref());
let create_runtime = hook_grpc_to_oci(h.CreateRuntime.as_ref());
let create_container = hook_grpc_to_oci(h.CreateContainer.as_ref());
let start_container = hook_grpc_to_oci(h.StartContainer.as_ref());
let poststart = hook_grpc_to_oci(h.Poststart.as_ref());
let poststop = hook_grpc_to_oci(h.Poststop.as_ref());
oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}
@@ -837,6 +841,45 @@ mod tests {
Timeout: 10,
..Default::default()
}])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
@@ -866,6 +909,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
TestData {
@@ -898,6 +959,45 @@ mod tests {
Timeout: 10,
..Default::default()
}])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
@@ -914,6 +1014,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
];


@@ -340,7 +340,7 @@ async fn start_sandbox(
sandbox.lock().await.sender = Some(tx);
// vsock:///dev/vsock, port
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str())?;
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str(), init_mode)?;
server.start().await?;
rx.await?;


@@ -39,7 +39,7 @@ use protocols::{
image_ttrpc_async as image_ttrpc,
};
use rustjail::cgroups::notifier;
use rustjail::container::{BaseContainer, Container, LinuxContainer};
use rustjail::container::{BaseContainer, Container, LinuxContainer, SYSTEMD_CGROUP_PATH_FORMAT};
use rustjail::process::Process;
use rustjail::specconv::CreateOpts;
@@ -145,6 +145,7 @@ macro_rules! is_allowed {
#[derive(Clone, Debug)]
pub struct AgentService {
sandbox: Arc<Mutex<Sandbox>>,
init_mode: bool,
}
// A container ID must match this regex:
@@ -280,9 +281,20 @@ impl AgentService {
// restore the cwd for kata-agent process.
defer!(unistd::chdir(&olddir).unwrap());
// determine which cgroup driver to take and then assign to use_systemd_cgroup
// systemd: "[slice]:[prefix]:[name]"
// fs: "/path_a/path_b"
// If agent is init we can't use systemd cgroup mode, no matter what the host tells us
let cgroups_path = oci.linux.as_ref().map_or("", |linux| &linux.cgroups_path);
let use_systemd_cgroup = if self.init_mode {
false
} else {
SYSTEMD_CGROUP_PATH_FORMAT.is_match(cgroups_path)
};
let opts = CreateOpts {
cgroup_name: "".to_string(),
use_systemd_cgroup: false,
use_systemd_cgroup,
no_pivot_root: s.no_pivot_root,
no_new_keyring: false,
spec: Some(oci.clone()),
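
A minimal sketch of the driver selection described in the comment above; the shape check stands in for the real `SYSTEMD_CGROUP_PATH_FORMAT` regex (an assumed simplification):

```rust
/// A systemd cgroups_path has the three-part "[slice]:[prefix]:[name]" shape;
/// a cgroupfs path is an absolute filesystem path such as "/path_a/path_b".
fn use_systemd_cgroup(cgroups_path: &str, init_mode: bool) -> bool {
    if init_mode {
        // When the agent runs as init there is no systemd in the guest, so
        // systemd cgroup mode is never possible, whatever the host requests.
        return false;
    }
    cgroups_path.split(':').count() == 3 && !cgroups_path.starts_with('/')
}

fn main() {
    assert!(use_systemd_cgroup("system.slice:kata_agent:1234", false));
    assert!(!use_systemd_cgroup("/kubepods/pod1234", false));
    assert!(!use_systemd_cgroup("system.slice:kata_agent:1234", true)); // init mode wins
}
```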
@@ -1791,9 +1803,12 @@ async fn read_stream(reader: Arc<Mutex<ReadHalf<PipeStream>>>, l: usize) -> Resu
Ok(content)
}
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str) -> Result<TtrpcServer> {
let agent_service = Box::new(AgentService { sandbox: s.clone() })
as Box<dyn agent_ttrpc::AgentService + Send + Sync>;
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str, init_mode: bool) -> Result<TtrpcServer> {
let agent_service = Box::new(AgentService {
sandbox: s.clone(),
init_mode,
}) as Box<dyn agent_ttrpc::AgentService + Send + Sync>;
let agent_worker = Arc::new(agent_service);
let health_service = Box::new(HealthService {}) as Box<dyn health_ttrpc::Health + Send + Sync>;
@@ -2000,23 +2015,18 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
));
}
let parent = path.parent();
let dir = if let Some(parent) = parent {
parent.to_path_buf()
} else {
PathBuf::from("/")
};
fs::create_dir_all(&dir).or_else(|e| {
if e.kind() != std::io::ErrorKind::AlreadyExists {
return Err(e);
if let Some(parent) = path.parent() {
if !parent.exists() {
let dir = parent.to_path_buf();
if let Err(e) = fs::create_dir_all(&dir) {
if e.kind() != std::io::ErrorKind::AlreadyExists {
return Err(e.into());
}
} else {
std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?;
}
}
Ok(())
})?;
std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?;
}
let sflag = stat::SFlag::from_bits_truncate(req.file_mode);
@@ -2314,6 +2324,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::UpdateInterfaceRequest::default();
@@ -2331,6 +2342,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::UpdateRoutesRequest::default();
@@ -2348,6 +2360,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::AddARPNeighborsRequest::default();
@@ -2481,6 +2494,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let result = agent_service
@@ -2961,6 +2975,7 @@ OtherField:other
let sandbox = Sandbox::new(&logger).unwrap();
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let ctx = mk_ttrpc_context();

File diff suppressed because it is too large


@@ -19,11 +19,11 @@ dbs-boot = "0.3.0"
dbs-device = "0.2.0"
dbs-interrupt = { version = "0.2.0", features = ["kvm-irq"] }
dbs-legacy-devices = "0.1.0"
dbs-upcall = { version = "0.1.0", optional = true }
dbs-upcall = { version = "0.2.0", optional = true }
dbs-utils = "0.2.0"
dbs-virtio-devices = { version = "0.1.0", optional = true, features = ["virtio-mmio"] }
kvm-bindings = "0.5.0"
kvm-ioctls = "0.11.0"
dbs-virtio-devices = { version = "0.2.0", optional = true, features = ["virtio-mmio"] }
kvm-bindings = "0.6.0"
kvm-ioctls = "0.12.0"
lazy_static = "1.2"
libc = "0.2.39"
linux-loader = "0.6.0"
@@ -37,8 +37,9 @@ slog = "2.5.2"
slog-scope = "4.4.0"
thiserror = "1"
vmm-sys-util = "0.11.0"
virtio-queue = { version = "0.4.0", optional = true }
virtio-queue = { version = "0.6.0", optional = true }
vm-memory = { version = "0.9.0", features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
[dev-dependencies]
slog-term = "2.9.0"
@@ -47,7 +48,7 @@ test-utils = { path = "../libs/test-utils" }
[features]
acpi = []
atomic-guest-memory = [ "vm-memory/backend-atomic" ]
atomic-guest-memory = ["vm-memory/backend-atomic"]
hotplug = ["virtio-vsock"]
virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"]
virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"]


@@ -59,6 +59,8 @@ pub struct InstanceInfo {
pub vmm_version: String,
/// The pid of the current VMM process.
pub pid: u32,
/// The tid of the current VMM master thread.
pub master_tid: u32,
/// The state of async actions.
pub async_state: AsyncState,
/// List of tids of vcpu threads (vcpu index, tid)
@@ -77,6 +79,7 @@ impl InstanceInfo {
state: InstanceState::Uninitialized,
vmm_version,
pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized,
tids: Vec::new(),
last_instance_downtime: 0,
@@ -97,6 +100,7 @@ impl Default for InstanceInfo {
state: InstanceState::Uninitialized,
vmm_version: env!("CARGO_PKG_VERSION").to_string(),
pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized,
tids: Vec::new(),
last_instance_downtime: 0,


@@ -7,8 +7,8 @@
// found in the THIRD-PARTY file.
use std::fs::File;
use std::sync::mpsc::{Receiver, Sender, TryRecvError};
use crossbeam_channel::{Receiver, Sender, TryRecvError};
use log::{debug, error, info, warn};
use crate::error::{Result, StartMicroVmError, StopMicrovmError};
@@ -676,9 +676,9 @@ fn handle_cpu_topology(
#[cfg(test)]
mod tests {
use std::sync::mpsc::channel;
use std::sync::{Arc, Mutex};
use crossbeam_channel::unbounded;
use dbs_utils::epoll_manager::EpollManager;
use test_utils::skip_if_not_root;
use vmm_sys_util::tempfile::TempFile;
@@ -702,8 +702,8 @@ mod tests {
}
fn check_request(&mut self) {
let (to_vmm, from_api) = channel();
let (to_api, from_vmm) = channel();
let (to_vmm, from_api) = unbounded();
let (to_api, from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
@@ -728,8 +728,8 @@ mod tests {
fn test_vmm_action_receive_unknown() {
skip_if_not_root!();
let (_to_vmm, from_api) = channel();
let (to_api, _from_vmm) = channel();
let (_to_vmm, from_api) = unbounded();
let (to_api, _from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
let mut vservice = VmmService::new(from_api, to_api);
@@ -742,8 +742,8 @@ mod tests {
#[should_panic]
#[test]
fn test_vmm_action_disconnected() {
let (to_vmm, from_api) = channel();
let (to_api, _from_vmm) = channel();
let (to_vmm, from_api) = unbounded();
let (to_api, _from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
let mut vservice = VmmService::new(from_api, to_api);


@@ -231,7 +231,7 @@ where
info.config.check_conflicts(config)?;
}
}
self.info_list[index] = device_info;
self.info_list[index].config = config.clone();
index
}
None => {


@@ -147,17 +147,13 @@ pub type Result<T> = ::std::result::Result<T, DeviceMgrError>;
/// Type of the dragonball virtio devices.
#[cfg(feature = "dbs-virtio-devices")]
pub type DbsVirtioDevice = Box<
dyn VirtioDevice<
GuestAddressSpaceImpl,
virtio_queue::QueueStateSync,
vm_memory::GuestRegionMmap,
>,
dyn VirtioDevice<GuestAddressSpaceImpl, virtio_queue::QueueSync, vm_memory::GuestRegionMmap>,
>;
/// Type of the dragonball virtio mmio devices.
#[cfg(feature = "dbs-virtio-devices")]
pub type DbsMmioV2Device =
MmioV2Device<GuestAddressSpaceImpl, virtio_queue::QueueStateSync, vm_memory::GuestRegionMmap>;
MmioV2Device<GuestAddressSpaceImpl, virtio_queue::QueueSync, vm_memory::GuestRegionMmap>;
/// Struct to support transactional operations for device management.
pub struct DeviceManagerTx {


@@ -9,6 +9,8 @@
//! Error codes for the virtual machine monitor subsystem.
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtIoError;
@@ -61,6 +63,11 @@ pub enum Error {
#[error("failed to write MP table to guest memory: {0}")]
MpTableSetup(#[source] dbs_boot::mptable::Error),
/// Create pmu device error
#[cfg(target_arch = "aarch64")]
#[error("Create pmu device error: {0}")]
PmuDeviceError(#[source] PmuError),
/// Fail to boot system
#[error("failed to boot system: {0}")]
BootSystem(#[source] dbs_boot::Error),


@@ -11,7 +11,7 @@ use std::sync::mpsc::{channel, Sender};
use std::sync::Arc;
use crate::IoManagerCached;
use dbs_arch::regs;
use dbs_arch::{regs, VpmuFeatureLevel};
use dbs_boot::get_fdt_addr;
use dbs_utils::time::TimestampUs;
use kvm_ioctls::{VcpuFd, VmFd};
@@ -81,7 +81,7 @@ impl Vcpu {
/// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64)
pub fn configure(
&mut self,
_vcpu_config: &VcpuConfig,
vcpu_config: &VcpuConfig,
vm_fd: &VmFd,
vm_as: &GuestAddressSpaceImpl,
kernel_load_addr: Option<GuestAddress>,
@@ -99,6 +99,9 @@ impl Vcpu {
if self.id > 0 {
kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
}
if vcpu_config.vpmu_feature == VpmuFeatureLevel::FullyEnabled {
kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
}
self.fd.vcpu_init(&kvi).map_err(VcpuError::VcpuArmInit)?;


@@ -7,9 +7,7 @@ mod sm;
mod vcpu_impl;
mod vcpu_manager;
#[cfg(target_arch = "x86_64")]
use dbs_arch::cpuid::VpmuFeatureLevel;
use dbs_arch::VpmuFeatureLevel;
pub use vcpu_manager::{VcpuManager, VcpuManagerError, VcpuResizeInfo};
#[cfg(feature = "hotplug")]
@@ -32,6 +30,6 @@ pub struct VcpuConfig {
/// if vpmu feature is Disabled, it means vpmu feature is off (by default)
/// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions)
/// if vpmu feature is FullyEnabled, it means all vpmu counters are supported
#[cfg(target_arch = "x86_64")]
/// For aarch64, VpmuFeatureLevel only supports Disabled and FullyEnabled.
pub vpmu_feature: VpmuFeatureLevel,
}


@@ -441,75 +441,77 @@ impl Vcpu {
/// Returns error or enum specifying whether emulation was handled or interrupted.
fn run_emulation(&mut self) -> Result<VcpuEmulation> {
match Vcpu::emulate(&self.fd) {
Ok(run) => match run {
#[cfg(target_arch = "x86_64")]
VcpuExit::IoIn(addr, data) => {
let _ = self.io_mgr.pio_read(addr, data);
METRICS.vcpu.exit_io_in.inc();
Ok(VcpuEmulation::Handled)
}
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => {
if !self.check_io_port_info(addr, data)? {
let _ = self.io_mgr.pio_write(addr, data);
Ok(run) => {
match run {
#[cfg(target_arch = "x86_64")]
VcpuExit::IoIn(addr, data) => {
let _ = self.io_mgr.pio_read(addr, data);
METRICS.vcpu.exit_io_in.inc();
Ok(VcpuEmulation::Handled)
}
METRICS.vcpu.exit_io_out.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioRead(addr, data) => {
let _ = self.io_mgr.mmio_read(addr, data);
METRICS.vcpu.exit_mmio_read.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioWrite(addr, data) => {
let _ = self.io_mgr.mmio_write(addr, data);
METRICS.vcpu.exit_mmio_write.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::Hlt => {
info!("Received KVM_EXIT_HLT signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::Shutdown => {
info!("Received KVM_EXIT_SHUTDOWN signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
// Documentation specifies that below kvm exits are considered errors.
VcpuExit::FailEntry => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_FAIL_ENTRY signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::InternalError => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
info!(
"Received KVM_SYSTEM_EVENT: type: {}, event: {}",
event_type, event_flags
);
Ok(VcpuEmulation::Stopped)
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => {
if !self.check_io_port_info(addr, data)? {
let _ = self.io_mgr.pio_write(addr, data);
}
METRICS.vcpu.exit_io_out.inc();
Ok(VcpuEmulation::Handled)
}
_ => {
METRICS.vcpu.failures.inc();
error!(
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
event_type, event_flags
);
VcpuExit::MmioRead(addr, data) => {
let _ = self.io_mgr.mmio_read(addr, data);
METRICS.vcpu.exit_mmio_read.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioWrite(addr, data) => {
let _ = self.io_mgr.mmio_write(addr, data);
METRICS.vcpu.exit_mmio_write.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::Hlt => {
info!("Received KVM_EXIT_HLT signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::Shutdown => {
info!("Received KVM_EXIT_SHUTDOWN signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
// Documentation specifies that below kvm exits are considered errors.
VcpuExit::FailEntry(reason, cpu) => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_FAIL_ENTRY signal, reason {reason}, cpu number {cpu}");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::InternalError => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
info!(
"Received KVM_SYSTEM_EVENT: type: {}, event: {}",
event_type, event_flags
);
Ok(VcpuEmulation::Stopped)
}
_ => {
METRICS.vcpu.failures.inc();
error!(
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
event_type, event_flags
);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
r => {
METRICS.vcpu.failures.inc();
// TODO: Are we sure we want to finish running a vcpu upon
// receiving a vm exit that is not necessarily an error?
error!("Unexpected exit reason on vcpu run: {:?}", r);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
r => {
METRICS.vcpu.failures.inc();
// TODO: Are we sure we want to finish running a vcpu upon
// receiving a vm exit that is not necessarily an error?
error!("Unexpected exit reason on vcpu run: {:?}", r);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
}
// The unwrap on raw_os_error can only fail if we have a logic
// error in our code in which case it is better to panic.
Err(ref e) => {
@@ -758,6 +760,11 @@ impl Vcpu {
// State machine reached its end.
StateMachine::finish(Self::exited)
}
/// Get vcpu file descriptor.
pub fn vcpu_fd(&self) -> &VcpuFd {
self.fd.as_ref()
}
}
impl Drop for Vcpu {
@@ -786,7 +793,7 @@ pub mod tests {
MmioWrite,
Hlt,
Shutdown,
FailEntry,
FailEntry(u64, u32),
InternalError,
Unknown,
SystemEvent(u32, u64),
@@ -807,7 +814,9 @@ pub mod tests {
EmulationCase::MmioWrite => Ok(VcpuExit::MmioWrite(0, &[])),
EmulationCase::Hlt => Ok(VcpuExit::Hlt),
EmulationCase::Shutdown => Ok(VcpuExit::Shutdown),
EmulationCase::FailEntry => Ok(VcpuExit::FailEntry),
EmulationCase::FailEntry(error_type, cpu_num) => {
Ok(VcpuExit::FailEntry(*error_type, *cpu_num))
}
EmulationCase::InternalError => Ok(VcpuExit::InternalError),
EmulationCase::Unknown => Ok(VcpuExit::Unknown),
EmulationCase::SystemEvent(event_type, event_flags) => {
@@ -850,6 +859,8 @@ pub mod tests {
#[cfg(target_arch = "aarch64")]
fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
use kvm_ioctls::Kvm;
use std::os::fd::AsRawFd;
// Call for kvm too frequently would cause error in some host kernel.
std::thread::sleep(std::time::Duration::from_millis(5));
@@ -918,7 +929,7 @@ pub mod tests {
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));
// KVM_EXIT_FAIL_ENTRY signal
*(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry;
*(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry(0, 0);
let res = vcpu.run_emulation();
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));


@@ -15,6 +15,7 @@ use std::sync::mpsc::{channel, Receiver, RecvError, RecvTimeoutError, Sender};
use std::sync::{Arc, Barrier, Mutex, RwLock};
use std::time::Duration;
use dbs_arch::VpmuFeatureLevel;
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
use dbs_upcall::{DevMgrService, UpcallClient};
use dbs_utils::epoll_manager::{EpollManager, EventOps, EventSet, Events, MutEventSubscriber};
@@ -281,11 +282,20 @@ impl VcpuManager {
let supported_cpuid = kvm_context
.supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
.map_err(VcpuManagerError::Kvm)?;
#[cfg(target_arch = "x86_64")]
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
let vpmu_feature_level = match vm_config_info.vpmu_feature {
1 => dbs_arch::cpuid::VpmuFeatureLevel::LimitedlyEnabled,
2 => dbs_arch::cpuid::VpmuFeatureLevel::FullyEnabled,
_ => dbs_arch::cpuid::VpmuFeatureLevel::Disabled,
#[cfg(target_arch = "x86_64")]
1 => VpmuFeatureLevel::LimitedlyEnabled,
#[cfg(target_arch = "aarch64")]
1 => {
log::warn!(
"Limitedly enabled vpmu feature isn't supported on aarch64 for now.\
This will be supported in the future. The vpmu_feature will be set disabled!"
);
VpmuFeatureLevel::Disabled
}
2 => VpmuFeatureLevel::FullyEnabled,
_ => VpmuFeatureLevel::Disabled,
};
let vcpu_manager = Arc::new(Mutex::new(VcpuManager {
@@ -297,7 +307,6 @@ impl VcpuManager {
cores_per_die: vm_config_info.cpu_topology.cores_per_die,
dies_per_socket: vm_config_info.cpu_topology.dies_per_socket,
sockets: vm_config_info.cpu_topology.sockets,
#[cfg(target_arch = "x86_64")]
vpmu_feature: vpmu_feature_level,
},
vcpu_seccomp_filter,
@@ -799,6 +808,11 @@ impl VcpuManager {
)
.map_err(VcpuManagerError::Vcpu)
}
/// get vpmu_feature config
pub fn vpmu_feature(&self) -> VpmuFeatureLevel {
self.vcpu_config.vpmu_feature
}
}
#[cfg(feature = "hotplug")]
@@ -887,7 +901,9 @@ mod hotplug {
cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]);
let req = DevMgrRequest::AddVcpu(CpuDevRequest {
count: cpu_ids.len() as u8,
#[cfg(target_arch = "x86_64")]
apic_ids: cpu_ids_array,
#[cfg(target_arch = "x86_64")]
apic_ver: APIC_VERSION,
});
self.send_upcall_action(upcall_client, req)?;
@@ -924,7 +940,9 @@ mod hotplug {
cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]);
let req = DevMgrRequest::DelVcpu(CpuDevRequest {
count: cpu_num_to_be_del as u8,
#[cfg(target_arch = "x86_64")]
apic_ids: cpu_ids_array,
#[cfg(target_arch = "x86_64")]
apic_ver: APIC_VERSION,
});
self.send_upcall_action(upcall_client, req)?;
@@ -969,7 +987,10 @@ mod hotplug {
vcpu_state_sender
.send(VcpuStateEvent::Hotplug((
result,
#[cfg(target_arch = "x86_64")]
resp.info.apic_id_index,
#[cfg(target_arch = "aarch64")]
resp.info.cpu_id,
)))
.unwrap();
vcpu_state_event.write(1).unwrap();


@@ -11,7 +11,8 @@ use std::fmt::Debug;
use std::ops::Deref;
use dbs_arch::gic::GICDevice;
use dbs_arch::{DeviceInfoForFDT, DeviceType};
use dbs_arch::pmu::initialize_pmu;
use dbs_arch::{DeviceInfoForFDT, DeviceType, VpmuFeatureLevel};
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
@@ -43,6 +44,7 @@ fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
device_info: Option<&HashMap<(DeviceType, String), T>>,
gic_device: &Box<dyn GICDevice>,
initrd: &Option<super::InitrdConfig>,
vpmu_feature: &VpmuFeatureLevel,
) -> super::Result<()> {
dbs_boot::fdt::create_fdt(
guest_mem,
@@ -51,6 +53,7 @@ fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
device_info,
gic_device,
initrd,
vpmu_feature,
)
.map_err(Error::BootSystem)?;
Ok(())
@@ -76,6 +79,23 @@ impl Vm {
Ok(())
}
/// Setup pmu devices for guest vm.
pub fn setup_pmu_devices(&mut self) -> std::result::Result<(), StartMicroVmError> {
let vm = self.vm_fd();
let mut vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
let vpmu_feature = vcpu_manager.vpmu_feature();
if vpmu_feature == VpmuFeatureLevel::Disabled {
return Ok(());
}
for vcpu in vcpu_manager.vcpus_mut() {
initialize_pmu(vm, vcpu.vcpu_fd())
.map_err(|e| StartMicroVmError::ConfigureVm(VmError::SetupPmu(e)))?;
}
Ok(())
}
/// Initialize the virtual machine instance.
///
/// It initialize the virtual machine instance by:
@@ -113,6 +133,7 @@ impl Vm {
.create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
.map_err(StartMicroVmError::Vcpu)?;
self.setup_interrupt_controller()?;
self.setup_pmu_devices()?;
self.init_devices(epoll_mgr)?;
Ok(())
@@ -129,6 +150,7 @@ impl Vm {
initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicroVmError> {
let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
let vpmu_feature = vcpu_manager.vpmu_feature();
let vcpu_mpidr = vcpu_manager
.vcpus()
.into_iter()
@@ -143,6 +165,7 @@ impl Vm {
self.device_manager.get_mmio_device_info(),
self.get_irqchip(),
&initrd,
&vpmu_feature,
)
.map_err(StartMicroVmError::ConfigureSystem)
}


@@ -10,6 +10,8 @@ use std::sync::{Arc, Mutex, RwLock};
use dbs_address_space::AddressSpace;
#[cfg(target_arch = "aarch64")]
use dbs_arch::gic::GICDevice;
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
@@ -69,6 +71,11 @@ pub enum VmError {
#[cfg(target_arch = "aarch64")]
#[error("failed to configure GIC")]
SetupGIC(GICError),
/// Cannot setup pmu device
#[cfg(target_arch = "aarch64")]
#[error("failed to setup pmu device")]
SetupPmu(#[source] PmuError),
}
/// Configuration information for user defined NUMA nodes.
@@ -1033,6 +1040,7 @@ pub mod tests {
assert!(vm.remove_devices().is_ok());
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_run_code() {
skip_if_not_root!();


@@ -71,6 +71,7 @@ pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64;
// Default configuration for Cloud Hypervisor (CH)
pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor";
pub const DEFAULT_CH_ROOTFS_TYPE: &str = "ext4";
pub const DEFAULT_CH_CONTROL_PATH: &str = "";
pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom";
pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz";


@@ -103,8 +103,9 @@ pub struct Runtime {
pub enable_pprof: bool,
/// If enabled, static resource management will calculate the vcpu and memory for the sandbox/container
/// And pod configured this will not be able to further update its CPU/Memory resource
#[serde(default)]
pub static_resource_mgmt: bool,
pub static_sandbox_resource_mgmt: bool,
/// Determines whether container seccomp profiles are passed to the virtual machine and
/// applied by the kata agent. If set to true, seccomp is not applied within the guest.


@@ -193,6 +193,12 @@ pub struct Hooks {
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub prestart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_runtime: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub start_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststop: Vec<Hook>,
@@ -1401,6 +1407,7 @@ mod tests {
env: vec![],
timeout: None,
}],
..Default::default()
}),
annotations: [
("com.example.key1".to_string(), "value1".to_string()),


@@ -166,6 +166,15 @@ message Hooks {
// Poststop is a list of hooks to be run after the container process exits.
repeated Hook Poststop = 3 [(gogoproto.nullable) = false];
// Createruntime is a list of hooks to be run during the creation of runtime(sandbox).
repeated Hook CreateRuntime = 4 [(gogoproto.nullable) = false];
// CreateContainer is a list of hooks to be run after VM is started, and before container is created.
repeated Hook CreateContainer = 5 [(gogoproto.nullable) = false];
// StartContainer is a list of hooks to be run after container is created, but before it is started.
repeated Hook StartContainer = 6 [(gogoproto.nullable) = false];
}
message Hook {


@@ -294,6 +294,9 @@ impl From<oci::Hooks> for crate::oci::Hooks {
fn from(from: Hooks) -> Self {
crate::oci::Hooks {
Prestart: from_vec(from.prestart),
CreateRuntime: from_vec(from.create_runtime),
CreateContainer: from_vec(from.create_container),
StartContainer: from_vec(from.start_container),
Poststart: from_vec(from.poststart),
Poststop: from_vec(from.poststop),
unknown_fields: Default::default(),
@@ -970,20 +973,34 @@ impl From<crate::oci::Hook> for oci::Hook {
impl From<crate::oci::Hooks> for oci::Hooks {
fn from(mut from: crate::oci::Hooks) -> Self {
let mut prestart = Vec::new();
for hook in from.take_Prestart().to_vec() {
prestart.push(hook.into())
}
let mut poststart = Vec::new();
for hook in from.take_Poststart().to_vec() {
poststart.push(hook.into());
}
let mut poststop = Vec::new();
for hook in from.take_Poststop().to_vec() {
poststop.push(hook.into());
}
let prestart = from.take_Prestart().into_iter().map(|i| i.into()).collect();
let create_runtime = from
.take_CreateRuntime()
.into_iter()
.map(|i| i.into())
.collect();
let create_container = from
.take_CreateContainer()
.into_iter()
.map(|i| i.into())
.collect();
let start_container = from
.take_StartContainer()
.into_iter()
.map(|i| i.into())
.collect();
let poststart = from
.take_Poststart()
.into_iter()
.map(|i| i.into())
.collect();
let poststop = from.take_Poststop().into_iter().map(|i| i.into()).collect();
oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}

src/runtime-rs/Cargo.lock (generated, 1243 lines changed)

File diff suppressed because it is too large


@@ -293,5 +293,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false)
# enable_pprof = true
static_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
# when a hardware architecture or hypervisor solution is used which does not support CPU and/or memory hotplug.
# Compatibility for determining appropriate sandbox (VM) size:
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
# does not yet support sandbox sizing annotations.
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@


@@ -15,9 +15,6 @@ use tokio::{
use super::{ConnectConfig, Sock, Stream};
unsafe impl Send for HybridVsock {}
unsafe impl Sync for HybridVsock {}
#[derive(Debug, PartialEq)]
pub struct HybridVsock {
uds: String,


@@ -16,9 +16,6 @@ use tokio::net::UnixStream;
use super::{ConnectConfig, Sock, Stream};
unsafe impl Send for Vsock {}
unsafe impl Sync for Vsock {}
#[derive(Debug, PartialEq)]
pub struct Vsock {
vsock_cid: u32,


@@ -124,7 +124,6 @@ pub struct CreateContainerRequest {
pub devices: Vec<Device>,
pub storages: Vec<Storage>,
pub oci: Option<oci::Spec>,
pub guest_hooks: Option<oci::Hooks>,
pub sandbox_pidns: bool,
pub rootfs_mounts: Vec<oci::Mount>,
}


@@ -36,6 +36,7 @@ ch-config = { path = "ch-config", optional = true }
futures = "0.3.25"
safe-path = "0.1.0"
crossbeam-channel = "0.5.6"
[features]
default = []


@@ -20,3 +20,6 @@ tokio = { version = "1.25.0", features = ["sync", "rt"] }
# being used. This version is used to pin the CH config structure
# which is relatively static.
api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" }
kata-types = { path = "../../../../libs/kata-types"}
nix = "0.26.2"


@@ -2,18 +2,11 @@
//
// SPDX-License-Identifier: Apache-2.0
use crate::net_util::MAC_ADDR_LEN;
use crate::{
ConsoleConfig, ConsoleOutputMode, CpuTopology, CpusConfig, DeviceConfig, FsConfig, MacAddr,
MemoryConfig, NetConfig, PayloadConfig, PmemConfig, RngConfig, VmConfig, VsockConfig,
};
use anyhow::{anyhow, Context, Result};
use crate::{DeviceConfig, FsConfig, VmConfig};
use anyhow::{anyhow, Result};
use api_client::simple_api_full_command_and_response;
use std::fmt::Display;
use std::net::Ipv4Addr;
use std::os::unix::net::UnixStream;
use std::path::PathBuf;
use tokio::task;
pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result<Option<String>> {
@@ -38,20 +31,9 @@ pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result<Opt
}
pub async fn cloud_hypervisor_vm_create(
sandbox_path: String,
vsock_socket_path: String,
mut socket: UnixStream,
shared_fs_devices: Option<Vec<FsConfig>>,
pmem_devices: Option<Vec<PmemConfig>>,
cfg: VmConfig,
) -> Result<Option<String>> {
let cfg = cloud_hypervisor_vm_create_cfg(
sandbox_path,
vsock_socket_path,
shared_fs_devices,
pmem_devices,
)
.await?;
let serialised = serde_json::to_string_pretty(&cfg)?;
task::spawn_blocking(move || -> Result<Option<String>> {
@@ -124,151 +106,3 @@ pub async fn cloud_hypervisor_vm_fs_add(
result
}
pub async fn cloud_hypervisor_vm_create_cfg(
// FIXME:
_sandbox_path: String,
vsock_socket_path: String,
shared_fs_devices: Option<Vec<FsConfig>>,
pmem_devices: Option<Vec<PmemConfig>>,
) -> Result<VmConfig> {
let topology = CpuTopology {
threads_per_core: 1,
cores_per_die: 12,
dies_per_package: 1,
packages: 1,
};
let cpus = CpusConfig {
boot_vcpus: 1,
max_vcpus: 12,
max_phys_bits: 46,
topology: Some(topology),
..Default::default()
};
let rng = RngConfig {
src: PathBuf::from("/dev/urandom"),
..Default::default()
};
let kernel_args = vec![
"root=/dev/pmem0p1",
"rootflags=dax,data=ordered,errors=remount-ro",
"ro",
"rootfstype=ext4",
"panic=1",
"no_timer_check",
"noreplace-smp",
"console=ttyS0,115200n8",
"systemd.log_target=console",
"systemd.unit=kata-containers",
"systemd.mask=systemd-networkd.service",
"systemd.mask=systemd-networkd.socket",
"agent.log=debug",
];
let cmdline = kernel_args.join(" ");
let kernel = PathBuf::from("/opt/kata/share/kata-containers/vmlinux.container");
// Note that PmemConfig replaces the PayloadConfig.initrd.
let payload = PayloadConfig {
kernel: Some(kernel),
cmdline: Some(cmdline),
..Default::default()
};
let serial = ConsoleConfig {
mode: ConsoleOutputMode::Tty,
..Default::default()
};
let ip = Ipv4Addr::new(192, 168, 10, 10);
let mask = Ipv4Addr::new(255, 255, 255, 0);
let mac_str = "12:34:56:78:90:01";
let mac = parse_mac(mac_str)?;
let network = NetConfig {
ip,
mask,
mac,
..Default::default()
};
let memory = MemoryConfig {
size: (1024 * 1024 * 2048),
// Required
shared: true,
prefault: false,
hugepages: false,
mergeable: false,
// FIXME:
hotplug_size: Some(16475226112),
..Default::default()
};
let fs = shared_fs_devices;
let pmem = pmem_devices;
let vsock = VsockConfig {
cid: 3,
socket: PathBuf::from(vsock_socket_path),
..Default::default()
};
let cfg = VmConfig {
cpus,
memory,
fs,
serial,
pmem,
payload: Some(payload),
vsock: Some(vsock),
rng,
net: Some(vec![network]),
..Default::default()
};
Ok(cfg)
}
fn parse_mac<S>(s: &S) -> Result<MacAddr>
where
S: AsRef<str> + ?Sized + Display,
{
let v: Vec<&str> = s.as_ref().split(':').collect();
let mut bytes = [0u8; MAC_ADDR_LEN];
if v.len() != MAC_ADDR_LEN {
return Err(anyhow!(
"invalid MAC {} (length {}, expected {})",
s,
v.len(),
MAC_ADDR_LEN
));
}
for i in 0..MAC_ADDR_LEN {
if v[i].len() != 2 {
return Err(anyhow!(
"invalid MAC {} (segment {} length {}, expected {})",
s,
i,
v.len(),
2
));
}
bytes[i] =
u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?;
}
Ok(MacAddr { bytes })
}

View File

@@ -0,0 +1,324 @@
// Copyright (c) 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
use crate::net_util::MAC_ADDR_LEN;
use crate::NamedHypervisorConfig;
use crate::VmConfig;
use crate::{
ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpuTopology, CpusConfig, MacAddr, MemoryConfig,
PayloadConfig, PmemConfig, RngConfig, VsockConfig,
};
use anyhow::{anyhow, Context, Result};
use kata_types::config::default::DEFAULT_CH_ENTROPY_SOURCE;
use kata_types::config::hypervisor::{CpuInfo, MachineInfo, MemoryInfo};
use kata_types::config::BootInfo;
use std::convert::TryFrom;
use std::fmt::Display;
use std::path::PathBuf;
// 1 MiB
const MIB: u64 = 1024 * 1024;
const PMEM_ALIGN_BYTES: u64 = 2 * MIB;
const DEFAULT_CH_MAX_PHYS_BITS: u8 = 46;
impl TryFrom<NamedHypervisorConfig> for VmConfig {
type Error = anyhow::Error;
fn try_from(n: NamedHypervisorConfig) -> Result<Self, Self::Error> {
let kernel_params = n.kernel_params;
let cfg = n.cfg;
let vsock_socket_path = n.vsock_socket_path;
let sandbox_path = n.sandbox_path;
let fs = n.shared_fs_devices;
let cpus = CpusConfig::try_from(cfg.cpu_info)?;
let rng = RngConfig::try_from(cfg.machine_info)?;
// Note how CH handles the different image types:
//
// - An image is specified in PmemConfig.
// - An initrd/initramfs is specified in PayloadConfig.
let boot_info = cfg.boot_info;
let use_initrd = !boot_info.initrd.is_empty();
let use_image = !boot_info.image.is_empty();
if use_initrd && use_image {
return Err(anyhow!("cannot specify image and initrd"));
}
if !use_initrd && !use_image {
return Err(anyhow!("missing boot file (no image or initrd)"));
}
let initrd = if use_initrd {
Some(PathBuf::from(boot_info.initrd.clone()))
} else {
None
};
let pmem = if use_initrd {
None
} else {
let pmem = PmemConfig::try_from(&boot_info)?;
Some(vec![pmem])
};
let payload = PayloadConfig::try_from((boot_info, kernel_params, initrd))?;
let serial = get_serial_cfg()?;
let console = get_console_cfg()?;
let memory = MemoryConfig::try_from(cfg.memory_info)?;
std::fs::create_dir_all(sandbox_path).context("failed to create sandbox path")?;
let vsock = VsockConfig {
cid: 3,
socket: PathBuf::from(vsock_socket_path),
..Default::default()
};
let cfg = VmConfig {
cpus,
memory,
serial,
console,
payload: Some(payload),
fs,
pmem,
vsock: Some(vsock),
rng,
..Default::default()
};
Ok(cfg)
}
}
impl TryFrom<MemoryInfo> for MemoryConfig {
type Error = anyhow::Error;
fn try_from(mem: MemoryInfo) -> Result<Self, Self::Error> {
let sysinfo = nix::sys::sysinfo::sysinfo()?;
let max_mem_bytes = sysinfo.ram_total();
let mem_bytes: u64 = MIB
.checked_mul(mem.default_memory as u64)
.ok_or("cannot convert default memory to bytes")
.map_err(|e| anyhow!(e))?;
// The amount of memory that can be hot-plugged is the total less the
// amount allocated at VM start.
let hotplug_size_bytes = max_mem_bytes
.checked_sub(mem_bytes)
.ok_or("failed to calculate max hotplug size for CH")
.map_err(|e| anyhow!(e))?;
let aligned_hotplug_size_bytes =
checked_next_multiple_of(hotplug_size_bytes, PMEM_ALIGN_BYTES)
.ok_or("cannot handle pmem alignment for CH")
.map_err(|e| anyhow!(e))?;
let cfg = MemoryConfig {
size: mem_bytes,
// Required
shared: true,
hotplug_size: Some(aligned_hotplug_size_bytes),
..Default::default()
};
Ok(cfg)
}
}
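// An illustrative sketch of the conversion above, using assumed host values
// (not taken from a real system). With default_memory = 2048 MiB and 16 GiB
// of host RAM:
//
//   mem_bytes          = 2048 * MIB     = 2 GiB  (memory at VM start)
//   hotplug_size_bytes = 16 GiB - 2 GiB = 14 GiB
//   hotplug (aligned)  = 14 GiB + 2 MiB (see the alignment helper below)
//
// so the VM boots with 2 GiB and can hot-plug roughly the rest of host RAM.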
// Return the next multiple of 'multiple' starting from the specified value
// (aka align value to multiple).
//
// This is a temporary solution until checked_next_multiple_of() integer
// method is available in the rust language.
//
// See: https://github.com/rust-lang/rust/issues/88581
fn checked_next_multiple_of(value: u64, multiple: u64) -> Option<u64> {
match value.checked_rem(multiple) {
None => Some(value),
Some(r) => value.checked_add(multiple - r),
}
}
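// An illustrative sketch of the helper's behaviour (not exhaustive):
//
//   checked_next_multiple_of(3, 2)        == Some(4)
//   checked_next_multiple_of(4, 2)        == Some(6)  // an already aligned value is
//                                                     // bumped by one extra multiple;
//                                                     // harmless for padding, but it
//                                                     // differs from the std API it
//                                                     // stands in for
//   checked_next_multiple_of(u64::MAX, 2) == None     // the addition would overflow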
impl TryFrom<CpuInfo> for CpusConfig {
type Error = anyhow::Error;
fn try_from(cpu: CpuInfo) -> Result<Self, Self::Error> {
let boot_vcpus = u8::try_from(cpu.default_vcpus)?;
let max_vcpus = u8::try_from(cpu.default_maxvcpus)?;
let topology = CpuTopology {
threads_per_core: 1,
cores_per_die: max_vcpus,
dies_per_package: 1,
packages: 1,
};
let max_phys_bits = DEFAULT_CH_MAX_PHYS_BITS;
let cfg = CpusConfig {
boot_vcpus,
max_vcpus,
max_phys_bits,
topology: Some(topology),
..Default::default()
};
Ok(cfg)
}
}
impl TryFrom<String> for CpuFeatures {
type Error = anyhow::Error;
#[cfg(target_arch = "x86_64")]
fn try_from(s: String) -> Result<Self, Self::Error> {
let amx = s.split(',').any(|x| x == "amx");
let cpu_features = CpuFeatures { amx };
Ok(cpu_features)
}
#[cfg(not(target_arch = "x86_64"))]
fn try_from(_s: String) -> Result<Self, Self::Error> {
Ok(CpuFeatures::default())
}
}
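// An illustrative sketch: on x86_64 the conversion above simply scans the
// comma-separated cpu_features string for an "amx" entry (values assumed):
//
//   let f = CpuFeatures::try_from("sse4_2,avx2,amx".to_string())?;
//   assert!(f.amx);
//   let f = CpuFeatures::try_from(String::new())?;
//   assert!(!f.amx);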
// The 1st tuple element is the BootInfo, the 2nd is the space separated
// kernel parameters list, and the 3rd is an optional initramfs image to use.
// A PayloadConfig cannot be created from the BootInfo alone since that
// contains the user-specified kernel parameters only.
impl TryFrom<(BootInfo, String, Option<PathBuf>)> for PayloadConfig {
type Error = anyhow::Error;
fn try_from(args: (BootInfo, String, Option<PathBuf>)) -> Result<Self, Self::Error> {
let b = args.0;
let cmdline = args.1;
let initramfs = args.2;
let kernel = PathBuf::from(b.kernel);
let payload = PayloadConfig {
kernel: Some(kernel),
cmdline: Some(cmdline),
initramfs,
..Default::default()
};
Ok(payload)
}
}
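// An illustrative call of the conversion above, with assumed values. For an
// image boot the initramfs element is None and the rootfs is supplied
// separately through PmemConfig:
//
//   let payload = PayloadConfig::try_from((
//       boot_info,                                 // BootInfo carrying the kernel path
//       "console=ttyS0 ro".to_string(),            // assembled kernel command line
//       Some(PathBuf::from("/path/to/initramfs")), // Some(..) only for initrd boots
//   ))?;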
impl TryFrom<MachineInfo> for RngConfig {
type Error = anyhow::Error;
fn try_from(m: MachineInfo) -> Result<Self, Self::Error> {
let entropy_source = if !m.entropy_source.is_empty() {
m.entropy_source
} else {
DEFAULT_CH_ENTROPY_SOURCE.to_string()
};
let rng = RngConfig {
src: PathBuf::from(entropy_source),
..Default::default()
};
Ok(rng)
}
}
impl TryFrom<&BootInfo> for PmemConfig {
type Error = anyhow::Error;
fn try_from(b: &BootInfo) -> Result<Self, Self::Error> {
let file = if b.image.is_empty() {
return Err(anyhow!("CH PmemConfig only used for images"));
} else {
b.image.clone()
};
let cfg = PmemConfig {
file: PathBuf::from(file),
discard_writes: true,
..Default::default()
};
Ok(cfg)
}
}
fn get_serial_cfg() -> Result<ConsoleConfig> {
let cfg = ConsoleConfig {
file: None,
mode: ConsoleOutputMode::Tty,
iommu: false,
};
Ok(cfg)
}
fn get_console_cfg() -> Result<ConsoleConfig> {
let cfg = ConsoleConfig {
file: None,
mode: ConsoleOutputMode::Off,
iommu: false,
};
Ok(cfg)
}
#[allow(dead_code)]
fn parse_mac<S>(s: &S) -> Result<MacAddr>
where
S: AsRef<str> + ?Sized + Display,
{
let v: Vec<&str> = s.as_ref().split(':').collect();
let mut bytes = [0u8; MAC_ADDR_LEN];
if v.len() != MAC_ADDR_LEN {
return Err(anyhow!(
"invalid MAC {} (length {}, expected {})",
s,
v.len(),
MAC_ADDR_LEN
));
}
for i in 0..MAC_ADDR_LEN {
if v[i].len() != 2 {
return Err(anyhow!(
"invalid MAC {} (segment {} length {}, expected {})",
s,
i,
v[i].len(),
2
));
}
bytes[i] =
u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?;
}
Ok(MacAddr { bytes })
}
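// Illustrative parse_mac results (a sketch, not a test suite):
//
//   parse_mac("12:34:56:78:90:01") // Ok: bytes == [0x12, 0x34, 0x56, 0x78, 0x90, 0x01]
//   parse_mac("12:34:56")          // Err: 3 segments, expected MAC_ADDR_LEN (6)
//   parse_mac("12:34:56:78:90:0g") // Err: "0g" is not valid hexadecimal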

View File

@@ -7,10 +7,12 @@ use std::net::Ipv4Addr;
use std::path::PathBuf;
pub mod ch_api;
pub mod convert;
pub mod net_util;
mod virtio_devices;
use crate::virtio_devices::RateLimiterConfig;
use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
pub use net_util::MacAddr;
pub const MAX_NUM_PCI_SEGMENTS: u16 = 16;
@@ -479,3 +481,15 @@ fn usize_is_zero(v: &usize) -> bool {
fn u16_is_zero(v: &u16) -> bool {
*v == 0
}
// Type used to simplify conversion from a generic Hypervisor config
// to a CH specific VmConfig.
#[derive(Debug, Clone)]
pub struct NamedHypervisorConfig {
pub kernel_params: String,
pub sandbox_path: String,
pub vsock_socket_path: String,
pub cfg: HypervisorConfig,
pub shared_fs_devices: Option<Vec<FsConfig>>,
}
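// An illustrative use of this type, mirroring the CH boot path that appears
// later in this diff:
//
//   let named_cfg = NamedHypervisorConfig {
//       kernel_params,
//       sandbox_path,
//       vsock_socket_path,
//       cfg: hypervisor_config.clone(),
//       shared_fs_devices,
//   };
//   let vm_cfg = VmConfig::try_from(named_cfg)?;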

View File

@@ -53,9 +53,6 @@ pub struct CloudHypervisorInner {
pub(crate) tasks: Option<Vec<JoinHandle<Result<()>>>>,
}
unsafe impl Send for CloudHypervisorInner {}
unsafe impl Sync for CloudHypervisorInner {}
const CH_DEFAULT_TIMEOUT_SECS: u32 = 10;
impl CloudHypervisorInner {

View File

@@ -10,7 +10,7 @@ use crate::HybridVsockConfig;
use crate::VmmState;
use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::cloud_hypervisor_vm_fs_add;
use ch_config::{FsConfig, PmemConfig};
use ch_config::FsConfig;
use safe_path::scoped_join;
use std::convert::TryFrom;
use std::path::PathBuf;
@@ -148,41 +148,6 @@ impl CloudHypervisorInner {
Ok(None)
}
}
pub(crate) async fn get_boot_file(&mut self) -> Result<PathBuf> {
if let Some(ref config) = self.config {
let boot_info = &config.boot_info;
let file = if !boot_info.initrd.is_empty() {
boot_info.initrd.clone()
} else if !boot_info.image.is_empty() {
boot_info.image.clone()
} else {
return Err(anyhow!("missing boot file (no image or initrd)"));
};
Ok(PathBuf::from(file))
} else {
Err(anyhow!("no hypervisor config"))
}
}
pub(crate) async fn get_pmem_devices(&mut self) -> Result<Option<Vec<PmemConfig>>> {
let file = self.get_boot_file().await?;
let pmem_cfg = PmemConfig {
file,
size: None,
iommu: false,
discard_writes: true,
id: None,
pci_segment: 0,
};
let pmem_devices = vec![pmem_cfg];
Ok(Some(pmem_devices))
}
}
#[derive(Debug)]

View File

@@ -6,18 +6,23 @@
use super::inner::CloudHypervisorInner;
use crate::ch::utils::get_api_socket_path;
use crate::ch::utils::{get_jailer_root, get_sandbox_path, get_vsock_path};
use crate::kernel_param::KernelParams;
use crate::Device;
use crate::VsockConfig;
use crate::VM_ROOTFS_DRIVER_PMEM;
use crate::{VcpuThreadIds, VmmState};
use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::{
cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping,
cloud_hypervisor_vmm_shutdown,
};
use ch_config::{NamedHypervisorConfig, VmConfig};
use core::future::poll_fn;
use futures::executor::block_on;
use futures::future::join_all;
use kata_types::capabilities::{Capabilities, CapabilityBits};
use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE;
use std::convert::TryFrom;
use std::fs::create_dir_all;
use std::os::unix::net::UnixStream;
use std::path::Path;
@@ -54,11 +59,43 @@ impl CloudHypervisorInner {
Ok(())
}
async fn get_kernel_params(&self) -> Result<String> {
let cfg = self
.config
.as_ref()
.ok_or("no hypervisor config for CH")
.map_err(|e| anyhow!(e))?;
let enable_debug = cfg.debug_info.enable_debug;
// Note that the configuration option hypervisor.block_device_driver is not used.
let rootfs_driver = VM_ROOTFS_DRIVER_PMEM;
let rootfs_type = match cfg.boot_info.rootfs_type.is_empty() {
true => DEFAULT_CH_ROOTFS_TYPE,
false => &cfg.boot_info.rootfs_type,
};
// Start by adding the default set of kernel parameters.
let mut params = KernelParams::new(enable_debug);
let mut rootfs_param = KernelParams::new_rootfs_kernel_params(rootfs_driver, rootfs_type)?;
// Add the rootfs device
params.append(&mut rootfs_param);
// Finally, add the user-specified options at the end
// (so they will take priority).
params.append(&mut KernelParams::from_string(&cfg.boot_info.kernel_params));
let kernel_params = params.to_string()?;
Ok(kernel_params)
}
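// An illustrative sketch of the resulting parameter order (values assumed;
// the pmem rootfs parameters come from KernelParams::new_rootfs_kernel_params):
//
//   <debug defaults> root=/dev/pmem0p1 rootflags=... ro rootfstype=ext4 <user kernel_params>
//
// Placing the user-specified options last means they override the defaults.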
async fn boot_vm(&mut self) -> Result<()> {
let shared_fs_devices = self.get_shared_fs_devices().await?;
let pmem_devices = self.get_pmem_devices().await?;
let socket = self
.api_socket
.as_ref()
@@ -71,14 +108,34 @@ impl CloudHypervisorInner {
let vsock_socket_path = get_vsock_path(&self.id)?;
let response = cloud_hypervisor_vm_create(
let hypervisor_config = self
.config
.as_ref()
.ok_or("no hypervisor config for CH")
.map_err(|e| anyhow!(e))?;
debug!(
sl!(),
"generic Hypervisor configuration: {:?}", hypervisor_config
);
let kernel_params = self.get_kernel_params().await?;
let named_cfg = NamedHypervisorConfig {
kernel_params,
sandbox_path,
vsock_socket_path,
socket.try_clone().context("failed to clone socket")?,
cfg: hypervisor_config.clone(),
shared_fs_devices,
pmem_devices,
)
.await?;
};
let cfg = VmConfig::try_from(named_cfg)?;
debug!(sl!(), "CH specific VmConfig configuration: {:?}", cfg);
let response =
cloud_hypervisor_vm_create(socket.try_clone().context("failed to clone socket")?, cfg)
.await?;
if let Some(detail) = response {
debug!(sl!(), "vm boot response: {:?}", detail);
@@ -415,6 +472,10 @@ impl CloudHypervisorInner {
Ok(Vec::<u32>::new())
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
todo!()
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -33,9 +33,6 @@ pub struct CloudHypervisor {
inner: Arc<RwLock<CloudHypervisorInner>>,
}
unsafe impl Send for CloudHypervisor {}
unsafe impl Sync for CloudHypervisor {}
impl CloudHypervisor {
pub fn new() -> Self {
Self {
@@ -121,6 +118,11 @@ impl Hypervisor for CloudHypervisor {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -27,7 +27,6 @@ use std::{collections::HashSet, fs::create_dir_all, path::PathBuf};
const DRAGONBALL_KERNEL: &str = "vmlinux";
const DRAGONBALL_ROOT_FS: &str = "rootfs";
unsafe impl Sync for DragonballInner {}
pub struct DragonballInner {
/// sandbox id
pub(crate) id: String,

View File

@@ -127,6 +127,11 @@ impl DragonballInner {
Ok(Vec::from_iter(pids.into_iter()))
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
let master_tid = self.vmm_instance.get_vmm_master_tid();
Ok(master_tid)
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -117,6 +117,11 @@ impl Hypervisor for Dragonball {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -7,14 +7,12 @@
use std::{
fs::{File, OpenOptions},
os::unix::{io::IntoRawFd, prelude::AsRawFd},
sync::{
mpsc::{channel, Receiver, Sender},
Arc, Mutex, RwLock,
},
sync::{Arc, Mutex, RwLock},
thread,
};
use anyhow::{anyhow, Context, Result};
use crossbeam_channel::{unbounded, Receiver, Sender};
use dragonball::{
api::v1::{
BlockDeviceConfigInfo, BootSourceConfig, FsDeviceConfigInfo, FsMountConfigInfo,
@@ -77,6 +75,12 @@ impl VmmInstance {
share_info_lock.write().unwrap().id = String::from(id);
}
pub fn get_vmm_master_tid(&self) -> u32 {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().master_tid;
result
}
pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().tids.clone();
@@ -86,8 +90,8 @@ impl VmmInstance {
pub fn run_vmm_server(&mut self, id: &str, netns: Option<String>) -> Result<()> {
let kvm = OpenOptions::new().read(true).write(true).open(KVM_DEVICE)?;
let (to_vmm, from_runtime) = channel();
let (to_runtime, from_vmm) = channel();
let (to_vmm, from_runtime) = unbounded();
let (to_runtime, from_vmm) = unbounded();
self.set_instance_id(id);
@@ -105,6 +109,7 @@ impl VmmInstance {
Some(kvm.into_raw_fd()),
)
.expect("Failed to start vmm");
let vmm_shared_info = self.get_shared_info();
self.vmm_thread = Some(
thread::Builder::new()
@@ -112,6 +117,9 @@ impl VmmInstance {
.spawn(move || {
|| -> Result<i32> {
debug!(sl!(), "run vmm thread start");
let cur_tid = nix::unistd::gettid().as_raw() as u32;
vmm_shared_info.write().unwrap().master_tid = cur_tid;
if let Some(netns_path) = netns {
info!(sl!(), "set netns for vmm master {}", &netns_path);
let netns_fd = File::open(&netns_path)

View File

@@ -87,6 +87,7 @@ pub trait Hypervisor: Send + Sync {
async fn hypervisor_config(&self) -> HypervisorConfig;
async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
async fn get_pids(&self) -> Result<Vec<u32>>;
async fn get_vmm_master_tid(&self) -> Result<u32>;
async fn cleanup(&self) -> Result<()>;
async fn check(&self) -> Result<()>;
async fn get_jailer_root(&self) -> Result<String>;

View File

@@ -12,9 +12,6 @@ const VSOCK_SCHEME: &str = "vsock";
const VSOCK_AGENT_CID: u32 = 3;
const VSOCK_AGENT_PORT: u32 = 1024;
unsafe impl Send for QemuInner {}
unsafe impl Sync for QemuInner {}
pub struct QemuInner {
config: HypervisorConfig,
}
@@ -92,6 +89,11 @@ impl QemuInner {
todo!()
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
info!(sl!(), "QemuInner::get_vmm_master_tid()");
todo!()
}
pub(crate) async fn cleanup(&self) -> Result<()> {
info!(sl!(), "QemuInner::cleanup()");
todo!()

View File

@@ -103,6 +103,11 @@ impl Hypervisor for Qemu {
inner.get_thread_ids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.cleanup().await

View File

@@ -114,7 +114,7 @@ impl CgroupsResource {
pub async fn delete(&self) -> Result<()> {
for cg_pid in self.cgroup_manager.tasks() {
// For now, we can't guarantee that the thread in cgroup_manager does still
// exist. Once it exit, we should ignor that error returned by remove_task
// exist. Once it exit, we should ignore that error returned by remove_task
// to let it go.
if let Err(error) = self.cgroup_manager.remove_task(cg_pid) {
match error.source() {

View File

@@ -101,9 +101,9 @@ impl ResourceManager {
inner.update_cgroups(cid, linux_resources).await
}
pub async fn delete_cgroups(&self) -> Result<()> {
pub async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.delete_cgroups().await
inner.cleanup().await
}
}

View File

@@ -8,7 +8,7 @@ use std::{sync::Arc, thread};
use crate::resource_persist::ResourceState;
use agent::{Agent, Storage};
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use hypervisor::Hypervisor;
use kata_types::config::TomlConfig;
@@ -233,8 +233,22 @@ impl ResourceManagerInner {
.await
}
pub async fn delete_cgroups(&self) -> Result<()> {
self.cgroups_resource.delete().await
pub async fn cleanup(&self) -> Result<()> {
// clean up cgroup
self.cgroups_resource
.delete()
.await
.context("delete cgroup")?;
// clean up share fs mount
if let Some(share_fs) = &self.share_fs {
share_fs
.get_share_fs_mount()
.cleanup(&self.sid)
.await
.context("failed to cleanup host path")?;
}
// TODO cleanup other resources
Ok(())
}
pub async fn dump(&self) {

View File

@@ -5,6 +5,7 @@
//
mod endpoint;
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint;
mod network_entity;
mod network_info;
@@ -17,7 +18,7 @@ use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
pub use endpoint::endpoint_persist::EndpointState;
pub use utils::netns::NetnsGuard;
use std::sync::Arc;

View File

@@ -10,12 +10,12 @@ use anyhow::{Context, Result};
use nix::sched::{setns, CloneFlags};
use nix::unistd::{getpid, gettid};
pub(crate) struct NetnsGuard {
pub struct NetnsGuard {
old_netns: Option<File>,
}
impl NetnsGuard {
pub(crate) fn new(new_netns_path: &str) -> Result<Self> {
pub fn new(new_netns_path: &str) -> Result<Self> {
let old_netns = if !new_netns_path.is_empty() {
let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net");
let old_netns = File::open(&current_netns_path)

View File

@@ -131,6 +131,8 @@ pub trait ShareFsMount: Send + Sync {
async fn umount_volume(&self, file_name: &str) -> Result<()>;
/// Umount the rootfs
async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()>;
/// Clean up share fs mount
async fn cleanup(&self, sid: &str) -> Result<()>;
}
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {

View File

@@ -59,6 +59,10 @@ pub fn get_host_rw_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid).join("rw")
}
pub fn get_host_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid)
}
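// An illustrative sketch of the resulting host layout, assuming
// KATA_HOST_SHARED_DIR is "/run/kata-containers/shared/sandboxes":
//
//   get_host_shared_path("sid")    -> /run/kata-containers/shared/sandboxes/sid
//   get_host_ro_shared_path("sid") -> /run/kata-containers/shared/sandboxes/sid/ro
//   get_host_rw_shared_path("sid") -> /run/kata-containers/shared/sandboxes/sid/rw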
fn do_get_guest_any_path(
target: &str,
cid: &str,

View File

@@ -7,7 +7,7 @@
use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_sys_util::mount::{bind_remount, umount_timeout};
use kata_sys_util::mount::{bind_remount, umount_all, umount_timeout};
use kata_types::k8s::is_watchable_mount;
use kata_types::mount;
use nix::sys::stat::stat;
@@ -20,7 +20,8 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use super::{
utils::{self, do_get_host_path},
get_host_rw_shared_path,
utils::{self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path},
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
};
@@ -224,4 +225,18 @@ impl ShareFsMount for VirtiofsShareMount {
Ok(())
}
async fn cleanup(&self, sid: &str) -> Result<()> {
// Unmount ro path
let host_ro_dest = get_host_ro_shared_path(sid);
umount_all(host_ro_dest.clone(), true).context("failed to umount ro path")?;
fs::remove_dir_all(host_ro_dest).context("failed to remove ro path")?;
// The rootfs and volumes have already been umounted before this function is called, so just remove the rw dir directly
let host_rw_dest = get_host_rw_shared_path(sid);
fs::remove_dir_all(host_rw_dest).context("failed to remove rw path")?;
// remove the host share directory
let host_path = get_host_shared_path(sid);
fs::remove_dir_all(host_path).context("failed to remove host shared path")?;
Ok(())
}
}

View File

@@ -13,9 +13,12 @@ slog-scope = "4.4.0"
tokio = { version = "1.8.0", features = ["rt-multi-thread"] }
hyper = { version = "0.14.20", features = ["stream", "server", "http1"] }
hyperlocal = "0.8"
serde_json = "1.0.88"
nix = "0.25.0"
common = { path = "./common" }
kata-types = { path = "../../../libs/kata-types" }
kata-sys-util = { path = "../../../libs/kata-sys-util" }
logging = { path = "../../../libs/logging"}
oci = { path = "../../../libs/oci" }
shim-interface = { path = "../../../libs/shim-interface" }

View File

@@ -26,3 +26,4 @@ agent = { path = "../../agent" }
kata-sys-util = { path = "../../../../libs/kata-sys-util" }
kata-types = { path = "../../../../libs/kata-types" }
oci = { path = "../../../../libs/oci" }

View File

@@ -17,12 +17,9 @@ pub enum Action {
Start,
Stop,
Shutdown,
Event(Arc<dyn Event>),
Event(Arc<dyn Event + Send + Sync>),
}
unsafe impl Send for Message {}
unsafe impl Sync for Message {}
#[derive(Debug)]
pub struct Message {
pub action: Action,

View File

@@ -9,9 +9,15 @@ use async_trait::async_trait;
#[async_trait]
pub trait Sandbox: Send + Sync {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()>;
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()>;
async fn stop(&self) -> Result<()>;
async fn cleanup(&self, container_id: &str) -> Result<()>;
async fn cleanup(&self) -> Result<()>;
async fn shutdown(&self) -> Result<()>;
// agent function

View File

@@ -184,6 +184,7 @@ pub enum ProcessStatus {
Stopped = 3,
Paused = 4,
Pausing = 5,
Exited = 6,
}
#[derive(Debug, Clone)]

View File

@@ -56,6 +56,7 @@ impl From<ProcessStatus> for api::Status {
ProcessStatus::Stopped => api::Status::STOPPED,
ProcessStatus::Paused => api::Status::PAUSED,
ProcessStatus::Pausing => api::Status::PAUSING,
ProcessStatus::Exited => api::Status::STOPPED,
}
}
}

View File

@@ -18,6 +18,7 @@ use hypervisor::Param;
use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use persist::sandbox_persist::Persist;
@@ -50,6 +51,8 @@ impl RuntimeHandlerManagerInner {
async fn init_runtime_handler(
&mut self,
spec: &oci::Spec,
state: &oci::State,
netns: Option<String>,
dns: Vec<String>,
config: Arc<TomlConfig>,
@@ -74,14 +77,19 @@ impl RuntimeHandlerManagerInner {
// start sandbox
runtime_instance
.sandbox
.start(netns, dns)
.start(netns, dns, spec, state)
.await
.context("start sandbox")?;
self.runtime_instance = Some(Arc::new(runtime_instance));
Ok(())
}
async fn try_init(&mut self, spec: &oci::Spec, options: &Option<Vec<u8>>) -> Result<()> {
async fn try_init(
&mut self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
// return if runtime instance has init
if self.runtime_instance.is_some() {
return Ok(());
@@ -121,7 +129,7 @@ impl RuntimeHandlerManagerInner {
}
let config = load_config(spec, options).context("load config")?;
self.init_runtime_handler(netns, dns, Arc::new(config))
self.init_runtime_handler(spec, state, netns, dns, Arc::new(config))
.await
.context("init runtime handler")?;
@@ -185,7 +193,7 @@ impl RuntimeHandlerManager {
.await
.context("failed to restore the sandbox")?;
sandbox
.cleanup(&inner.id)
.cleanup()
.await
.context("failed to cleanup the resource")?;
}
@@ -207,10 +215,11 @@ impl RuntimeHandlerManager {
async fn try_init_runtime_instance(
&self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
let mut inner = self.inner.write().await;
inner.try_init(spec, options).await
inner.try_init(spec, state, options).await
}
pub async fn handler_message(&self, req: Request) -> Result<Response> {
@@ -222,8 +231,16 @@ impl RuntimeHandlerManager {
oci::OCI_SPEC_CONFIG_FILE_NAME
);
let spec = oci::Spec::load(&bundler_path).context("load spec")?;
let state = oci::State {
version: spec.version.clone(),
id: container_config.container_id.to_string(),
status: oci::ContainerState::Creating,
pid: 0,
bundle: bundler_path,
annotations: spec.annotations.clone(),
};
self.try_init_runtime_instance(&spec, &container_config.options)
self.try_init_runtime_instance(&spec, &state, &container_config.options)
.await
.context("try init runtime instance")?;
let instance = self
@@ -374,7 +391,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
// 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
// then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
// a single container).
if toml_config.runtime.static_resource_mgmt {
if toml_config.runtime.static_sandbox_resource_mgmt {
info!(sl!(), "static resource management enabled");
let static_resource_manager = StaticResourceManager::new(spec)
.context("failed to construct static resource manager")?;
@@ -382,6 +399,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
.setup_config(&mut toml_config)
.context("failed to setup static resource mgmt config")?;
}
info!(sl!(), "get config content {:?}", &toml_config);
Ok(toml_config)
}

View File

@@ -37,6 +37,7 @@ pub struct Container {
pid: u32,
pub container_id: ContainerID,
config: ContainerConfig,
spec: oci::Spec,
inner: Arc<RwLock<ContainerInner>>,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
@@ -47,6 +48,7 @@ impl Container {
pub fn new(
pid: u32,
config: ContainerConfig,
spec: oci::Spec,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
) -> Result<Self> {
@@ -67,6 +69,7 @@ impl Container {
pid,
container_id,
config,
spec,
inner: Arc::new(RwLock::new(ContainerInner::new(
agent.clone(),
init_process,
@@ -382,11 +385,31 @@ impl Container {
.context("agent update container")?;
Ok(())
}
pub async fn config(&self) -> ContainerConfig {
self.config.clone()
}
pub async fn spec(&self) -> oci::Spec {
self.spec.clone()
}
}
fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
// hook should be done on host
spec.hooks = None;
// Only the StartContainer hook needs to be reserved for execution in the guest
let start_container_hooks = match spec.hooks.as_ref() {
Some(hooks) => hooks.start_container.clone(),
None => Vec::new(),
};
spec.hooks = if start_container_hooks.is_empty() {
None
} else {
Some(oci::Hooks {
start_container: start_container_hooks,
..Default::default()
})
};
// special process K8s ephemeral volumes.
update_ephemeral_storage_type(spec);

View File

@@ -200,20 +200,22 @@ impl ContainerInner {
return Ok(());
}
self.check_state(vec![ProcessStatus::Running])
self.check_state(vec![ProcessStatus::Running, ProcessStatus::Exited])
.await
.context("check state")?;
// if use force mode to stop container, stop always successful
// send kill signal to container
// ignore the error of sending signal, since the process would
// have been killed and exited yet.
self.signal_process(process, Signal::SIGKILL as u32, false)
.await
.map_err(|e| {
warn!(logger, "failed to signal kill. {:?}", e);
})
.ok();
if state == ProcessStatus::Running {
// if force mode is used to stop the container, stopping always succeeds
// send kill signal to container
// ignore any error from sending the signal, since the process may
// already have been killed and exited.
self.signal_process(process, Signal::SIGKILL as u32, false)
.await
.map_err(|e| {
warn!(logger, "failed to signal kill. {:?}", e);
})
.ok();
}
match process.process_type {
ProcessType::Container => self

View File

@@ -5,11 +5,10 @@
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use std::{collections::HashMap, sync::Arc};
use agent::Agent;
use async_trait::async_trait;
use common::{
error::Error,
types::{
@@ -19,10 +18,14 @@ use common::{
},
ContainerManager,
};
use hypervisor::Hypervisor;
use oci::Process as OCIProcess;
use resource::network::NetnsGuard;
use resource::ResourceManager;
use tokio::sync::RwLock;
use kata_sys_util::hooks::HookStates;
use super::{logger_with_process, Container};
pub struct VirtContainerManager {
@@ -31,6 +34,7 @@ pub struct VirtContainerManager {
containers: Arc<RwLock<HashMap<String, Container>>>,
resource_manager: Arc<ResourceManager>,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
}
impl VirtContainerManager {
@@ -38,6 +42,7 @@ impl VirtContainerManager {
sid: &str,
pid: u32,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>,
) -> Self {
Self {
@@ -46,6 +51,7 @@ impl VirtContainerManager {
containers: Default::default(),
resource_manager,
agent,
hypervisor,
}
}
}
@@ -55,12 +61,37 @@ impl ContainerManager for VirtContainerManager {
async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result<PID> {
let container = Container::new(
self.pid,
config,
config.clone(),
spec.clone(),
self.agent.clone(),
self.resource_manager.clone(),
)
.context("new container")?;
// CreateContainer Hooks:
// * should be run in vmm namespace (hook path in runtime namespace)
// * should be run after the vm is started, before container is created, and after CreateRuntime Hooks
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createcontainer-hooks
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let vmm_netns_path = format!("/proc/{}/task/{}/ns/{}", self.pid, vmm_master_tid, "net");
let state = oci::State {
version: spec.version.clone(),
id: config.container_id.clone(),
status: oci::ContainerState::Creating,
pid: vmm_master_tid as i32,
bundle: config.bundle.clone(),
annotations: spec.annotations.clone(),
};
// new scope: the CreateContainer hooks inside it execute in the vmm's network namespace
{
let _netns_guard = NetnsGuard::new(&vmm_netns_path).context("vmm netns guard")?;
if let Some(hooks) = spec.hooks.as_ref() {
let mut create_container_hook_states = HookStates::new();
create_container_hook_states.execute_hooks(&hooks.create_container, Some(state))?;
}
}
let mut containers = self.containers.write().await;
container.create(spec).await.context("create")?;
containers.insert(container.container_id.to_string(), container);
@@ -87,6 +118,26 @@ impl ContainerManager for VirtContainerManager {
let c = containers
.remove(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?;
// Poststop Hooks:
// * should be run in runtime namespace
// * should be run after the container is deleted but before delete operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststop
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Stopped,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststop_hook_states = HookStates::new();
poststop_hook_states.execute_hooks(&hooks.poststop, Some(state))?;
}
c.state_process(process).await.context("state process")
}
ProcessType::Exec => {
@@ -190,6 +241,26 @@ impl ContainerManager for VirtContainerManager {
.get(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?;
c.start(process).await.context("start")?;
// Poststart Hooks:
// * should be run in runtime namespace
// * should be run after user-specific command is executed but before start operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststart
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Running,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststart_hook_states = HookStates::new();
poststart_hook_states.execute_hooks(&hooks.poststart, Some(state))?;
}
Ok(PID { pid: self.pid })
}

View File

@@ -182,7 +182,7 @@ impl Process {
drop(exit_status);
let mut status = status.write().await;
*status = ProcessStatus::Stopped;
*status = ProcessStatus::Exited;
drop(status);
drop(exit_notifier);

View File

@@ -86,13 +86,18 @@ impl RuntimeHandler for VirtContainer {
sid,
msg_sender,
agent.clone(),
hypervisor,
hypervisor.clone(),
resource_manager.clone(),
)
.await
.context("new virt sandbox")?;
let container_manager =
container_manager::VirtContainerManager::new(sid, pid, agent, resource_manager);
let container_manager = container_manager::VirtContainerManager::new(
sid,
pid,
agent,
hypervisor,
resource_manager,
);
Ok(RuntimeInstance {
sandbox: Arc::new(sandbox),
container_manager: Arc::new(container_manager),

View File

@@ -17,6 +17,7 @@ use common::{
};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::{
default::{DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT},
TomlConfig,
@@ -117,11 +118,50 @@ impl VirtSandbox {
Ok(resource_configs)
}
async fn execute_oci_hook_functions(
&self,
prestart_hooks: &[oci::Hook],
create_runtime_hooks: &[oci::Hook],
state: &oci::State,
) -> Result<()> {
let mut st = state.clone();
// for dragonball, we use vmm_master_tid
let vmm_pid = self
.hypervisor
.get_vmm_master_tid()
.await
.context("get vmm master tid")?;
st.pid = vmm_pid as i32;
// Prestart Hooks [DEPRECATED in newest oci spec]:
// * should be run in runtime namespace
// * should be run after vm is started, but before container is created
// if Prestart Hook and CreateRuntime Hook are both supported
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#prestart
let mut prestart_hook_states = HookStates::new();
prestart_hook_states.execute_hooks(prestart_hooks, Some(st.clone()))?;
// CreateRuntime Hooks:
// * should be run in runtime namespace
// * should be run when creating the runtime
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createruntime-hooks
let mut create_runtime_hook_states = HookStates::new();
create_runtime_hook_states.execute_hooks(create_runtime_hooks, Some(st.clone()))?;
Ok(())
}
}
#[async_trait]
impl Sandbox for VirtSandbox {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()> {
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()> {
let id = &self.sid;
// if sandbox running, return
@@ -149,6 +189,17 @@ impl Sandbox for VirtSandbox {
self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) = match spec.hooks.as_ref() {
Some(hooks) => (hooks.prestart.clone(), hooks.create_runtime.clone()),
None => (Vec::new(), Vec::new()),
};
self.execute_oci_hook_functions(&prestart_hooks, &create_runtime_hooks, state)
.await?;
// TODO: if prestart_hooks is not empty, rescan the network endpoints (relies on hotplug endpoints).
// see: https://github.com/kata-containers/kata-containers/issues/6378
// connect agent
// set agent socket
let address = self
@@ -240,17 +291,7 @@ impl Sandbox for VirtSandbox {
self.stop().await.context("stop")?;
info!(sl!(), "delete cgroup");
self.resource_manager
.delete_cgroups()
.await
.context("delete cgroups")?;
info!(sl!(), "delete hypervisor");
self.hypervisor
.cleanup()
.await
.context("delete hypervisor")?;
self.cleanup().await.context("do the clean up")?;
info!(sl!(), "stop monitor");
self.monitor.stop().await;
@@ -267,9 +308,19 @@ impl Sandbox for VirtSandbox {
Ok(())
}
async fn cleanup(&self, _id: &str) -> Result<()> {
self.resource_manager.delete_cgroups().await?;
self.hypervisor.cleanup().await?;
async fn cleanup(&self) -> Result<()> {
info!(sl!(), "delete hypervisor");
self.hypervisor
.cleanup()
.await
.context("delete hypervisor")?;
info!(sl!(), "resource clean up");
self.resource_manager
.cleanup()
.await
.context("resource clean up")?;
// TODO: clean up other sandbox resources
Ok(())
}

View File

@@ -225,7 +225,7 @@ DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd
ifeq ($(ARCH),ppc64le)
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/qemu/virtiofsd
endif
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
@@ -789,7 +789,7 @@ install-bin: $(BINLIST)
install-runtime: runtime install-scripts install-completions install-configs install-bin
install-containerd-shim-v2: $(SHIMV2)
install-containerd-shim-v2: $(SHIMV2_OUTPUT)
$(QUIET_INST)$(call INSTALL_EXEC,$<,$(BINDIR))
install-monitor: $(MONITOR)

View File

@@ -307,7 +307,7 @@ func GetSandboxesStoragePath() string {
return "/run/vc/sbs"
}
// GetSandboxesStoragePath returns the storage path where sandboxes info are stored in runtime-rs
// GetSandboxesStoragePathRust returns the storage path where sandboxes info are stored in runtime-rs
func GetSandboxesStoragePathRust() string {
return "/run/kata"
}

View File

@@ -44,6 +44,7 @@ func mountLogger() *logrus.Entry {
}
func isSystemMount(m string) bool {
m = filepath.Clean(m)
for _, p := range systemMountPrefixes {
if m == p || strings.HasPrefix(m, p+"/") {
return true
@@ -54,6 +55,7 @@ func isSystemMount(m string) bool {
}
func isHostDevice(m string) bool {
m = filepath.Clean(m)
if m == "/dev" {
return true
}

View File

@@ -249,6 +249,9 @@ func TestIsHostDevice(t *testing.T) {
{"/dev/zero", true},
{"/dev/block", true},
{"/mnt/dev/block", false},
{"/../dev", true},
{"/../dev/block", true},
{"/../mnt/dev/block", false},
}
for _, test := range tests {

View File

@@ -41,6 +41,10 @@ func TestIsSystemMount(t *testing.T) {
{"/home", false},
{"/dev/block/", false},
{"/mnt/dev/foo", false},
{"/../sys", true},
{"/../sys/", true},
{"/../sys/fs/cgroup", true},
{"/../sysfoo", false},
}
for _, test := range tests {

View File

@@ -1131,6 +1131,9 @@ components:
items:
type: integer
type: array
required:
- host_cpus
- vcpu
type: object
CpuFeatures:
example:

View File

@@ -4,14 +4,14 @@
Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**Vcpu** | Pointer to **int32** | | [optional]
**HostCpus** | Pointer to **[]int32** | | [optional]
**Vcpu** | **int32** | |
**HostCpus** | **[]int32** | |
## Methods
### NewCpuAffinity
`func NewCpuAffinity() *CpuAffinity`
`func NewCpuAffinity(vcpu int32, hostCpus []int32, ) *CpuAffinity`
NewCpuAffinity instantiates a new CpuAffinity object
This constructor will assign default values to properties that have it defined,
@@ -45,11 +45,6 @@ and a boolean to check if the value has been set.
SetVcpu sets Vcpu field to given value.
### HasVcpu
`func (o *CpuAffinity) HasVcpu() bool`
HasVcpu returns a boolean if a field has been set.
### GetHostCpus
@@ -70,11 +65,6 @@ and a boolean to check if the value has been set.
SetHostCpus sets HostCpus field to given value.
### HasHostCpus
`func (o *CpuAffinity) HasHostCpus() bool`
HasHostCpus returns a boolean if a field has been set.
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)

View File

@@ -16,16 +16,18 @@ import (
// CpuAffinity struct for CpuAffinity
type CpuAffinity struct {
Vcpu *int32 `json:"vcpu,omitempty"`
HostCpus *[]int32 `json:"host_cpus,omitempty"`
Vcpu int32 `json:"vcpu"`
HostCpus []int32 `json:"host_cpus"`
}
// NewCpuAffinity instantiates a new CpuAffinity object
// This constructor will assign default values to properties that have it defined,
// and makes sure properties required by API are set, but the set of arguments
// will change when the set of required properties is changed
func NewCpuAffinity() *CpuAffinity {
func NewCpuAffinity(vcpu int32, hostCpus []int32) *CpuAffinity {
this := CpuAffinity{}
this.Vcpu = vcpu
this.HostCpus = hostCpus
return &this
}
@@ -37,76 +39,60 @@ func NewCpuAffinityWithDefaults() *CpuAffinity {
return &this
}
// GetVcpu returns the Vcpu field value if set, zero value otherwise.
// GetVcpu returns the Vcpu field value
func (o *CpuAffinity) GetVcpu() int32 {
if o == nil || o.Vcpu == nil {
if o == nil {
var ret int32
return ret
}
return *o.Vcpu
return o.Vcpu
}
// GetVcpuOk returns a tuple with the Vcpu field value if set, nil otherwise
// GetVcpuOk returns a tuple with the Vcpu field value
// and a boolean to check if the value has been set.
func (o *CpuAffinity) GetVcpuOk() (*int32, bool) {
if o == nil || o.Vcpu == nil {
if o == nil {
return nil, false
}
return o.Vcpu, true
return &o.Vcpu, true
}
// HasVcpu returns a boolean if a field has been set.
func (o *CpuAffinity) HasVcpu() bool {
if o != nil && o.Vcpu != nil {
return true
}
return false
}
// SetVcpu gets a reference to the given int32 and assigns it to the Vcpu field.
// SetVcpu sets field value
func (o *CpuAffinity) SetVcpu(v int32) {
o.Vcpu = &v
o.Vcpu = v
}
// GetHostCpus returns the HostCpus field value if set, zero value otherwise.
// GetHostCpus returns the HostCpus field value
func (o *CpuAffinity) GetHostCpus() []int32 {
if o == nil || o.HostCpus == nil {
if o == nil {
var ret []int32
return ret
}
return *o.HostCpus
return o.HostCpus
}
// GetHostCpusOk returns a tuple with the HostCpus field value if set, nil otherwise
// GetHostCpusOk returns a tuple with the HostCpus field value
// and a boolean to check if the value has been set.
func (o *CpuAffinity) GetHostCpusOk() (*[]int32, bool) {
if o == nil || o.HostCpus == nil {
if o == nil {
return nil, false
}
return o.HostCpus, true
return &o.HostCpus, true
}
// HasHostCpus returns a boolean if a field has been set.
func (o *CpuAffinity) HasHostCpus() bool {
if o != nil && o.HostCpus != nil {
return true
}
return false
}
// SetHostCpus gets a reference to the given []int32 and assigns it to the HostCpus field.
// SetHostCpus sets field value
func (o *CpuAffinity) SetHostCpus(v []int32) {
o.HostCpus = &v
o.HostCpus = v
}
func (o CpuAffinity) MarshalJSON() ([]byte, error) {
toSerialize := map[string]interface{}{}
if o.Vcpu != nil {
if true {
toSerialize["vcpu"] = o.Vcpu
}
if o.HostCpus != nil {
if true {
toSerialize["host_cpus"] = o.HostCpus
}
return json.Marshal(toSerialize)

View File

@@ -578,6 +578,9 @@ components:
description: Virtual machine configuration
CpuAffinity:
required:
- vcpu
- host_cpus
type: object
properties:
vcpu:

File diff suppressed because it is too large

View File

@@ -31,6 +31,14 @@ kata-types = { path = "../../libs/kata-types" }
safe-path = { path = "../../libs/safe-path" }
agent = { path = "../../runtime-rs/crates/agent"}
serial_test = "0.5.1"
vmm-sys-util = "0.11.0"
epoll = "4.0.1"
libc = "0.2.138"
slog = "2.7.0"
slog-scope = "4.4.0"
hyper = "0.14.20"
ttrpc = "0.6.0"
tokio = "1.8.0"
[target.'cfg(target_arch = "s390x")'.dependencies]
reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] }
@@ -42,3 +50,4 @@ reqwest = { version = "0.11", default-features = false, features = ["json", "blo
semver = "1.0.12"
tempfile = "3.1.0"
test-utils = { path = "../../libs/test-utils" }
micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" }

View File

@@ -26,7 +26,7 @@ pub enum Commands {
Env,
/// Enter into guest VM by debug console
Exec,
Exec(ExecArguments),
/// Manage VM factory
Factory,
@@ -136,3 +136,12 @@ pub struct DirectVolResizeArgs {
pub volume_path: String,
pub resize_size: u64,
}
#[derive(Debug, Args)]
pub struct ExecArguments {
/// pod sandbox ID.
pub sandbox_id: String,
#[clap(short = 'p', long = "kata-debug-port", default_value_t = 1026)]
/// kata debug console vport; must match the port set in the configuration (default: 1026).
pub vport: u32,
}

View File

@@ -69,7 +69,7 @@ pub fn get_cpu_flags(cpu_info: &str, cpu_flags_tag: &str) -> Result<String> {
}
if cpu_flags_tag.is_empty() {
return Err(anyhow!("cpu flags delimiter string is empty"))?;
return Err(anyhow!("cpu flags delimiter string is empty"));
}
let subcontents: Vec<&str> = cpu_info.split('\n').collect();

View File

@@ -17,9 +17,9 @@ use std::process::exit;
use args::{Commands, KataCtlCli};
use ops::check_ops::{
handle_check, handle_env, handle_exec, handle_factory, handle_iptables, handle_metrics,
handle_version,
handle_check, handle_env, handle_factory, handle_iptables, handle_metrics, handle_version,
};
use ops::exec_ops::handle_exec;
use ops::volume_ops::handle_direct_volume;
fn real_main() -> Result<()> {
@@ -28,8 +28,8 @@ fn real_main() -> Result<()> {
match args.command {
Commands::Check(args) => handle_check(args),
Commands::DirectVolume(args) => handle_direct_volume(args),
Commands::Exec(args) => handle_exec(args),
Commands::Env => handle_env(),
Commands::Exec => handle_exec(),
Commands::Factory => handle_factory(),
Commands::Iptables(args) => handle_iptables(args),
Commands::Metrics(args) => handle_metrics(args),

View File

@@ -4,5 +4,6 @@
//
pub mod check_ops;
pub mod exec_ops;
pub mod version;
pub mod volume_ops;

View File

@@ -108,10 +108,6 @@ pub fn handle_env() -> Result<()> {
Ok(())
}
pub fn handle_exec() -> Result<()> {
Ok(())
}
pub fn handle_factory() -> Result<()> {
Ok(())
}

View File

@@ -0,0 +1,444 @@
// Copyright (c) 2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
// Description:
// Implementation of entering the guest VM via the debug console.
// Ensure that `kata-debug-port` is consistent with the port
// set in the configuration.
use std::{
io::{self, BufRead, BufReader, Read, Write},
os::unix::{
io::{AsRawFd, FromRawFd, RawFd},
net::UnixStream,
},
time::Duration,
};
use anyhow::{anyhow, Context};
use nix::sys::socket::{connect, socket, AddressFamily, SockFlag, SockType, VsockAddr};
use reqwest::StatusCode;
use slog::debug;
use vmm_sys_util::terminal::Terminal;
use crate::args::ExecArguments;
use shim_interface::shim_mgmt::{client::MgmtClient, AGENT_URL};
const CMD_CONNECT: &str = "CONNECT";
const CMD_OK: &str = "OK";
const SCHEME_VSOCK: &str = "VSOCK";
const SCHEME_HYBRID_VSOCK: &str = "HVSOCK";
const EPOLL_EVENTS_LEN: usize = 16;
const KATA_AGENT_VSOCK_TIMEOUT: u64 = 5;
const TIMEOUT: Duration = Duration::from_millis(2000);
type Result<T> = std::result::Result<T, Error>;
// Convenience macro to obtain the scope logger
#[macro_export]
macro_rules! sl {
() => {
slog_scope::logger()
};
}
#[derive(Debug)]
pub enum Error {
EpollWait(io::Error),
EpollCreate(io::Error),
EpollAdd(io::Error),
SocketWrite(io::Error),
StdioErr(io::Error),
}
#[derive(Debug, PartialEq)]
enum EpollDispatch {
Stdin,
ServerSock,
}
struct EpollContext {
epoll_raw_fd: RawFd,
stdin_index: u64,
dispatch_table: Vec<EpollDispatch>,
stdin_handle: io::Stdin,
debug_console_sock: Option<UnixStream>,
}
impl EpollContext {
fn new() -> Result<Self> {
let epoll_raw_fd = epoll::create(true).map_err(Error::EpollCreate)?;
let dispatch_table = Vec::new();
let stdin_index = 0;
Ok(EpollContext {
epoll_raw_fd,
stdin_index,
dispatch_table,
stdin_handle: io::stdin(),
debug_console_sock: None,
})
}
fn init_debug_console_sock(&mut self, sock: UnixStream) -> Result<()> {
let dispatch_index = self.dispatch_table.len() as u64;
epoll::ctl(
self.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
sock.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
)
.map_err(Error::EpollAdd)?;
self.dispatch_table.push(EpollDispatch::ServerSock);
self.debug_console_sock = Some(sock);
Ok(())
}
fn enable_stdin_event(&mut self) -> Result<()> {
let stdin_index = self.dispatch_table.len() as u64;
epoll::ctl(
self.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
libc::STDIN_FILENO,
epoll::Event::new(epoll::Events::EPOLLIN, stdin_index),
)
.map_err(Error::EpollAdd)?;
self.stdin_index = stdin_index;
self.dispatch_table.push(EpollDispatch::Stdin);
Ok(())
}
fn do_exit(&self) {
self.stdin_handle
.lock()
.set_canon_mode()
.expect("Fail to set stdin to RAW mode");
}
fn do_process_handler(&mut self) -> Result<()> {
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
let epoll_raw_fd = self.epoll_raw_fd;
let debug_console_sock = self.debug_console_sock.as_mut().unwrap();
loop {
let num_events =
epoll::wait(epoll_raw_fd, -1, &mut events[..]).map_err(Error::EpollWait)?;
for event in events.iter().take(num_events) {
let dispatch_index = event.data as usize;
match self.dispatch_table[dispatch_index] {
EpollDispatch::Stdin => {
let mut out = [0u8; 128];
let stdin_lock = self.stdin_handle.lock();
match stdin_lock.read_raw(&mut out[..]) {
Ok(0) => {
return Ok(());
}
Err(e) => {
println!("error with errno {:?} while reading stdin", e);
return Ok(());
}
Ok(count) => {
debug_console_sock
.write_all(&out[..count])
.map_err(Error::SocketWrite)?;
}
}
}
EpollDispatch::ServerSock => {
let mut out = [0u8; 128];
match debug_console_sock.read(&mut out[..]) {
Ok(0) => {
return Ok(());
}
Err(e) => {
println!("error with errno {:?} while reading server", e);
return Ok(());
}
Ok(count) => {
io::stdout()
.write_all(&out[..count])
.map_err(Error::StdioErr)?;
io::stdout().flush().map_err(Error::StdioErr)?;
}
}
}
}
}
}
}
}
trait SockHandler {
fn setup_sock(&self) -> anyhow::Result<UnixStream>;
}
struct VsockConfig {
sock_cid: u32,
sock_port: u32,
}
impl VsockConfig {
fn new(sock_cid: u32, sock_port: u32) -> VsockConfig {
VsockConfig {
sock_cid,
sock_port,
}
}
}
impl SockHandler for VsockConfig {
fn setup_sock(&self) -> anyhow::Result<UnixStream> {
let sock_addr = VsockAddr::new(self.sock_cid, self.sock_port);
// Create socket fd
let vsock_fd = socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)
.context("create vsock socket")?;
// Wrap the socket fd in UnixStream, so that it is closed
// when anything fails.
let stream = unsafe { UnixStream::from_raw_fd(vsock_fd) };
// Connect the socket to vsock server.
connect(stream.as_raw_fd(), &sock_addr)
.with_context(|| format!("failed to connect to server {:?}", &sock_addr))?;
Ok(stream)
}
}
struct HvsockConfig {
sock_addr: String,
sock_port: u32,
}
impl HvsockConfig {
fn new(sock_addr: String, sock_port: u32) -> Self {
HvsockConfig {
sock_addr,
sock_port,
}
}
}
impl SockHandler for HvsockConfig {
fn setup_sock(&self) -> anyhow::Result<UnixStream> {
let mut stream = match UnixStream::connect(self.sock_addr.clone()) {
Ok(s) => s,
Err(e) => return Err(anyhow!(e).context("failed to create UNIX Stream socket")),
};
// Ensure the Unix stream actually connects to the vsock server that
// the Kata agent is listening on inside the VM.
{
let test_msg = format!("{} {}\n", CMD_CONNECT, self.sock_port);
stream.set_read_timeout(Some(Duration::new(KATA_AGENT_VSOCK_TIMEOUT, 0)))?;
stream.set_write_timeout(Some(Duration::new(KATA_AGENT_VSOCK_TIMEOUT, 0)))?;
stream.write_all(test_msg.as_bytes())?;
// Now, see if we get the expected response
let stream_reader = stream.try_clone()?;
let mut reader = BufReader::new(&stream_reader);
let mut msg = String::new();
reader.read_line(&mut msg)?;
if msg.is_empty() {
return Err(anyhow!(
"stream reader get message is empty with port: {:?}",
self.sock_port
));
}
// The server returned the expected response; the connection succeeded.
if msg.starts_with(CMD_OK) {
let response = msg
.strip_prefix(CMD_OK)
.ok_or(format!("invalid response: {:?}", msg))
.map_err(|e| anyhow!(e))?
.trim();
debug!(sl!(), "Hybrid Vsock host-side port: {:?}", response);
// Unset the timeouts in order to return the socket to blocking mode.
stream.set_read_timeout(None)?;
stream.set_write_timeout(None)?;
} else {
return Err(anyhow!(
"failed to setup Hybrid Vsock connection: {:?}",
msg
));
}
}
Ok(stream)
}
}
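// An illustrative sketch of the hybrid-vsock handshake above, assuming the
// default debug console port of 1026:
//
//   client -> "CONNECT 1026\n"
//   server -> "OK <host-side port>\n"   // traffic is now forwarded to guest port 1026
//
// Any other reply, or no reply within the 5 second timeout, aborts the setup.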
fn setup_client(server_url: String, dbg_console_port: u32) -> anyhow::Result<UnixStream> {
// server address format: scheme://[cid|/x/domain.sock]:port
let url_fields: Vec<&str> = server_url.split("://").collect();
if url_fields.len() != 2 {
return Err(anyhow!("invalid URI"));
}
let scheme = url_fields[0].to_uppercase();
let sock_addr: Vec<&str> = url_fields[1].split(':').collect();
if sock_addr.len() != 2 {
return Err(anyhow!("invalid VSOCK server address URI"));
}
match scheme.as_str() {
// Hybrid Vsock: hvsock://<path>:<port>.
// Example: "hvsock:///x/y/z/kata.hvsock:port"
// Firecracker/Dragonball/CLH implement the hybrid vsock device model.
SCHEME_HYBRID_VSOCK => {
let hvsock_path = sock_addr[0].to_string();
if hvsock_path.is_empty() {
return Err(anyhow!("hvsock path cannot be empty"));
}
let hvsock = HvsockConfig::new(hvsock_path, dbg_console_port);
hvsock.setup_sock().context("set up hvsock")
}
// Vsock: vsock://<cid>:<port>
// Example: "vsock://31513974:1024"
// QEMU uses the vsock device model.
SCHEME_VSOCK => {
let sock_cid: u32 = match sock_addr[0] {
"-1" | "" => libc::VMADDR_CID_ANY,
_ => match sock_addr[0].parse::<u32>() {
Ok(cid) => cid,
Err(e) => return Err(anyhow!("vsock addr CID is INVALID: {:?}", e)),
},
};
let vsock = VsockConfig::new(sock_cid, dbg_console_port);
vsock.setup_sock().context("set up vsock")
}
// Anything else is an invalid URI scheme.
_ => {
return Err(anyhow!("invalid URI scheme: {:?}", scheme));
}
}
}
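// Illustrative server_url forms accepted by the match above (paths and CIDs
// are assumed examples):
//
//   "hvsock:///x/y/z/kata.hvsock:1026"  // hybrid vsock (Firecracker/Dragonball/CLH)
//   "vsock://31513974:1026"             // plain vsock (QEMU)
//   "vsock://-1:1026"                   // CID "-1" or "" maps to VMADDR_CID_ANY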
async fn get_agent_socket(sandbox_id: &str) -> anyhow::Result<String> {
let shim_client = MgmtClient::new(sandbox_id, Some(TIMEOUT))?;
// get agent sock from body when status code is OK.
let response = shim_client.get(AGENT_URL).await?;
let status = response.status();
if status != StatusCode::OK {
return Err(anyhow!("shim client get connection failed: {:?} ", status));
}
let body = hyper::body::to_bytes(response.into_body()).await?;
let agent_sock = String::from_utf8(body.to_vec())?;
Ok(agent_sock)
}
fn get_server_socket(sandbox_id: &str) -> anyhow::Result<String> {
let server_url = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?
.block_on(get_agent_socket(sandbox_id))
.context("get connection vsock")?;
Ok(server_url)
}
fn do_run_exec(sandbox_id: &str, dbg_console_vport: u32) -> anyhow::Result<()> {
// sandbox_id MUST be a long ID.
let server_url = get_server_socket(sandbox_id).context("get debug console socket URL")?;
if server_url.is_empty() {
return Err(anyhow!("server url is empty."));
}
let sock_stream = setup_client(server_url, dbg_console_vport)?;
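    // Bridge the local terminal and the console socket: register both with
    // epoll, put the terminal into raw mode so keystrokes pass through
    // unmodified, then pump events until the session ends.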
let mut epoll_context = EpollContext::new().expect("create epoll context");
epoll_context
.enable_stdin_event()
.expect("enable stdin event");
epoll_context
.init_debug_console_sock(sock_stream)
.expect("enable debug console sock");
let stdin_handle = io::stdin();
stdin_handle.lock().set_raw_mode().expect("set raw mode");
epoll_context
.do_process_handler()
.expect("do process handler");
epoll_context.do_exit();
Ok(())
}
// Entry point for the kata-ctl exec subcommand.
pub fn handle_exec(exec_args: ExecArguments) -> anyhow::Result<()> {
do_run_exec(exec_args.sandbox_id.as_str(), exec_args.vport)?;
Ok(())
}
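// Hypothetical call site for handle_exec(), assuming ExecArguments is a plain
// struct with public sandbox_id/vport fields (illustration only).
#[allow(dead_code)]
fn handle_exec_example() -> anyhow::Result<()> {
    let args = ExecArguments {
        // Hypothetical long sandbox ID; kata-ctl requires the full ID.
        sandbox_id: "f6b04bf0a5c4d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6"
            .to_string(),
        // Guest debug console vsock port.
        vport: 1026,
    };
    handle_exec(args)
}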
#[cfg(test)]
mod tests {
use super::*;
use micro_http::HttpServer;
#[test]
fn test_epoll_context_methods() {
let kata_hybrid_addr = "/tmp/kata_hybrid_vsock01.hvsock";
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
let mut server = HttpServer::new(kata_hybrid_addr).unwrap();
server.start_server().unwrap();
let sock_addr: UnixStream = UnixStream::connect(kata_hybrid_addr).unwrap();
let mut epoll_ctx = EpollContext::new().expect("epoll context");
epoll_ctx
.init_debug_console_sock(sock_addr)
.expect("enable debug console sock");
assert_eq!(epoll_ctx.stdin_index, 0);
assert!(epoll_ctx.debug_console_sock.is_some());
assert_eq!(epoll_ctx.dispatch_table[0], EpollDispatch::ServerSock);
assert_eq!(epoll_ctx.dispatch_table.len(), 1);
epoll_ctx.enable_stdin_event().expect("enable stdin event");
assert_eq!(epoll_ctx.stdin_index, 1);
assert_eq!(epoll_ctx.dispatch_table[1], EpollDispatch::Stdin);
assert_eq!(epoll_ctx.dispatch_table.len(), 2);
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
}
#[test]
fn test_setup_hvsock_failed() {
let kata_hybrid_addr = "/tmp/kata_hybrid_vsock02.hvsock";
let hybrid_sock_addr = "hvsock:///tmp/kata_hybrid_vsock02.hvsock:1024";
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
let dbg_console_port: u32 = 1026;
let mut server = HttpServer::new(kata_hybrid_addr).unwrap();
server.start_server().unwrap();
let stream = setup_client(hybrid_sock_addr.to_string(), dbg_console_port);
assert!(stream.is_err());
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
}
#[test]
fn test_setup_vsock_client_failed() {
let hybrid_sock_addr = "hvsock://8:1024";
let dbg_console_port: u32 = 1026;
let stream = setup_client(hybrid_sock_addr.to_string(), dbg_console_port);
assert!(stream.is_err());
}
}

View File

@@ -85,6 +85,9 @@ endif
################################################################################
.PHONY: all
all: image initrd
rootfs-%: $(ROOTFS_BUILD_DEST)/.%$(ROOTFS_MARKER_SUFFIX)
	@ # DON'T remove. This is not a cancellation rule.
@@ -97,11 +100,13 @@ $(ROOTFS_BUILD_DEST)/.%$(ROOTFS_MARKER_SUFFIX):: rootfs-builder/%
# extract it in a local folder.
# Notes:
# - assuming an uncompressed initrd.
ifeq (dracut,$(BUILD_METHOD))
.PRECIOUS: $(ROOTFS_BUILD_DEST)/.dracut$(ROOTFS_MARKER_SUFFIX)
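# dracut produces the initrd first; the rootfs tree is then recovered by
# unpacking the (uncompressed) initrd cpio archive below.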
$(ROOTFS_BUILD_DEST)/.dracut$(ROOTFS_MARKER_SUFFIX): $(TARGET_INITRD)
mkdir -p $(TARGET_ROOTFS)
(cd $(TARGET_ROOTFS); cat $< | cpio --extract --preserve-modification-time --make-directories)
@touch $@
endif
image-%: $(IMAGES_BUILD_DEST)/kata-containers-image-%.img
	@ # DON'T remove. This is not a cancellation rule.
@@ -117,9 +122,6 @@ initrd-%: $(IMAGES_BUILD_DEST)/kata-containers-initrd-%.img
$(IMAGES_BUILD_DEST)/kata-containers-initrd-%.img: rootfs-%
$(call silent_run,Creating initrd image for $*,$(INITRD_BUILDER) -o $@ $(ROOTFS_BUILD_DEST)/$*_rootfs)
.PHONY: all
all: image initrd
.PHONY: rootfs
rootfs: $(TARGET_ROOTFS_MARKER)

View File

@@ -17,6 +17,8 @@ RUN CGO_ENABLED=0 DISABLE_DOCS=1 make BUILDTAGS=containers_image_openpgp GO_DYN_
FROM ${IMAGE_REGISTRY}/ubuntu:@OS_VERSION@
@SET_PROXY@
# makedev tries to mknod from postinst
RUN [ -x /usr/bin/systemd-detect-virt ] || ( echo "echo docker" >/usr/bin/systemd-detect-virt && chmod +x /usr/bin/systemd-detect-virt )
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
apt-get --no-install-recommends -y install \
@@ -35,6 +37,7 @@ RUN apt-get update && \
libgpgme-dev \
libssl-dev \
make \
makedev \
multistrap \
musl-tools \
pkg-config \

View File

@@ -60,4 +60,10 @@ EOF
# Reduce image size and memory footprint by removing unnecessary files and directories.
rm -rf $rootfs_dir/usr/share/{bash-completion,bug,doc,info,lintian,locale,man,menu,misc,pixmaps,terminfo,zsh}
# Minimal set of device nodes needed when AGENT_INIT=yes so that the
# kernel can properly set up stdout/stdin/stderr for us
pushd $rootfs_dir/dev
MAKEDEV -v console tty ttyS null zero fd
popd
}

View File

@@ -3,27 +3,27 @@
# SPDX-License-Identifier: Apache-2.0
# Specify alternative base image, e.g. clefos for s390x
ARG IMAGE
FROM ${IMAGE:-registry.centos.org/centos}:7
ARG BASE_IMAGE_NAME=ubuntu
ARG BASE_IMAGE_TAG=20.04
FROM $BASE_IMAGE_NAME:$BASE_IMAGE_TAG
ENV DEBIAN_FRONTEND=noninteractive
ARG KATA_ARTIFACTS=./kata-static.tar.xz
ARG DESTINATION=/opt/kata-artifacts
COPY ${KATA_ARTIFACTS} ${WORKDIR}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN \
yum -y update && \
yum -y install xz && \
yum clean all && \
apt-get update && \
apt-get install -y --no-install-recommends apt-transport-https ca-certificates curl xz-utils systemd && \
mkdir -p /etc/apt/keyrings/ && \
curl -fsSLo /etc/apt/keyrings/kubernetes-archive-keyring.gpg https://packages.cloud.google.com/apt/doc/apt-key.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | tee /etc/apt/sources.list.d/kubernetes.list && \
apt-get update && \
apt-get install -y --no-install-recommends kubectl && \
apt-get clean && rm -rf /var/lib/apt/lists/ && \
mkdir -p ${DESTINATION} && \
tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION}
# hadolint will deny echo -e, heredocs don't work in Dockerfiles, shell substitution doesn't work with $'...'
RUN \
echo "[kubernetes]" >> /etc/yum.repos.d/kubernetes.repo && \
echo "name=Kubernetes" >> /etc/yum.repos.d/kubernetes.repo && \
echo "baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$(uname -m)" >> /etc/yum.repos.d/kubernetes.repo && \
echo "gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg" >> /etc/yum.repos.d/kubernetes.repo && \
yum -y install kubectl && \
yum clean all
COPY scripts ${DESTINATION}/scripts

View File

@@ -27,19 +27,19 @@ spec:
fieldRef:
fieldPath: spec.nodeName
securityContext:
privileged: false
privileged: true
volumeMounts:
- name: dbus
mountPath: /var/run/dbus
mountPath: /var/run/dbus/system_bus_socket
- name: systemd
mountPath: /run/systemd
mountPath: /run/systemd/system
volumes:
- name: dbus
hostPath:
path: /var/run/dbus
path: /var/run/dbus/system_bus_socket
- name: systemd
hostPath:
path: /run/systemd
path: /run/systemd/system
updateStrategy:
rollingUpdate:
maxUnavailable: 1

View File

@@ -31,7 +31,7 @@ spec:
- name: CONFIGURE_CC
value: "yes"
securityContext:
privileged: false
privileged: true
volumeMounts:
- name: crio-conf
mountPath: /etc/crio/
@@ -40,9 +40,9 @@ spec:
- name: kata-artifacts
mountPath: /opt/kata/
- name: dbus
mountPath: /var/run/dbus
mountPath: /var/run/dbus/system_bus_socket
- name: systemd
mountPath: /run/systemd
mountPath: /run/systemd/system
- name: local-bin
mountPath: /usr/local/bin/
volumes:
@@ -58,10 +58,10 @@ spec:
type: DirectoryOrCreate
- name: dbus
hostPath:
path: /var/run/dbus
path: /var/run/dbus/system_bus_socket
- name: systemd
hostPath:
path: /run/systemd
path: /run/systemd/system
- name: local-bin
hostPath:
path: /usr/local/bin/

Some files were not shown because too many files have changed in this diff.