CCv0: Merge main into CCv0 branch

Merge remote-tracking branch 'upstream/main' into CCv0

Fixes: #6428
Signed-off-by: Georgina Kinge <georgina.kinge@ibm.com>
Authored by Georgina Kinge on 2023-03-09 16:14:53 +00:00
112 changed files with 3535 additions and 1865 deletions


@@ -0,0 +1,96 @@
name: CI | Publish kata-deploy payload for amd64
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: ubuntu-latest
strategy:
matrix:
asset:
- cloud-hypervisor
- firecracker
- kernel
- nydus
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-amd64
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: ubuntu-latest
needs: build-asset
steps:
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-amd64
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-amd64
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: ubuntu-latest
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-amd64
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,108 @@
name: CI | Publish kata-deploy payload for arm64
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: arm64
strategy:
matrix:
asset:
- cloud-hypervisor
- firecracker
- kernel
- nydus
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-arm64
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: arm64
needs: build-asset
steps:
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-arm64
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-arm64
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: arm64
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-arm64
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,107 @@
name: CI | Publish kata-deploy payload for s390x
on:
workflow_call:
inputs:
target-arch:
required: true
type: string
jobs:
build-asset:
runs-on: s390x
strategy:
matrix:
asset:
- kernel
- shim-v2
- qemu
- rootfs-image
- rootfs-initrd
- virtiofsd
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
with:
fetch-depth: 0 # This is needed in order to keep the commit ids history
- name: Build ${{ matrix.asset }}
run: |
make "${KATA_ASSET}-tarball"
build_dir=$(readlink -f build)
# store-artifact does not work with symlink
sudo cp -r "${build_dir}" "kata-build"
sudo chown -R $(id -u):$(id -g) "kata-build"
env:
KATA_ASSET: ${{ matrix.asset }}
TAR_OUTPUT: ${{ matrix.asset }}.tar.gz
PUSH_TO_REGISTRY: yes
- name: store-artifact ${{ matrix.asset }}
uses: actions/upload-artifact@v3
with:
name: kata-artifacts-s390x
path: kata-build/kata-static-${{ matrix.asset }}.tar.xz
retention-days: 1
if-no-files-found: error
create-kata-tarball:
runs-on: s390x
needs: build-asset
steps:
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-artifacts
uses: actions/download-artifact@v3
with:
name: kata-artifacts-s390x
path: kata-artifacts
- name: merge-artifacts
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh kata-artifacts
- name: store-artifacts
uses: actions/upload-artifact@v3
with:
name: kata-static-tarball-s390x
path: kata-static.tar.xz
retention-days: 1
if-no-files-found: error
kata-payload:
needs: create-kata-tarball
runs-on: s390x
steps:
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Adjust a permission for repo
run: |
sudo chown -R $USER:$USER $GITHUB_WORKSPACE
- uses: actions/checkout@v3
- name: get-kata-tarball
uses: actions/download-artifact@v3
with:
name: kata-static-tarball-s390x
- name: build-and-push-kata-payload
id: build-and-push-kata-payload
run: |
./tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh \
$(pwd)/kata-static.tar.xz "quay.io/kata-containers/kata-deploy-ci" \
"kata-containers-${{ inputs.target-arch }}"


@@ -0,0 +1,47 @@
name: CI | Publish Kata Containers payload
on:
push:
branches:
- main
- stable-*
jobs:
build-assets-amd64:
uses: ./.github/workflows/payload-after-push-amd64.yaml
with:
target-arch: amd64
secrets: inherit
build-assets-arm64:
uses: ./.github/workflows/payload-after-push-arm64.yaml
with:
target-arch: arm64
secrets: inherit
build-assets-s390x:
uses: ./.github/workflows/payload-after-push-s390x.yaml
with:
target-arch: s390x
secrets: inherit
publish:
runs-on: ubuntu-latest
needs: [build-assets-amd64, build-assets-arm64, build-assets-s390x]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Login to Kata Containers quay.io
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_DEPLOYER_USERNAME }}
password: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
- name: Push multi-arch manifest
run: |
docker manifest create quay.io/kata-containers/kata-deploy-ci:kata-containers-latest \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-amd64 \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-arm64 \
--amend quay.io/kata-containers/kata-deploy-ci:kata-containers-s390x
docker manifest push quay.io/kata-containers/kata-deploy-ci:kata-containers-latest


@@ -43,8 +43,7 @@ jobs:
kernel_dir="tools/packaging/kernel/"
kernel_version_file="${kernel_dir}kata_config_version"
modified_files=$(git diff --name-only origin/CCv0..HEAD)
result=$(git whatchanged origin/CCv0..HEAD "${kernel_dir}" >>"/dev/null")
if git whatchanged origin/CCv0..HEAD "${kernel_dir}" >>"/dev/null"; then
if git diff --name-only origin/CCv0..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then
echo "Kernel directory has changed, checking if $kernel_version_file has been updated"
if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then
echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1)


@@ -2,6 +2,8 @@
This document is written **specifically for developers**: it is not intended for end users.
If you want to contribute changes that you have made, please read the [community guidelines](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md) for information about our processes.
# Assumptions
- You are working on a non-critical test or development system.
@@ -654,7 +656,7 @@ section when using rootfs, or when using initrd, complete the steps in the [Buil
Install the image:
>**Note**: When using an initrd image, replace the below rootfs image name `kata-containers.img`
>with the initrd image name `kata-containers-initrd.img`.
```bash
@@ -688,25 +690,25 @@ $ sudo crictl run -r kata container.yaml pod.yaml
The steps required to enable the debug console for QEMU differ slightly from
those for firecracker / cloud-hypervisor.
##### Enabling debug console for QEMU
Add `agent.debug_console` to the guest kernel command line to allow the agent process to start a debug console.
```bash
$ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_console"/g' "${kata_configuration_file}"
```
Here `kata_configuration_file` could point to `/etc/kata-containers/configuration.toml`
or `/usr/share/defaults/kata-containers/configuration.toml`
or `/opt/kata/share/defaults/kata-containers/configuration-{hypervisor}.toml`, if
you installed Kata Containers using `kata-deploy`.
##### Enabling debug console for cloud-hypervisor / firecracker
A slightly different configuration is required in the case of firecracker and cloud hypervisor.
Firecracker and cloud-hypervisor don't have a UNIX socket connected to `/dev/console`.
Hence, the kernel command line option `agent.debug_console` will not work for them.
These hypervisors support `hybrid vsocks`, which can be used for communication
between the host and the guest. The kernel command line option `agent.debug_console_vport`
was added to allow developers to specify on which `vsock` port the debugging console should be connected.
@@ -719,7 +721,7 @@ sudo sed -i -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.debug_cons
```
> **Note** Ports 1024 and 1025 are reserved for communication with the agent
> and gathering of agent logs respectively.
##### Connecting to the debug console


@@ -11,6 +11,7 @@ Kata Containers design documents:
- [Host cgroups](host-cgroups.md)
- [Agent systemd cgroup](agent-systemd-cgroup.md)
- [`Inotify` support](inotify.md)
- [`Hooks` support](hooks-handling.md)
- [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md)


@@ -0,0 +1,63 @@
# Kata Containers support for `Hooks`
## Introduction
During a container's lifecycle, different hooks can be executed to perform custom actions. Kata Containers supports two types of hooks: `OCI Hooks` and `Kata Hooks`.
### OCI Hooks
The OCI Spec stipulates six hooks that can be executed at different points in time and in different namespaces: `Prestart Hooks`, `CreateRuntime Hooks`, `CreateContainer Hooks`, `StartContainer Hooks`, `Poststart Hooks` and `Poststop Hooks`. Kata Containers supports these hooks as compatibly as possible.
The path and arguments of these hooks will be passed to Kata for execution via `bundle/config.json`. For example:
```
...
"hooks": {
"prestart": [
{
"path": "/usr/bin/prestart-hook",
"args": ["prestart-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
],
"createRuntime": [
{
"path": "/usr/bin/createRuntime-hook",
"args": ["createRuntime-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
]
}
...
```
### Kata Hooks
In Kata, we support three additional kinds of hooks executed in the guest VM: `Guest Prestart Hook`, `Guest Poststart Hook` and `Guest Poststop Hook`.
The executable files for Kata Hooks must be packaged in the *guest rootfs*. The path to those guest hooks is specified in the configuration file, and guest hooks must be stored in a subdirectory of `guest_hook_path` according to their hook type. For example:
+ In configuration file:
```
guest_hook_path="/usr/share/hooks"
```
+ In guest rootfs, prestart-hook is stored in `/usr/share/hooks/prestart/prestart-hook`.
## Execution
The table below summarizes when and where those different hooks will be executed in Kata Containers:
| Hook Name | Hook Type | Hook Path | Exec Place | Exec Time |
|---|---|---|---|---|
| `Prestart(deprecated)` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created. |
| `CreateRuntime` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created, after `Prestart` hooks. |
| `CreateContainer` | OCI hook | host runtime namespace | host vmm namespace* | After VM is started, before container is created, after `CreateRuntime` hooks. |
| `StartContainer` | OCI hook | guest container namespace | guest container namespace | After container is created, before container is started. |
| `Poststart` | OCI hook | host runtime namespace | host runtime namespace | After container is started, before start operation returns. |
| `Poststop` | OCI hook | host runtime namespace | host runtime namespace | After container is deleted, before delete operation returns. |
| `Guest Prestart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, before container command is executed. |
| `Guest Poststart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, after container command is executed, before start operation returns. |
| `Guest Poststop` | Kata hook | guest agent namespace | guest agent namespace | During delete operation, after container is deleted, before delete operation returns. |
+ `Hook Path` specifies where the hook's path is resolved.
+ `Exec Place` specifies the namespace in which those hooks are executed.
+ For `CreateContainer` hooks, the OCI spec requires them to run inside the container namespace while the hook executable path is resolved on the host runtime, which is a non-starter for VM-based containers. We therefore keep them running in the *host vmm namespace*.
+ `Exec Time` specifies at which time point those hooks can be executed.
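
To make the guest-hook flow concrete, here is a minimal, hypothetical sketch of how hooks could be discovered and executed. The `run_guest_hooks` helper, the paths, and the state payload are all illustrative assumptions, not the kata-agent implementation (which also honours per-hook args, env and timeouts):

```rust
use std::fs;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};

/// Run every executable under `<guest_hook_path>/<hook_type>/`, passing the
/// container state JSON on stdin as the OCI spec describes.
fn run_guest_hooks(guest_hook_path: &str, hook_type: &str, state_json: &str) -> std::io::Result<()> {
    let dir = Path::new(guest_hook_path).join(hook_type); // e.g. /usr/share/hooks/prestart
    if !dir.is_dir() {
        return Ok(()); // no hooks of this type packaged in the guest rootfs
    }
    for entry in fs::read_dir(&dir)? {
        let path = entry?.path();
        if !path.is_file() {
            continue;
        }
        let mut child = Command::new(&path).stdin(Stdio::piped()).spawn()?;
        if let Some(mut stdin) = child.stdin.take() {
            stdin.write_all(state_json.as_bytes())?; // pipe closes when `stdin` drops
        }
        let status = child.wait()?;
        if !status.success() {
            eprintln!("hook {} failed: {}", path.display(), status);
        }
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    // Hypothetical state payload; the real agent serializes oci::State.
    run_guest_hooks("/usr/share/hooks", "prestart", r#"{"status":"created"}"#)
}
```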


@@ -123,7 +123,7 @@ Refer to [this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-ec2-l
SSH into the machine
```bash
$ ssh -i MyKeyPair.pen ubuntu@${IP}
$ ssh -i MyKeyPair.pem ubuntu@${IP}
```
Go on to the next step.


@@ -267,6 +267,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"cgroupfs"
}
}
fn set_network_resources(


@@ -66,6 +66,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"mock"
}
}
impl Manager {


@@ -52,10 +52,12 @@ pub trait Manager {
fn as_any(&self) -> Result<&dyn Any> {
Err(anyhow!("not supported!"))
}
fn name(&self) -> &str;
}
impl Debug for dyn Manager + Send + Sync {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "CgroupManager")
write!(f, "{}", self.name())
}
}
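
The point of adding `name()` to the trait is visible in the `Debug` impl above: trait objects can now report which concrete cgroup manager is in use instead of the fixed string `CgroupManager`. A self-contained sketch of the same pattern, with illustrative types:

```rust
use std::fmt;

trait Manager {
    fn name(&self) -> &str;
}

struct CgroupFs;

impl Manager for CgroupFs {
    fn name(&self) -> &str {
        "cgroupfs"
    }
}

// The blanket Debug impl delegates to the concrete manager's name().
impl fmt::Debug for dyn Manager + Send + Sync {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.name())
    }
}

fn main() {
    let m: Box<dyn Manager + Send + Sync> = Box::new(CgroupFs);
    println!("{:?}", m); // prints "cgroupfs" rather than a generic label
}
```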


@@ -101,6 +101,10 @@ impl CgroupManager for Manager {
fn as_any(&self) -> Result<&dyn Any> {
Ok(self)
}
fn name(&self) -> &str {
"systemd"
}
}
impl Manager {


@@ -374,13 +374,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?;
let spec: oci::Spec = serde_json::from_str(spec_str)?;
log_child!(cfd_log, "notify parent to send oci process");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let process_str = std::str::from_utf8(&buf)?;
let oci_process: oci::Process = serde_json::from_str(process_str)?;
log_child!(cfd_log, "notify parent to send oci state");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let state_str = std::str::from_utf8(&buf)?;
let mut state: oci::State = serde_json::from_str(state_str)?;
log_child!(cfd_log, "notify parent to send cgroup manager");
write_sync(cwfd, SYNC_SUCCESS, "")?;
@@ -743,6 +748,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, buf)?;
}
if init {
// StartContainer Hooks:
// * should be run in container namespace
// * should be run after container is created and before container is started (before user-specific command is executed)
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#startcontainer-hooks
state.pid = std::process::id() as i32;
state.status = oci::ContainerState::Created;
if let Some(hooks) = spec.hooks.as_ref() {
let mut start_container_states = HookStates::new();
start_container_states.execute_hooks(&hooks.start_container, Some(state))?;
}
}
// With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles.
@@ -1323,7 +1341,6 @@ async fn join_namespaces(
write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?;
info!(logger, "wait child received oci spec");
read_async(pipe_r).await?;
info!(logger, "send oci process from parent to child");
@@ -1333,6 +1350,13 @@ async fn join_namespaces(
info!(logger, "wait child received oci process");
read_async(pipe_r).await?;
info!(logger, "try to send state from parent to child");
let state_str = serde_json::to_string(st)?;
write_async(pipe_w, SYNC_DATA, state_str.as_str()).await?;
info!(logger, "wait child received oci state");
read_async(pipe_r).await?;
let cm_str = if use_systemd_cgroup {
serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap())
} else {
@@ -1449,7 +1473,7 @@ impl LinuxContainer {
pub fn new<T: Into<String> + Display + Clone>(
id: T,
base: T,
mut config: Config,
config: Config,
logger: &Logger,
) -> Result<Self> {
let base = base.into();
@@ -1475,26 +1499,18 @@ impl LinuxContainer {
.context(format!("Cannot change owner of container {} root", id))?;
let spec = config.spec.as_ref().unwrap();
let linux = spec.linux.as_ref().unwrap();
// determine which cgroup driver to take and then assign to config.use_systemd_cgroup
// systemd: "[slice]:[prefix]:[name]"
// fs: "/path_a/path_b"
let cpath = if SYSTEMD_CGROUP_PATH_FORMAT.is_match(linux.cgroups_path.as_str()) {
config.use_systemd_cgroup = true;
let cpath = if config.use_systemd_cgroup {
if linux.cgroups_path.len() == 2 {
format!("system.slice:kata_agent:{}", id.as_str())
} else {
linux.cgroups_path.clone()
}
} else if linux.cgroups_path.is_empty() {
format!("/{}", id.as_str())
} else {
config.use_systemd_cgroup = false;
if linux.cgroups_path.is_empty() {
format!("/{}", id.as_str())
} else {
linux.cgroups_path.clone()
}
// if we have a systemd cgroup path we need to convert it to a fs cgroup path
linux.cgroups_path.replace(':', "/")
};
let cgroup_manager: Box<dyn Manager + Send + Sync> = if config.use_systemd_cgroup {
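
Restated as a standalone function, the new `cpath` logic in this hunk looks like the sketch below; `derive_cpath` is an illustrative name, and unlike the real code it takes `use_systemd_cgroup` as an input instead of mutating `config`:

```rust
fn derive_cpath(cgroups_path: &str, id: &str, use_systemd_cgroup: bool) -> String {
    if use_systemd_cgroup {
        if cgroups_path.len() == 2 {
            // "::" means an empty [slice]:[prefix]:[name], so use a default scope
            format!("system.slice:kata_agent:{}", id)
        } else {
            cgroups_path.to_string()
        }
    } else if cgroups_path.is_empty() {
        format!("/{}", id)
    } else {
        // A systemd-style path handed to the fs driver is converted to an fs
        // path by replacing the ':' separators (a no-op for plain fs paths).
        cgroups_path.replace(':', "/")
    }
}

fn main() {
    assert_eq!(derive_cpath("::", "c1", true), "system.slice:kata_agent:c1");
    assert_eq!(derive_cpath("", "c1", false), "/c1");
    assert_eq!(derive_cpath("x.slice:kata:c1", "c1", false), "x.slice/kata/c1");
}
```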


@@ -153,13 +153,17 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
let prestart = hook_grpc_to_oci(h.Prestart.as_ref());
let create_runtime = hook_grpc_to_oci(h.CreateRuntime.as_ref());
let create_container = hook_grpc_to_oci(h.CreateContainer.as_ref());
let start_container = hook_grpc_to_oci(h.StartContainer.as_ref());
let poststart = hook_grpc_to_oci(h.Poststart.as_ref());
let poststop = hook_grpc_to_oci(h.Poststop.as_ref());
oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}
@@ -837,6 +841,45 @@ mod tests {
Timeout: 10,
..Default::default()
}])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
@@ -866,6 +909,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
TestData {
@@ -898,6 +959,45 @@ mod tests {
Timeout: 10,
..Default::default()
}])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default()
},
result: oci::Hooks {
@@ -914,6 +1014,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
},
},
];


@@ -340,7 +340,7 @@ async fn start_sandbox(
sandbox.lock().await.sender = Some(tx);
// vsock:///dev/vsock, port
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str())?;
let mut server = rpc::start(sandbox.clone(), config.server_addr.as_str(), init_mode)?;
server.start().await?;
rx.await?;


@@ -39,7 +39,7 @@ use protocols::{
image_ttrpc_async as image_ttrpc,
};
use rustjail::cgroups::notifier;
use rustjail::container::{BaseContainer, Container, LinuxContainer};
use rustjail::container::{BaseContainer, Container, LinuxContainer, SYSTEMD_CGROUP_PATH_FORMAT};
use rustjail::process::Process;
use rustjail::specconv::CreateOpts;
@@ -145,6 +145,7 @@ macro_rules! is_allowed {
#[derive(Clone, Debug)]
pub struct AgentService {
sandbox: Arc<Mutex<Sandbox>>,
init_mode: bool,
}
// A container ID must match this regex:
@@ -280,9 +281,20 @@ impl AgentService {
// restore the cwd for kata-agent process.
defer!(unistd::chdir(&olddir).unwrap());
// determine which cgroup driver to take and then assign to use_systemd_cgroup
// systemd: "[slice]:[prefix]:[name]"
// fs: "/path_a/path_b"
// If agent is init we can't use systemd cgroup mode, no matter what the host tells us
let cgroups_path = oci.linux.as_ref().map_or("", |linux| &linux.cgroups_path);
let use_systemd_cgroup = if self.init_mode {
false
} else {
SYSTEMD_CGROUP_PATH_FORMAT.is_match(cgroups_path)
};
let opts = CreateOpts {
cgroup_name: "".to_string(),
use_systemd_cgroup: false,
use_systemd_cgroup,
no_pivot_root: s.no_pivot_root,
no_new_keyring: false,
spec: Some(oci.clone()),
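
A minimal sketch of the driver selection described in the comment above; the shape check stands in for the real `SYSTEMD_CGROUP_PATH_FORMAT` regex (an assumed simplification):

```rust
/// A systemd cgroups_path has the three-part "[slice]:[prefix]:[name]" shape;
/// a cgroupfs path is an absolute filesystem path such as "/path_a/path_b".
fn use_systemd_cgroup(cgroups_path: &str, init_mode: bool) -> bool {
    if init_mode {
        // When the agent runs as init there is no systemd in the guest, so
        // systemd cgroup mode is never possible, whatever the host requests.
        return false;
    }
    cgroups_path.split(':').count() == 3 && !cgroups_path.starts_with('/')
}

fn main() {
    assert!(use_systemd_cgroup("system.slice:kata_agent:1234", false));
    assert!(!use_systemd_cgroup("/kubepods/pod1234", false));
    assert!(!use_systemd_cgroup("system.slice:kata_agent:1234", true)); // init mode wins
}
```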
@@ -1791,9 +1803,12 @@ async fn read_stream(reader: Arc<Mutex<ReadHalf<PipeStream>>>, l: usize) -> Resu
Ok(content)
}
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str) -> Result<TtrpcServer> {
let agent_service = Box::new(AgentService { sandbox: s.clone() })
as Box<dyn agent_ttrpc::AgentService + Send + Sync>;
pub fn start(s: Arc<Mutex<Sandbox>>, server_address: &str, init_mode: bool) -> Result<TtrpcServer> {
let agent_service = Box::new(AgentService {
sandbox: s.clone(),
init_mode,
}) as Box<dyn agent_ttrpc::AgentService + Send + Sync>;
let agent_worker = Arc::new(agent_service);
let health_service = Box::new(HealthService {}) as Box<dyn health_ttrpc::Health + Send + Sync>;
@@ -2000,23 +2015,18 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
));
}
let parent = path.parent();
let dir = if let Some(parent) = parent {
parent.to_path_buf()
} else {
PathBuf::from("/")
};
fs::create_dir_all(&dir).or_else(|e| {
if e.kind() != std::io::ErrorKind::AlreadyExists {
return Err(e);
if let Some(parent) = path.parent() {
if !parent.exists() {
let dir = parent.to_path_buf();
if let Err(e) = fs::create_dir_all(&dir) {
if e.kind() != std::io::ErrorKind::AlreadyExists {
return Err(e.into());
}
} else {
std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?;
}
}
Ok(())
})?;
std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(req.dir_mode))?;
}
let sflag = stat::SFlag::from_bits_truncate(req.file_mode);
@@ -2314,6 +2324,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::UpdateInterfaceRequest::default();
@@ -2331,6 +2342,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::UpdateRoutesRequest::default();
@@ -2348,6 +2360,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let req = protocols::agent::AddARPNeighborsRequest::default();
@@ -2481,6 +2494,7 @@ mod tests {
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let result = agent_service
@@ -2961,6 +2975,7 @@ OtherField:other
let sandbox = Sandbox::new(&logger).unwrap();
let agent_service = Box::new(AgentService {
sandbox: Arc::new(Mutex::new(sandbox)),
init_mode: true,
});
let ctx = mk_ttrpc_context();

File diff suppressed because it is too large


@@ -19,11 +19,11 @@ dbs-boot = "0.3.0"
dbs-device = "0.2.0"
dbs-interrupt = { version = "0.2.0", features = ["kvm-irq"] }
dbs-legacy-devices = "0.1.0"
dbs-upcall = { version = "0.1.0", optional = true }
dbs-upcall = { version = "0.2.0", optional = true }
dbs-utils = "0.2.0"
dbs-virtio-devices = { version = "0.1.0", optional = true, features = ["virtio-mmio"] }
kvm-bindings = "0.5.0"
kvm-ioctls = "0.11.0"
dbs-virtio-devices = { version = "0.2.0", optional = true, features = ["virtio-mmio"] }
kvm-bindings = "0.6.0"
kvm-ioctls = "0.12.0"
lazy_static = "1.2"
libc = "0.2.39"
linux-loader = "0.6.0"
@@ -37,8 +37,9 @@ slog = "2.5.2"
slog-scope = "4.4.0"
thiserror = "1"
vmm-sys-util = "0.11.0"
virtio-queue = { version = "0.4.0", optional = true }
virtio-queue = { version = "0.6.0", optional = true }
vm-memory = { version = "0.9.0", features = ["backend-mmap"] }
crossbeam-channel = "0.5.6"
[dev-dependencies]
slog-term = "2.9.0"
@@ -47,7 +48,7 @@ test-utils = { path = "../libs/test-utils" }
[features]
acpi = []
atomic-guest-memory = [ "vm-memory/backend-atomic" ]
atomic-guest-memory = ["vm-memory/backend-atomic"]
hotplug = ["virtio-vsock"]
virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"]
virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"]


@@ -59,6 +59,8 @@ pub struct InstanceInfo {
pub vmm_version: String,
/// The pid of the current VMM process.
pub pid: u32,
/// The tid of the current VMM master thread.
pub master_tid: u32,
/// The state of async actions.
pub async_state: AsyncState,
/// List of tids of vcpu threads (vcpu index, tid)
@@ -77,6 +79,7 @@ impl InstanceInfo {
state: InstanceState::Uninitialized,
vmm_version,
pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized,
tids: Vec::new(),
last_instance_downtime: 0,
@@ -97,6 +100,7 @@ impl Default for InstanceInfo {
state: InstanceState::Uninitialized,
vmm_version: env!("CARGO_PKG_VERSION").to_string(),
pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized,
tids: Vec::new(),
last_instance_downtime: 0,


@@ -7,8 +7,8 @@
// found in the THIRD-PARTY file.
use std::fs::File;
use std::sync::mpsc::{Receiver, Sender, TryRecvError};
use crossbeam_channel::{Receiver, Sender, TryRecvError};
use log::{debug, error, info, warn};
use crate::error::{Result, StartMicroVmError, StopMicrovmError};
@@ -676,9 +676,9 @@ fn handle_cpu_topology(
#[cfg(test)]
mod tests {
use std::sync::mpsc::channel;
use std::sync::{Arc, Mutex};
use crossbeam_channel::unbounded;
use dbs_utils::epoll_manager::EpollManager;
use test_utils::skip_if_not_root;
use vmm_sys_util::tempfile::TempFile;
@@ -702,8 +702,8 @@ mod tests {
}
fn check_request(&mut self) {
let (to_vmm, from_api) = channel();
let (to_api, from_vmm) = channel();
let (to_vmm, from_api) = unbounded();
let (to_api, from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
@@ -728,8 +728,8 @@ mod tests {
fn test_vmm_action_receive_unknown() {
skip_if_not_root!();
let (_to_vmm, from_api) = channel();
let (to_api, _from_vmm) = channel();
let (_to_vmm, from_api) = unbounded();
let (to_api, _from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
let mut vservice = VmmService::new(from_api, to_api);
@@ -742,8 +742,8 @@ mod tests {
#[should_panic]
#[test]
fn test_vmm_action_disconnected() {
let (to_vmm, from_api) = channel();
let (to_api, _from_vmm) = channel();
let (to_vmm, from_api) = unbounded();
let (to_api, _from_vmm) = unbounded();
let epoll_mgr = EpollManager::default();
let vmm = Arc::new(Mutex::new(create_vmm_instance(epoll_mgr.clone())));
let mut vservice = VmmService::new(from_api, to_api);


@@ -231,7 +231,7 @@ where
info.config.check_conflicts(config)?;
}
}
self.info_list[index] = device_info;
self.info_list[index].config = config.clone();
index
}
None => {


@@ -147,17 +147,13 @@ pub type Result<T> = ::std::result::Result<T, DeviceMgrError>;
/// Type of the dragonball virtio devices.
#[cfg(feature = "dbs-virtio-devices")]
pub type DbsVirtioDevice = Box<
dyn VirtioDevice<
GuestAddressSpaceImpl,
virtio_queue::QueueStateSync,
vm_memory::GuestRegionMmap,
>,
dyn VirtioDevice<GuestAddressSpaceImpl, virtio_queue::QueueSync, vm_memory::GuestRegionMmap>,
>;
/// Type of the dragonball virtio mmio devices.
#[cfg(feature = "dbs-virtio-devices")]
pub type DbsMmioV2Device =
MmioV2Device<GuestAddressSpaceImpl, virtio_queue::QueueStateSync, vm_memory::GuestRegionMmap>;
MmioV2Device<GuestAddressSpaceImpl, virtio_queue::QueueSync, vm_memory::GuestRegionMmap>;
/// Struct to support transactional operations for device management.
pub struct DeviceManagerTx {


@@ -9,6 +9,8 @@
//! Error codes for the virtual machine monitor subsystem.
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
#[cfg(feature = "dbs-virtio-devices")]
use dbs_virtio_devices::Error as VirtIoError;
@@ -61,6 +63,11 @@ pub enum Error {
#[error("failed to write MP table to guest memory: {0}")]
MpTableSetup(#[source] dbs_boot::mptable::Error),
/// Create pmu device error
#[cfg(target_arch = "aarch64")]
#[error("Create pmu device error: {0}")]
PmuDeviceError(#[source] PmuError),
/// Fail to boot system
#[error("failed to boot system: {0}")]
BootSystem(#[source] dbs_boot::Error),


@@ -11,7 +11,7 @@ use std::sync::mpsc::{channel, Sender};
use std::sync::Arc;
use crate::IoManagerCached;
use dbs_arch::regs;
use dbs_arch::{regs, VpmuFeatureLevel};
use dbs_boot::get_fdt_addr;
use dbs_utils::time::TimestampUs;
use kvm_ioctls::{VcpuFd, VmFd};
@@ -81,7 +81,7 @@ impl Vcpu {
/// * `_pgtable_addr` - pgtable address for ap vcpu (not used in aarch64)
pub fn configure(
&mut self,
_vcpu_config: &VcpuConfig,
vcpu_config: &VcpuConfig,
vm_fd: &VmFd,
vm_as: &GuestAddressSpaceImpl,
kernel_load_addr: Option<GuestAddress>,
@@ -99,6 +99,9 @@ impl Vcpu {
if self.id > 0 {
kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
}
if vcpu_config.vpmu_feature == VpmuFeatureLevel::FullyEnabled {
kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
}
self.fd.vcpu_init(&kvi).map_err(VcpuError::VcpuArmInit)?;


@@ -7,9 +7,7 @@ mod sm;
mod vcpu_impl;
mod vcpu_manager;
#[cfg(target_arch = "x86_64")]
use dbs_arch::cpuid::VpmuFeatureLevel;
use dbs_arch::VpmuFeatureLevel;
pub use vcpu_manager::{VcpuManager, VcpuManagerError, VcpuResizeInfo};
#[cfg(feature = "hotplug")]
@@ -32,6 +30,6 @@ pub struct VcpuConfig {
/// if vpmu feature is Disabled, it means vpmu feature is off (by default)
/// if vpmu feature is LimitedlyEnabled, it means minimal vpmu counters are supported (cycles and instructions)
/// if vpmu feature is FullyEnabled, it means all vpmu counters are supported
#[cfg(target_arch = "x86_64")]
/// For aarch64, VpmuFeatureLevel only supports Disabled and FullyEnabled.
pub vpmu_feature: VpmuFeatureLevel,
}


@@ -441,75 +441,77 @@ impl Vcpu {
/// Returns error or enum specifying whether emulation was handled or interrupted.
fn run_emulation(&mut self) -> Result<VcpuEmulation> {
match Vcpu::emulate(&self.fd) {
Ok(run) => match run {
#[cfg(target_arch = "x86_64")]
VcpuExit::IoIn(addr, data) => {
let _ = self.io_mgr.pio_read(addr, data);
METRICS.vcpu.exit_io_in.inc();
Ok(VcpuEmulation::Handled)
}
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => {
if !self.check_io_port_info(addr, data)? {
let _ = self.io_mgr.pio_write(addr, data);
Ok(run) => {
match run {
#[cfg(target_arch = "x86_64")]
VcpuExit::IoIn(addr, data) => {
let _ = self.io_mgr.pio_read(addr, data);
METRICS.vcpu.exit_io_in.inc();
Ok(VcpuEmulation::Handled)
}
METRICS.vcpu.exit_io_out.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioRead(addr, data) => {
let _ = self.io_mgr.mmio_read(addr, data);
METRICS.vcpu.exit_mmio_read.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioWrite(addr, data) => {
let _ = self.io_mgr.mmio_write(addr, data);
METRICS.vcpu.exit_mmio_write.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::Hlt => {
info!("Received KVM_EXIT_HLT signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::Shutdown => {
info!("Received KVM_EXIT_SHUTDOWN signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
// Documentation specifies that below kvm exits are considered errors.
VcpuExit::FailEntry => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_FAIL_ENTRY signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::InternalError => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
info!(
"Received KVM_SYSTEM_EVENT: type: {}, event: {}",
event_type, event_flags
);
Ok(VcpuEmulation::Stopped)
#[cfg(target_arch = "x86_64")]
VcpuExit::IoOut(addr, data) => {
if !self.check_io_port_info(addr, data)? {
let _ = self.io_mgr.pio_write(addr, data);
}
METRICS.vcpu.exit_io_out.inc();
Ok(VcpuEmulation::Handled)
}
_ => {
METRICS.vcpu.failures.inc();
error!(
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
event_type, event_flags
);
VcpuExit::MmioRead(addr, data) => {
let _ = self.io_mgr.mmio_read(addr, data);
METRICS.vcpu.exit_mmio_read.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::MmioWrite(addr, data) => {
let _ = self.io_mgr.mmio_write(addr, data);
METRICS.vcpu.exit_mmio_write.inc();
Ok(VcpuEmulation::Handled)
}
VcpuExit::Hlt => {
info!("Received KVM_EXIT_HLT signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::Shutdown => {
info!("Received KVM_EXIT_SHUTDOWN signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
// Documentation specifies that below kvm exits are considered errors.
VcpuExit::FailEntry(reason, cpu) => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_FAIL_ENTRY signal, reason {reason}, cpu number {cpu}");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::InternalError => {
METRICS.vcpu.failures.inc();
error!("Received KVM_EXIT_INTERNAL_ERROR signal");
Err(VcpuError::VcpuUnhandledKvmExit)
}
VcpuExit::SystemEvent(event_type, event_flags) => match event_type {
KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => {
info!(
"Received KVM_SYSTEM_EVENT: type: {}, event: {}",
event_type, event_flags
);
Ok(VcpuEmulation::Stopped)
}
_ => {
METRICS.vcpu.failures.inc();
error!(
"Received KVM_SYSTEM_EVENT signal type: {}, flag: {}",
event_type, event_flags
);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
r => {
METRICS.vcpu.failures.inc();
// TODO: Are we sure we want to finish running a vcpu upon
// receiving a vm exit that is not necessarily an error?
error!("Unexpected exit reason on vcpu run: {:?}", r);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
r => {
METRICS.vcpu.failures.inc();
// TODO: Are we sure we want to finish running a vcpu upon
// receiving a vm exit that is not necessarily an error?
error!("Unexpected exit reason on vcpu run: {:?}", r);
Err(VcpuError::VcpuUnhandledKvmExit)
}
},
}
// The unwrap on raw_os_error can only fail if we have a logic
// error in our code in which case it is better to panic.
Err(ref e) => {
@@ -758,6 +760,11 @@ impl Vcpu {
// State machine reached its end.
StateMachine::finish(Self::exited)
}
/// Get vcpu file descriptor.
pub fn vcpu_fd(&self) -> &VcpuFd {
self.fd.as_ref()
}
}
impl Drop for Vcpu {
@@ -786,7 +793,7 @@ pub mod tests {
MmioWrite,
Hlt,
Shutdown,
FailEntry,
FailEntry(u64, u32),
InternalError,
Unknown,
SystemEvent(u32, u64),
@@ -807,7 +814,9 @@ pub mod tests {
EmulationCase::MmioWrite => Ok(VcpuExit::MmioWrite(0, &[])),
EmulationCase::Hlt => Ok(VcpuExit::Hlt),
EmulationCase::Shutdown => Ok(VcpuExit::Shutdown),
EmulationCase::FailEntry => Ok(VcpuExit::FailEntry),
EmulationCase::FailEntry(error_type, cpu_num) => {
Ok(VcpuExit::FailEntry(*error_type, *cpu_num))
}
EmulationCase::InternalError => Ok(VcpuExit::InternalError),
EmulationCase::Unknown => Ok(VcpuExit::Unknown),
EmulationCase::SystemEvent(event_type, event_flags) => {
@@ -850,6 +859,8 @@ pub mod tests {
#[cfg(target_arch = "aarch64")]
fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
use kvm_ioctls::Kvm;
use std::os::fd::AsRawFd;
// Call for kvm too frequently would cause error in some host kernel.
std::thread::sleep(std::time::Duration::from_millis(5));
@@ -918,7 +929,7 @@ pub mod tests {
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));
// KVM_EXIT_FAIL_ENTRY signal
*(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry;
*(EMULATE_RES.lock().unwrap()) = EmulationCase::FailEntry(0, 0);
let res = vcpu.run_emulation();
assert!(matches!(res, Err(VcpuError::VcpuUnhandledKvmExit)));


@@ -15,6 +15,7 @@ use std::sync::mpsc::{channel, Receiver, RecvError, RecvTimeoutError, Sender};
use std::sync::{Arc, Barrier, Mutex, RwLock};
use std::time::Duration;
use dbs_arch::VpmuFeatureLevel;
#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))]
use dbs_upcall::{DevMgrService, UpcallClient};
use dbs_utils::epoll_manager::{EpollManager, EventOps, EventSet, Events, MutEventSubscriber};
@@ -281,11 +282,20 @@ impl VcpuManager {
let supported_cpuid = kvm_context
.supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
.map_err(VcpuManagerError::Kvm)?;
#[cfg(target_arch = "x86_64")]
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
let vpmu_feature_level = match vm_config_info.vpmu_feature {
1 => dbs_arch::cpuid::VpmuFeatureLevel::LimitedlyEnabled,
2 => dbs_arch::cpuid::VpmuFeatureLevel::FullyEnabled,
_ => dbs_arch::cpuid::VpmuFeatureLevel::Disabled,
#[cfg(target_arch = "x86_64")]
1 => VpmuFeatureLevel::LimitedlyEnabled,
#[cfg(target_arch = "aarch64")]
1 => {
log::warn!(
"Limitedly enabled vpmu feature isn't supported on aarch64 for now.\
This will be supported in the future. The vpmu_feature will be set disabled!"
);
VpmuFeatureLevel::Disabled
}
2 => VpmuFeatureLevel::FullyEnabled,
_ => VpmuFeatureLevel::Disabled,
};
let vcpu_manager = Arc::new(Mutex::new(VcpuManager {
@@ -297,7 +307,6 @@ impl VcpuManager {
cores_per_die: vm_config_info.cpu_topology.cores_per_die,
dies_per_socket: vm_config_info.cpu_topology.dies_per_socket,
sockets: vm_config_info.cpu_topology.sockets,
#[cfg(target_arch = "x86_64")]
vpmu_feature: vpmu_feature_level,
},
vcpu_seccomp_filter,
@@ -799,6 +808,11 @@ impl VcpuManager {
)
.map_err(VcpuManagerError::Vcpu)
}
/// get vpmu_feature config
pub fn vpmu_feature(&self) -> VpmuFeatureLevel {
self.vcpu_config.vpmu_feature
}
}
#[cfg(feature = "hotplug")]
@@ -887,7 +901,9 @@ mod hotplug {
cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]);
let req = DevMgrRequest::AddVcpu(CpuDevRequest {
count: cpu_ids.len() as u8,
#[cfg(target_arch = "x86_64")]
apic_ids: cpu_ids_array,
#[cfg(target_arch = "x86_64")]
apic_ver: APIC_VERSION,
});
self.send_upcall_action(upcall_client, req)?;
@@ -924,7 +940,9 @@ mod hotplug {
cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]);
let req = DevMgrRequest::DelVcpu(CpuDevRequest {
count: cpu_num_to_be_del as u8,
#[cfg(target_arch = "x86_64")]
apic_ids: cpu_ids_array,
#[cfg(target_arch = "x86_64")]
apic_ver: APIC_VERSION,
});
self.send_upcall_action(upcall_client, req)?;
@@ -969,7 +987,10 @@ mod hotplug {
vcpu_state_sender
.send(VcpuStateEvent::Hotplug((
result,
#[cfg(target_arch = "x86_64")]
resp.info.apic_id_index,
#[cfg(target_arch = "aarch64")]
resp.info.cpu_id,
)))
.unwrap();
vcpu_state_event.write(1).unwrap();


@@ -11,7 +11,8 @@ use std::fmt::Debug;
use std::ops::Deref;
use dbs_arch::gic::GICDevice;
use dbs_arch::{DeviceInfoForFDT, DeviceType};
use dbs_arch::pmu::initialize_pmu;
use dbs_arch::{DeviceInfoForFDT, DeviceType, VpmuFeatureLevel};
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
@@ -43,6 +44,7 @@ fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
device_info: Option<&HashMap<(DeviceType, String), T>>,
gic_device: &Box<dyn GICDevice>,
initrd: &Option<super::InitrdConfig>,
vpmu_feature: &VpmuFeatureLevel,
) -> super::Result<()> {
dbs_boot::fdt::create_fdt(
guest_mem,
@@ -51,6 +53,7 @@ fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
device_info,
gic_device,
initrd,
vpmu_feature,
)
.map_err(Error::BootSystem)?;
Ok(())
@@ -76,6 +79,23 @@ impl Vm {
Ok(())
}
/// Setup pmu devices for guest vm.
pub fn setup_pmu_devices(&mut self) -> std::result::Result<(), StartMicroVmError> {
let vm = self.vm_fd();
let mut vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
let vpmu_feature = vcpu_manager.vpmu_feature();
if vpmu_feature == VpmuFeatureLevel::Disabled {
return Ok(());
}
for vcpu in vcpu_manager.vcpus_mut() {
initialize_pmu(vm, vcpu.vcpu_fd())
.map_err(|e| StartMicroVmError::ConfigureVm(VmError::SetupPmu(e)))?;
}
Ok(())
}
/// Initialize the virtual machine instance.
///
/// It initialize the virtual machine instance by:
@@ -113,6 +133,7 @@ impl Vm {
.create_boot_vcpus(request_ts, kernel_loader_result.kernel_load)
.map_err(StartMicroVmError::Vcpu)?;
self.setup_interrupt_controller()?;
self.setup_pmu_devices()?;
self.init_devices(epoll_mgr)?;
Ok(())
@@ -129,6 +150,7 @@ impl Vm {
initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicroVmError> {
let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
let vpmu_feature = vcpu_manager.vpmu_feature();
let vcpu_mpidr = vcpu_manager
.vcpus()
.into_iter()
@@ -143,6 +165,7 @@ impl Vm {
self.device_manager.get_mmio_device_info(),
self.get_irqchip(),
&initrd,
&vpmu_feature,
)
.map_err(StartMicroVmError::ConfigureSystem)
}


@@ -10,6 +10,8 @@ use std::sync::{Arc, Mutex, RwLock};
use dbs_address_space::AddressSpace;
#[cfg(target_arch = "aarch64")]
use dbs_arch::gic::GICDevice;
#[cfg(target_arch = "aarch64")]
use dbs_arch::pmu::PmuError;
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
@@ -69,6 +71,11 @@ pub enum VmError {
#[cfg(target_arch = "aarch64")]
#[error("failed to configure GIC")]
SetupGIC(GICError),
/// Cannot setup pmu device
#[cfg(target_arch = "aarch64")]
#[error("failed to setup pmu device")]
SetupPmu(#[source] PmuError),
}
/// Configuration information for user defined NUMA nodes.
@@ -1033,6 +1040,7 @@ pub mod tests {
assert!(vm.remove_devices().is_ok());
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_run_code() {
skip_if_not_root!();


@@ -71,6 +71,7 @@ pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64;
// Default configuration for Cloud Hypervisor (CH)
pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor";
pub const DEFAULT_CH_ROOTFS_TYPE: &str = "ext4";
pub const DEFAULT_CH_CONTROL_PATH: &str = "";
pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom";
pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz";


@@ -103,8 +103,9 @@ pub struct Runtime {
pub enable_pprof: bool,
/// If enabled, static resource management will calculate the vcpu and memory for the sandbox/container
/// And pod configured this will not be able to further update its CPU/Memory resource
#[serde(default)]
pub static_resource_mgmt: bool,
pub static_sandbox_resource_mgmt: bool,
/// Determines whether container seccomp profiles are passed to the virtual machine and
/// applied by the kata agent. If set to true, seccomp is not applied within the guest.


@@ -193,6 +193,12 @@ pub struct Hooks {
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub prestart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_runtime: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub start_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststop: Vec<Hook>,
@@ -1401,6 +1407,7 @@ mod tests {
env: vec![],
timeout: None,
}],
..Default::default()
}),
annotations: [
("com.example.key1".to_string(), "value1".to_string()),


@@ -166,6 +166,15 @@ message Hooks {
// Poststop is a list of hooks to be run after the container process exits.
repeated Hook Poststop = 3 [(gogoproto.nullable) = false];
// Createruntime is a list of hooks to be run during the creation of runtime(sandbox).
repeated Hook CreateRuntime = 4 [(gogoproto.nullable) = false];
// CreateContainer is a list of hooks to be run after VM is started, and before container is created.
repeated Hook CreateContainer = 5 [(gogoproto.nullable) = false];
// StartContainer is a list of hooks to be run after container is created, but before it is started.
repeated Hook StartContainer = 6 [(gogoproto.nullable) = false];
}
message Hook {


@@ -294,6 +294,9 @@ impl From<oci::Hooks> for crate::oci::Hooks {
fn from(from: Hooks) -> Self {
crate::oci::Hooks {
Prestart: from_vec(from.prestart),
CreateRuntime: from_vec(from.create_runtime),
CreateContainer: from_vec(from.create_container),
StartContainer: from_vec(from.start_container),
Poststart: from_vec(from.poststart),
Poststop: from_vec(from.poststop),
unknown_fields: Default::default(),
@@ -970,20 +973,34 @@ impl From<crate::oci::Hook> for oci::Hook {
impl From<crate::oci::Hooks> for oci::Hooks {
fn from(mut from: crate::oci::Hooks) -> Self {
let mut prestart = Vec::new();
for hook in from.take_Prestart().to_vec() {
prestart.push(hook.into())
}
let mut poststart = Vec::new();
for hook in from.take_Poststart().to_vec() {
poststart.push(hook.into());
}
let mut poststop = Vec::new();
for hook in from.take_Poststop().to_vec() {
poststop.push(hook.into());
}
let prestart = from.take_Prestart().into_iter().map(|i| i.into()).collect();
let create_runtime = from
.take_CreateRuntime()
.into_iter()
.map(|i| i.into())
.collect();
let create_container = from
.take_CreateContainer()
.into_iter()
.map(|i| i.into())
.collect();
let start_container = from
.take_StartContainer()
.into_iter()
.map(|i| i.into())
.collect();
let poststart = from
.take_Poststart()
.into_iter()
.map(|i| i.into())
.collect();
let poststop = from.take_Poststop().into_iter().map(|i| i.into()).collect();
oci::Hooks {
prestart,
create_runtime,
create_container,
start_container,
poststart,
poststop,
}

src/runtime-rs/Cargo.lock (generated, 1243 lines changed)

File diff suppressed because it is too large


@@ -293,5 +293,12 @@ experimental=@DEFAULTEXPFEATURES@
# (default: false)
# enable_pprof = true
static_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
# when a hardware architecture or hypervisor solution is used which does not support CPU and/or memory hotplug.
# Compatibility for determining appropriate sandbox (VM) size:
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
# does not yet support sandbox sizing annotations.
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@


@@ -15,9 +15,6 @@ use tokio::{
use super::{ConnectConfig, Sock, Stream};
unsafe impl Send for HybridVsock {}
unsafe impl Sync for HybridVsock {}
#[derive(Debug, PartialEq)]
pub struct HybridVsock {
uds: String,


@@ -16,9 +16,6 @@ use tokio::net::UnixStream;
use super::{ConnectConfig, Sock, Stream};
unsafe impl Send for Vsock {}
unsafe impl Sync for Vsock {}
#[derive(Debug, PartialEq)]
pub struct Vsock {
vsock_cid: u32,


@@ -124,7 +124,6 @@ pub struct CreateContainerRequest {
pub devices: Vec<Device>,
pub storages: Vec<Storage>,
pub oci: Option<oci::Spec>,
pub guest_hooks: Option<oci::Hooks>,
pub sandbox_pidns: bool,
pub rootfs_mounts: Vec<oci::Mount>,
}


@@ -36,6 +36,7 @@ ch-config = { path = "ch-config", optional = true }
futures = "0.3.25"
safe-path = "0.1.0"
crossbeam-channel = "0.5.6"
[features]
default = []


@@ -20,3 +20,6 @@ tokio = { version = "1.25.0", features = ["sync", "rt"] }
# being used. This version is used to pin the CH config structure
# which is relatively static.
api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" }
kata-types = { path = "../../../../libs/kata-types"}
nix = "0.26.2"


@@ -2,18 +2,11 @@
//
// SPDX-License-Identifier: Apache-2.0
use crate::net_util::MAC_ADDR_LEN;
use crate::{
ConsoleConfig, ConsoleOutputMode, CpuTopology, CpusConfig, DeviceConfig, FsConfig, MacAddr,
MemoryConfig, NetConfig, PayloadConfig, PmemConfig, RngConfig, VmConfig, VsockConfig,
};
use anyhow::{anyhow, Context, Result};
use crate::{DeviceConfig, FsConfig, VmConfig};
use anyhow::{anyhow, Result};
use api_client::simple_api_full_command_and_response;
use std::fmt::Display;
use std::net::Ipv4Addr;
use std::os::unix::net::UnixStream;
use std::path::PathBuf;
use tokio::task;
pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result<Option<String>> {
@@ -38,20 +31,9 @@ pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result<Opt
}
pub async fn cloud_hypervisor_vm_create(
sandbox_path: String,
vsock_socket_path: String,
mut socket: UnixStream,
shared_fs_devices: Option<Vec<FsConfig>>,
pmem_devices: Option<Vec<PmemConfig>>,
cfg: VmConfig,
) -> Result<Option<String>> {
let cfg = cloud_hypervisor_vm_create_cfg(
sandbox_path,
vsock_socket_path,
shared_fs_devices,
pmem_devices,
)
.await?;
let serialised = serde_json::to_string_pretty(&cfg)?;
task::spawn_blocking(move || -> Result<Option<String>> {
@@ -124,151 +106,3 @@ pub async fn cloud_hypervisor_vm_fs_add(
result
}
pub async fn cloud_hypervisor_vm_create_cfg(
// FIXME:
_sandbox_path: String,
vsock_socket_path: String,
shared_fs_devices: Option<Vec<FsConfig>>,
pmem_devices: Option<Vec<PmemConfig>>,
) -> Result<VmConfig> {
let topology = CpuTopology {
threads_per_core: 1,
cores_per_die: 12,
dies_per_package: 1,
packages: 1,
};
let cpus = CpusConfig {
boot_vcpus: 1,
max_vcpus: 12,
max_phys_bits: 46,
topology: Some(topology),
..Default::default()
};
let rng = RngConfig {
src: PathBuf::from("/dev/urandom"),
..Default::default()
};
let kernel_args = vec![
"root=/dev/pmem0p1",
"rootflags=dax,data=ordered,errors=remount-ro",
"ro",
"rootfstype=ext4",
"panic=1",
"no_timer_check",
"noreplace-smp",
"console=ttyS0,115200n8",
"systemd.log_target=console",
"systemd.unit=kata-containers",
"systemd.mask=systemd-networkd.service",
"systemd.mask=systemd-networkd.socket",
"agent.log=debug",
];
let cmdline = kernel_args.join(" ");
let kernel = PathBuf::from("/opt/kata/share/kata-containers/vmlinux.container");
// Note that PmemConfig replaces the PayloadConfig.initrd.
let payload = PayloadConfig {
kernel: Some(kernel),
cmdline: Some(cmdline),
..Default::default()
};
let serial = ConsoleConfig {
mode: ConsoleOutputMode::Tty,
..Default::default()
};
let ip = Ipv4Addr::new(192, 168, 10, 10);
let mask = Ipv4Addr::new(255, 255, 255, 0);
let mac_str = "12:34:56:78:90:01";
let mac = parse_mac(mac_str)?;
let network = NetConfig {
ip,
mask,
mac,
..Default::default()
};
let memory = MemoryConfig {
size: (1024 * 1024 * 2048),
// Required
shared: true,
prefault: false,
hugepages: false,
mergeable: false,
// FIXME:
hotplug_size: Some(16475226112),
..Default::default()
};
let fs = shared_fs_devices;
let pmem = pmem_devices;
let vsock = VsockConfig {
cid: 3,
socket: PathBuf::from(vsock_socket_path),
..Default::default()
};
let cfg = VmConfig {
cpus,
memory,
fs,
serial,
pmem,
payload: Some(payload),
vsock: Some(vsock),
rng,
net: Some(vec![network]),
..Default::default()
};
Ok(cfg)
}
fn parse_mac<S>(s: &S) -> Result<MacAddr>
where
S: AsRef<str> + ?Sized + Display,
{
let v: Vec<&str> = s.as_ref().split(':').collect();
let mut bytes = [0u8; MAC_ADDR_LEN];
if v.len() != MAC_ADDR_LEN {
return Err(anyhow!(
"invalid MAC {} (length {}, expected {})",
s,
v.len(),
MAC_ADDR_LEN
));
}
for i in 0..MAC_ADDR_LEN {
if v[i].len() != 2 {
return Err(anyhow!(
"invalid MAC {} (segment {} length {}, expected {})",
s,
i,
v.len(),
2
));
}
bytes[i] =
u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?;
}
Ok(MacAddr { bytes })
}

View File

@@ -0,0 +1,324 @@
// Copyright (c) 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
use crate::net_util::MAC_ADDR_LEN;
use crate::NamedHypervisorConfig;
use crate::VmConfig;
use crate::{
ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpuTopology, CpusConfig, MacAddr, MemoryConfig,
PayloadConfig, PmemConfig, RngConfig, VsockConfig,
};
use anyhow::{anyhow, Context, Result};
use kata_types::config::default::DEFAULT_CH_ENTROPY_SOURCE;
use kata_types::config::hypervisor::{CpuInfo, MachineInfo, MemoryInfo};
use kata_types::config::BootInfo;
use std::convert::TryFrom;
use std::fmt::Display;
use std::path::PathBuf;
// 1 MiB
const MIB: u64 = 1024 * 1024;
const PMEM_ALIGN_BYTES: u64 = 2 * MIB;
const DEFAULT_CH_MAX_PHYS_BITS: u8 = 46;
impl TryFrom<NamedHypervisorConfig> for VmConfig {
type Error = anyhow::Error;
fn try_from(n: NamedHypervisorConfig) -> Result<Self, Self::Error> {
let kernel_params = n.kernel_params;
let cfg = n.cfg;
let vsock_socket_path = n.vsock_socket_path;
let sandbox_path = n.sandbox_path;
let fs = n.shared_fs_devices;
let cpus = CpusConfig::try_from(cfg.cpu_info)?;
let rng = RngConfig::try_from(cfg.machine_info)?;
// Note how CH handles the different image types:
//
// - An image is specified in PmemConfig.
// - An initrd/initramfs is specified in PayloadConfig.
let boot_info = cfg.boot_info;
let use_initrd = !boot_info.initrd.is_empty();
let use_image = !boot_info.image.is_empty();
if use_initrd && use_image {
return Err(anyhow!("cannot specify image and initrd"));
}
if !use_initrd && !use_image {
return Err(anyhow!("missing boot file (no image or initrd)"));
}
let initrd = if use_initrd {
Some(PathBuf::from(boot_info.initrd.clone()))
} else {
None
};
let pmem = if use_initrd {
None
} else {
let pmem = PmemConfig::try_from(&boot_info)?;
Some(vec![pmem])
};
let payload = PayloadConfig::try_from((boot_info, kernel_params, initrd))?;
let serial = get_serial_cfg()?;
let console = get_console_cfg()?;
let memory = MemoryConfig::try_from(cfg.memory_info)?;
std::fs::create_dir_all(sandbox_path).context("failed to create sandbox path")?;
let vsock = VsockConfig {
cid: 3,
socket: PathBuf::from(vsock_socket_path),
..Default::default()
};
let cfg = VmConfig {
cpus,
memory,
serial,
console,
payload: Some(payload),
fs,
pmem,
vsock: Some(vsock),
rng,
..Default::default()
};
Ok(cfg)
}
}
impl TryFrom<MemoryInfo> for MemoryConfig {
type Error = anyhow::Error;
fn try_from(mem: MemoryInfo) -> Result<Self, Self::Error> {
let sysinfo = nix::sys::sysinfo::sysinfo()?;
let max_mem_bytes = sysinfo.ram_total();
let mem_bytes: u64 = MIB
.checked_mul(mem.default_memory as u64)
.ok_or("cannot convert default memory to bytes")
.map_err(|e| anyhow!(e))?;
// The amount of memory that can be hot-plugged is the total less the
// amount allocated at VM start.
let hotplug_size_bytes = max_mem_bytes
.checked_sub(mem_bytes)
.ok_or("failed to calculate max hotplug size for CH")
.map_err(|e| anyhow!(e))?;
let aligned_hotplug_size_bytes =
checked_next_multiple_of(hotplug_size_bytes, PMEM_ALIGN_BYTES)
.ok_or("cannot handle pmem alignment for CH")
.map_err(|e| anyhow!(e))?;
let cfg = MemoryConfig {
size: mem_bytes,
// Required
shared: true,
hotplug_size: Some(aligned_hotplug_size_bytes),
..Default::default()
};
Ok(cfg)
}
}
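// An illustrative sketch of the conversion above, using assumed host values
// (not taken from a real system). With default_memory = 2048 MiB and 16 GiB
// of host RAM:
//
//   mem_bytes          = 2048 * MIB     = 2 GiB  (memory at VM start)
//   hotplug_size_bytes = 16 GiB - 2 GiB = 14 GiB
//   hotplug (aligned)  = 14 GiB + 2 MiB (see the alignment helper below)
//
// so the VM boots with 2 GiB and can hot-plug roughly the rest of host RAM.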
// Return the next multiple of 'multiple' starting from the specified value
// (aka align value to multiple).
//
// This is a temporary solution until checked_next_multiple_of() integer
// method is available in the rust language.
//
// See: https://github.com/rust-lang/rust/issues/88581
fn checked_next_multiple_of(value: u64, multiple: u64) -> Option<u64> {
match value.checked_rem(multiple) {
None => Some(value),
Some(r) => value.checked_add(multiple - r),
}
}
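// An illustrative sketch of the helper's behaviour (not exhaustive):
//
//   checked_next_multiple_of(3, 2)        == Some(4)
//   checked_next_multiple_of(4, 2)        == Some(6)  // an already aligned value is
//                                                     // bumped by one extra multiple;
//                                                     // harmless for padding, but it
//                                                     // differs from the std API it
//                                                     // stands in for
//   checked_next_multiple_of(u64::MAX, 2) == None     // the addition would overflow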
impl TryFrom<CpuInfo> for CpusConfig {
type Error = anyhow::Error;
fn try_from(cpu: CpuInfo) -> Result<Self, Self::Error> {
let boot_vcpus = u8::try_from(cpu.default_vcpus)?;
let max_vcpus = u8::try_from(cpu.default_maxvcpus)?;
let topology = CpuTopology {
threads_per_core: 1,
cores_per_die: max_vcpus,
dies_per_package: 1,
packages: 1,
};
let max_phys_bits = DEFAULT_CH_MAX_PHYS_BITS;
let cfg = CpusConfig {
boot_vcpus,
max_vcpus,
max_phys_bits,
topology: Some(topology),
..Default::default()
};
Ok(cfg)
}
}
impl TryFrom<String> for CpuFeatures {
type Error = anyhow::Error;
#[cfg(target_arch = "x86_64")]
fn try_from(s: String) -> Result<Self, Self::Error> {
let amx = s.split(',').any(|x| x == "amx");
let cpu_features = CpuFeatures { amx };
Ok(cpu_features)
}
#[cfg(not(target_arch = "x86_64"))]
fn try_from(_s: String) -> Result<Self, Self::Error> {
Ok(CpuFeatures::default())
}
}
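// An illustrative sketch: on x86_64 the conversion above simply scans the
// comma-separated cpu_features string for an "amx" entry (values assumed):
//
//   let f = CpuFeatures::try_from("sse4_2,avx2,amx".to_string())?;
//   assert!(f.amx);
//   let f = CpuFeatures::try_from(String::new())?;
//   assert!(!f.amx);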
// The 1st tuple element is the BootInfo, the 2nd is the space separated
// kernel parameters list, and the 3rd is an optional initramfs image to use.
// A PayloadConfig cannot be created from the BootInfo alone since that
// contains the user-specified kernel parameters only.
impl TryFrom<(BootInfo, String, Option<PathBuf>)> for PayloadConfig {
type Error = anyhow::Error;
fn try_from(args: (BootInfo, String, Option<PathBuf>)) -> Result<Self, Self::Error> {
let b = args.0;
let cmdline = args.1;
let initramfs = args.2;
let kernel = PathBuf::from(b.kernel);
let payload = PayloadConfig {
kernel: Some(kernel),
cmdline: Some(cmdline),
initramfs,
..Default::default()
};
Ok(payload)
}
}
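// An illustrative call of the conversion above, with assumed values. For an
// image boot the initramfs element is None and the rootfs is supplied
// separately through PmemConfig:
//
//   let payload = PayloadConfig::try_from((
//       boot_info,                                 // BootInfo carrying the kernel path
//       "console=ttyS0 ro".to_string(),            // assembled kernel command line
//       Some(PathBuf::from("/path/to/initramfs")), // Some(..) only for initrd boots
//   ))?;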
impl TryFrom<MachineInfo> for RngConfig {
type Error = anyhow::Error;
fn try_from(m: MachineInfo) -> Result<Self, Self::Error> {
let entropy_source = if !m.entropy_source.is_empty() {
m.entropy_source
} else {
DEFAULT_CH_ENTROPY_SOURCE.to_string()
};
let rng = RngConfig {
src: PathBuf::from(entropy_source),
..Default::default()
};
Ok(rng)
}
}
impl TryFrom<&BootInfo> for PmemConfig {
type Error = anyhow::Error;
fn try_from(b: &BootInfo) -> Result<Self, Self::Error> {
let file = if b.image.is_empty() {
return Err(anyhow!("CH PmemConfig only used for images"));
} else {
b.image.clone()
};
let cfg = PmemConfig {
file: PathBuf::from(file),
discard_writes: true,
..Default::default()
};
Ok(cfg)
}
}
fn get_serial_cfg() -> Result<ConsoleConfig> {
let cfg = ConsoleConfig {
file: None,
mode: ConsoleOutputMode::Tty,
iommu: false,
};
Ok(cfg)
}
fn get_console_cfg() -> Result<ConsoleConfig> {
let cfg = ConsoleConfig {
file: None,
mode: ConsoleOutputMode::Off,
iommu: false,
};
Ok(cfg)
}
#[allow(dead_code)]
fn parse_mac<S>(s: &S) -> Result<MacAddr>
where
S: AsRef<str> + ?Sized + Display,
{
let v: Vec<&str> = s.as_ref().split(':').collect();
let mut bytes = [0u8; MAC_ADDR_LEN];
if v.len() != MAC_ADDR_LEN {
return Err(anyhow!(
"invalid MAC {} (length {}, expected {})",
s,
v.len(),
MAC_ADDR_LEN
));
}
for i in 0..MAC_ADDR_LEN {
if v[i].len() != 2 {
return Err(anyhow!(
"invalid MAC {} (segment {} length {}, expected {})",
s,
i,
v[i].len(),
2
));
}
bytes[i] =
u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?;
}
Ok(MacAddr { bytes })
}
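// Illustrative parse_mac results (a sketch, not a test suite):
//
//   parse_mac("12:34:56:78:90:01") // Ok: bytes == [0x12, 0x34, 0x56, 0x78, 0x90, 0x01]
//   parse_mac("12:34:56")          // Err: 3 segments, expected MAC_ADDR_LEN (6)
//   parse_mac("12:34:56:78:90:0g") // Err: "0g" is not valid hexadecimal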

View File

@@ -7,10 +7,12 @@ use std::net::Ipv4Addr;
use std::path::PathBuf;
pub mod ch_api;
pub mod convert;
pub mod net_util;
mod virtio_devices;
use crate::virtio_devices::RateLimiterConfig;
use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
pub use net_util::MacAddr;
pub const MAX_NUM_PCI_SEGMENTS: u16 = 16;
@@ -479,3 +481,15 @@ fn usize_is_zero(v: &usize) -> bool {
fn u16_is_zero(v: &u16) -> bool {
*v == 0
}
// Type used to simplify conversion from a generic Hypervisor config
// to a CH specific VmConfig.
#[derive(Debug, Clone)]
pub struct NamedHypervisorConfig {
pub kernel_params: String,
pub sandbox_path: String,
pub vsock_socket_path: String,
pub cfg: HypervisorConfig,
pub shared_fs_devices: Option<Vec<FsConfig>>,
}
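// An illustrative use of this type, mirroring the CH boot path that appears
// later in this diff:
//
//   let named_cfg = NamedHypervisorConfig {
//       kernel_params,
//       sandbox_path,
//       vsock_socket_path,
//       cfg: hypervisor_config.clone(),
//       shared_fs_devices,
//   };
//   let vm_cfg = VmConfig::try_from(named_cfg)?;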

View File

@@ -53,9 +53,6 @@ pub struct CloudHypervisorInner {
pub(crate) tasks: Option<Vec<JoinHandle<Result<()>>>>,
}
unsafe impl Send for CloudHypervisorInner {}
unsafe impl Sync for CloudHypervisorInner {}
const CH_DEFAULT_TIMEOUT_SECS: u32 = 10;
impl CloudHypervisorInner {

View File

@@ -10,7 +10,7 @@ use crate::HybridVsockConfig;
use crate::VmmState;
use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::cloud_hypervisor_vm_fs_add;
use ch_config::{FsConfig, PmemConfig};
use ch_config::FsConfig;
use safe_path::scoped_join;
use std::convert::TryFrom;
use std::path::PathBuf;
@@ -148,41 +148,6 @@ impl CloudHypervisorInner {
Ok(None)
}
}
pub(crate) async fn get_boot_file(&mut self) -> Result<PathBuf> {
if let Some(ref config) = self.config {
let boot_info = &config.boot_info;
let file = if !boot_info.initrd.is_empty() {
boot_info.initrd.clone()
} else if !boot_info.image.is_empty() {
boot_info.image.clone()
} else {
return Err(anyhow!("missing boot file (no image or initrd)"));
};
Ok(PathBuf::from(file))
} else {
Err(anyhow!("no hypervisor config"))
}
}
pub(crate) async fn get_pmem_devices(&mut self) -> Result<Option<Vec<PmemConfig>>> {
let file = self.get_boot_file().await?;
let pmem_cfg = PmemConfig {
file,
size: None,
iommu: false,
discard_writes: true,
id: None,
pci_segment: 0,
};
let pmem_devices = vec![pmem_cfg];
Ok(Some(pmem_devices))
}
}
#[derive(Debug)]

View File

@@ -6,18 +6,23 @@
use super::inner::CloudHypervisorInner;
use crate::ch::utils::get_api_socket_path;
use crate::ch::utils::{get_jailer_root, get_sandbox_path, get_vsock_path};
use crate::kernel_param::KernelParams;
use crate::Device;
use crate::VsockConfig;
use crate::VM_ROOTFS_DRIVER_PMEM;
use crate::{VcpuThreadIds, VmmState};
use anyhow::{anyhow, Context, Result};
use ch_config::ch_api::{
cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping,
cloud_hypervisor_vmm_shutdown,
};
use ch_config::{NamedHypervisorConfig, VmConfig};
use core::future::poll_fn;
use futures::executor::block_on;
use futures::future::join_all;
use kata_types::capabilities::{Capabilities, CapabilityBits};
use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE;
use std::convert::TryFrom;
use std::fs::create_dir_all;
use std::os::unix::net::UnixStream;
use std::path::Path;
@@ -54,11 +59,43 @@ impl CloudHypervisorInner {
Ok(())
}
async fn get_kernel_params(&self) -> Result<String> {
let cfg = self
.config
.as_ref()
.ok_or("no hypervisor config for CH")
.map_err(|e| anyhow!(e))?;
let enable_debug = cfg.debug_info.enable_debug;
// Note that the configuration option hypervisor.block_device_driver is not used.
let rootfs_driver = VM_ROOTFS_DRIVER_PMEM;
let rootfs_type = match cfg.boot_info.rootfs_type.is_empty() {
true => DEFAULT_CH_ROOTFS_TYPE,
false => &cfg.boot_info.rootfs_type,
};
// Start by adding the default set of kernel parameters.
let mut params = KernelParams::new(enable_debug);
let mut rootfs_param = KernelParams::new_rootfs_kernel_params(rootfs_driver, rootfs_type)?;
// Add the rootfs device
params.append(&mut rootfs_param);
// Finally, add the user-specified options at the end
// (so they will take priority).
params.append(&mut KernelParams::from_string(&cfg.boot_info.kernel_params));
let kernel_params = params.to_string()?;
Ok(kernel_params)
}
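// An illustrative sketch of the resulting parameter order (values assumed;
// the pmem rootfs parameters come from KernelParams::new_rootfs_kernel_params):
//
//   <debug defaults> root=/dev/pmem0p1 rootflags=... ro rootfstype=ext4 <user kernel_params>
//
// Placing the user-specified options last means they override the defaults.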
async fn boot_vm(&mut self) -> Result<()> {
let shared_fs_devices = self.get_shared_fs_devices().await?;
let pmem_devices = self.get_pmem_devices().await?;
let socket = self
.api_socket
.as_ref()
@@ -71,14 +108,34 @@ impl CloudHypervisorInner {
let vsock_socket_path = get_vsock_path(&self.id)?;
let response = cloud_hypervisor_vm_create(
let hypervisor_config = self
.config
.as_ref()
.ok_or("no hypervisor config for CH")
.map_err(|e| anyhow!(e))?;
debug!(
sl!(),
"generic Hypervisor configuration: {:?}", hypervisor_config
);
let kernel_params = self.get_kernel_params().await?;
let named_cfg = NamedHypervisorConfig {
kernel_params,
sandbox_path,
vsock_socket_path,
socket.try_clone().context("failed to clone socket")?,
cfg: hypervisor_config.clone(),
shared_fs_devices,
pmem_devices,
)
.await?;
};
let cfg = VmConfig::try_from(named_cfg)?;
debug!(sl!(), "CH specific VmConfig configuration: {:?}", cfg);
let response =
cloud_hypervisor_vm_create(socket.try_clone().context("failed to clone socket")?, cfg)
.await?;
if let Some(detail) = response {
debug!(sl!(), "vm boot response: {:?}", detail);
@@ -415,6 +472,10 @@ impl CloudHypervisorInner {
Ok(Vec::<u32>::new())
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
todo!()
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -33,9 +33,6 @@ pub struct CloudHypervisor {
inner: Arc<RwLock<CloudHypervisorInner>>,
}
unsafe impl Send for CloudHypervisor {}
unsafe impl Sync for CloudHypervisor {}
impl CloudHypervisor {
pub fn new() -> Self {
Self {
@@ -121,6 +118,11 @@ impl Hypervisor for CloudHypervisor {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -27,7 +27,6 @@ use std::{collections::HashSet, fs::create_dir_all, path::PathBuf};
const DRAGONBALL_KERNEL: &str = "vmlinux";
const DRAGONBALL_ROOT_FS: &str = "rootfs";
unsafe impl Sync for DragonballInner {}
pub struct DragonballInner {
/// sandbox id
pub(crate) id: String,

View File

@@ -127,6 +127,11 @@ impl DragonballInner {
Ok(Vec::from_iter(pids.into_iter()))
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
let master_tid = self.vmm_instance.get_vmm_master_tid();
Ok(master_tid)
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -117,6 +117,11 @@ impl Hypervisor for Dragonball {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -7,14 +7,12 @@
use std::{
fs::{File, OpenOptions},
os::unix::{io::IntoRawFd, prelude::AsRawFd},
sync::{
mpsc::{channel, Receiver, Sender},
Arc, Mutex, RwLock,
},
sync::{Arc, Mutex, RwLock},
thread,
};
use anyhow::{anyhow, Context, Result};
use crossbeam_channel::{unbounded, Receiver, Sender};
use dragonball::{
api::v1::{
BlockDeviceConfigInfo, BootSourceConfig, FsDeviceConfigInfo, FsMountConfigInfo,
@@ -77,6 +75,12 @@ impl VmmInstance {
share_info_lock.write().unwrap().id = String::from(id);
}
pub fn get_vmm_master_tid(&self) -> u32 {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().master_tid;
result
}
pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().tids.clone();
@@ -86,8 +90,8 @@ impl VmmInstance {
pub fn run_vmm_server(&mut self, id: &str, netns: Option<String>) -> Result<()> {
let kvm = OpenOptions::new().read(true).write(true).open(KVM_DEVICE)?;
let (to_vmm, from_runtime) = channel();
let (to_runtime, from_vmm) = channel();
let (to_vmm, from_runtime) = unbounded();
let (to_runtime, from_vmm) = unbounded();
self.set_instance_id(id);
@@ -105,6 +109,7 @@ impl VmmInstance {
Some(kvm.into_raw_fd()),
)
.expect("Failed to start vmm");
let vmm_shared_info = self.get_shared_info();
self.vmm_thread = Some(
thread::Builder::new()
@@ -112,6 +117,9 @@ impl VmmInstance {
.spawn(move || {
|| -> Result<i32> {
debug!(sl!(), "run vmm thread start");
let cur_tid = nix::unistd::gettid().as_raw() as u32;
vmm_shared_info.write().unwrap().master_tid = cur_tid;
if let Some(netns_path) = netns {
info!(sl!(), "set netns for vmm master {}", &netns_path);
let netns_fd = File::open(&netns_path)

View File

@@ -87,6 +87,7 @@ pub trait Hypervisor: Send + Sync {
async fn hypervisor_config(&self) -> HypervisorConfig;
async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
async fn get_pids(&self) -> Result<Vec<u32>>;
async fn get_vmm_master_tid(&self) -> Result<u32>;
async fn cleanup(&self) -> Result<()>;
async fn check(&self) -> Result<()>;
async fn get_jailer_root(&self) -> Result<String>;

View File

@@ -12,9 +12,6 @@ const VSOCK_SCHEME: &str = "vsock";
const VSOCK_AGENT_CID: u32 = 3;
const VSOCK_AGENT_PORT: u32 = 1024;
unsafe impl Send for QemuInner {}
unsafe impl Sync for QemuInner {}
pub struct QemuInner {
config: HypervisorConfig,
}
@@ -92,6 +89,11 @@ impl QemuInner {
todo!()
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
info!(sl!(), "QemuInner::get_vmm_master_tid()");
todo!()
}
pub(crate) async fn cleanup(&self) -> Result<()> {
info!(sl!(), "QemuInner::cleanup()");
todo!()

View File

@@ -103,6 +103,11 @@ impl Hypervisor for Qemu {
inner.get_thread_ids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.cleanup().await

View File

@@ -114,7 +114,7 @@ impl CgroupsResource {
pub async fn delete(&self) -> Result<()> {
for cg_pid in self.cgroup_manager.tasks() {
// For now, we can't guarantee that the thread in cgroup_manager does still
// exist. Once it exit, we should ignor that error returned by remove_task
// exist. Once it exit, we should ignore that error returned by remove_task
// to let it go.
if let Err(error) = self.cgroup_manager.remove_task(cg_pid) {
match error.source() {

View File

@@ -101,9 +101,9 @@ impl ResourceManager {
inner.update_cgroups(cid, linux_resources).await
}
pub async fn delete_cgroups(&self) -> Result<()> {
pub async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.delete_cgroups().await
inner.cleanup().await
}
}

View File

@@ -8,7 +8,7 @@ use std::{sync::Arc, thread};
use crate::resource_persist::ResourceState;
use agent::{Agent, Storage};
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use hypervisor::Hypervisor;
use kata_types::config::TomlConfig;
@@ -233,8 +233,22 @@ impl ResourceManagerInner {
.await
}
pub async fn delete_cgroups(&self) -> Result<()> {
self.cgroups_resource.delete().await
pub async fn cleanup(&self) -> Result<()> {
// clean up cgroup
self.cgroups_resource
.delete()
.await
.context("delete cgroup")?;
// clean up share fs mount
if let Some(share_fs) = &self.share_fs {
share_fs
.get_share_fs_mount()
.cleanup(&self.sid)
.await
.context("failed to cleanup host path")?;
}
// TODO cleanup other resources
Ok(())
}
pub async fn dump(&self) {

View File

@@ -5,6 +5,7 @@
//
mod endpoint;
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint;
mod network_entity;
mod network_info;
@@ -17,7 +18,7 @@ use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
pub use endpoint::endpoint_persist::EndpointState;
pub use utils::netns::NetnsGuard;
use std::sync::Arc;

View File

@@ -10,12 +10,12 @@ use anyhow::{Context, Result};
use nix::sched::{setns, CloneFlags};
use nix::unistd::{getpid, gettid};
pub(crate) struct NetnsGuard {
pub struct NetnsGuard {
old_netns: Option<File>,
}
impl NetnsGuard {
pub(crate) fn new(new_netns_path: &str) -> Result<Self> {
pub fn new(new_netns_path: &str) -> Result<Self> {
let old_netns = if !new_netns_path.is_empty() {
let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net");
let old_netns = File::open(&current_netns_path)

View File

@@ -131,6 +131,8 @@ pub trait ShareFsMount: Send + Sync {
async fn umount_volume(&self, file_name: &str) -> Result<()>;
/// Umount the rootfs
async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()>;
/// Clean up share fs mount
async fn cleanup(&self, sid: &str) -> Result<()>;
}
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {

View File

@@ -59,6 +59,10 @@ pub fn get_host_rw_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid).join("rw")
}
pub fn get_host_shared_path(sid: &str) -> PathBuf {
Path::new(KATA_HOST_SHARED_DIR).join(sid)
}
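// An illustrative sketch of the resulting host layout, assuming
// KATA_HOST_SHARED_DIR is "/run/kata-containers/shared/sandboxes":
//
//   get_host_shared_path("sid")    -> /run/kata-containers/shared/sandboxes/sid
//   get_host_ro_shared_path("sid") -> /run/kata-containers/shared/sandboxes/sid/ro
//   get_host_rw_shared_path("sid") -> /run/kata-containers/shared/sandboxes/sid/rw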
fn do_get_guest_any_path(
target: &str,
cid: &str,

View File

@@ -7,7 +7,7 @@
use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_sys_util::mount::{bind_remount, umount_timeout};
use kata_sys_util::mount::{bind_remount, umount_all, umount_timeout};
use kata_types::k8s::is_watchable_mount;
use kata_types::mount;
use nix::sys::stat::stat;
@@ -20,7 +20,8 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use super::{
utils::{self, do_get_host_path},
get_host_rw_shared_path,
utils::{self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path},
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
};
@@ -224,4 +225,18 @@ impl ShareFsMount for VirtiofsShareMount {
Ok(())
}
async fn cleanup(&self, sid: &str) -> Result<()> {
// Unmount ro path
let host_ro_dest = get_host_ro_shared_path(sid);
umount_all(host_ro_dest.clone(), true).context("failed to umount ro path")?;
fs::remove_dir_all(host_ro_dest).context("failed to remove ro path")?;
// The rootfs and volumes have already been umounted before this function is called, so just remove the rw dir directly
let host_rw_dest = get_host_rw_shared_path(sid);
fs::remove_dir_all(host_rw_dest).context("failed to remove rw path")?;
// remove the host share directory
let host_path = get_host_shared_path(sid);
fs::remove_dir_all(host_path).context("failed to remove host shared path")?;
Ok(())
}
}

View File

@@ -13,9 +13,12 @@ slog-scope = "4.4.0"
tokio = { version = "1.8.0", features = ["rt-multi-thread"] }
hyper = { version = "0.14.20", features = ["stream", "server", "http1"] }
hyperlocal = "0.8"
serde_json = "1.0.88"
nix = "0.25.0"
common = { path = "./common" }
kata-types = { path = "../../../libs/kata-types" }
kata-sys-util = { path = "../../../libs/kata-sys-util" }
logging = { path = "../../../libs/logging"}
oci = { path = "../../../libs/oci" }
shim-interface = { path = "../../../libs/shim-interface" }

View File

@@ -26,3 +26,4 @@ agent = { path = "../../agent" }
kata-sys-util = { path = "../../../../libs/kata-sys-util" }
kata-types = { path = "../../../../libs/kata-types" }
oci = { path = "../../../../libs/oci" }

View File

@@ -17,12 +17,9 @@ pub enum Action {
Start,
Stop,
Shutdown,
Event(Arc<dyn Event>),
Event(Arc<dyn Event + Send + Sync>),
}
unsafe impl Send for Message {}
unsafe impl Sync for Message {}
#[derive(Debug)]
pub struct Message {
pub action: Action,

View File

@@ -9,9 +9,15 @@ use async_trait::async_trait;
#[async_trait]
pub trait Sandbox: Send + Sync {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()>;
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()>;
async fn stop(&self) -> Result<()>;
async fn cleanup(&self, container_id: &str) -> Result<()>;
async fn cleanup(&self) -> Result<()>;
async fn shutdown(&self) -> Result<()>;
// agent function

View File

@@ -184,6 +184,7 @@ pub enum ProcessStatus {
Stopped = 3,
Paused = 4,
Pausing = 5,
Exited = 6,
}
#[derive(Debug, Clone)]

View File

@@ -56,6 +56,7 @@ impl From<ProcessStatus> for api::Status {
ProcessStatus::Stopped => api::Status::STOPPED,
ProcessStatus::Paused => api::Status::PAUSED,
ProcessStatus::Pausing => api::Status::PAUSING,
ProcessStatus::Exited => api::Status::STOPPED,
}
}
}

View File

@@ -18,6 +18,7 @@ use hypervisor::Param;
use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use persist::sandbox_persist::Persist;
@@ -50,6 +51,8 @@ impl RuntimeHandlerManagerInner {
async fn init_runtime_handler(
&mut self,
spec: &oci::Spec,
state: &oci::State,
netns: Option<String>,
dns: Vec<String>,
config: Arc<TomlConfig>,
@@ -74,14 +77,19 @@ impl RuntimeHandlerManagerInner {
// start sandbox
runtime_instance
.sandbox
.start(netns, dns)
.start(netns, dns, spec, state)
.await
.context("start sandbox")?;
self.runtime_instance = Some(Arc::new(runtime_instance));
Ok(())
}
async fn try_init(&mut self, spec: &oci::Spec, options: &Option<Vec<u8>>) -> Result<()> {
async fn try_init(
&mut self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
// return if runtime instance has init
if self.runtime_instance.is_some() {
return Ok(());
@@ -121,7 +129,7 @@ impl RuntimeHandlerManagerInner {
}
let config = load_config(spec, options).context("load config")?;
self.init_runtime_handler(netns, dns, Arc::new(config))
self.init_runtime_handler(spec, state, netns, dns, Arc::new(config))
.await
.context("init runtime handler")?;
@@ -185,7 +193,7 @@ impl RuntimeHandlerManager {
.await
.context("failed to restore the sandbox")?;
sandbox
.cleanup(&inner.id)
.cleanup()
.await
.context("failed to cleanup the resource")?;
}
@@ -207,10 +215,11 @@ impl RuntimeHandlerManager {
async fn try_init_runtime_instance(
&self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
let mut inner = self.inner.write().await;
inner.try_init(spec, options).await
inner.try_init(spec, state, options).await
}
pub async fn handler_message(&self, req: Request) -> Result<Response> {
@@ -222,8 +231,16 @@ impl RuntimeHandlerManager {
oci::OCI_SPEC_CONFIG_FILE_NAME
);
let spec = oci::Spec::load(&bundler_path).context("load spec")?;
let state = oci::State {
version: spec.version.clone(),
id: container_config.container_id.to_string(),
status: oci::ContainerState::Creating,
pid: 0,
bundle: bundler_path,
annotations: spec.annotations.clone(),
};
self.try_init_runtime_instance(&spec, &container_config.options)
self.try_init_runtime_instance(&spec, &state, &container_config.options)
.await
.context("try init runtime instance")?;
let instance = self
@@ -374,7 +391,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
// 2. If this is not a sandbox infrastructure container, but instead a standalone single container (analogous to "docker run..."),
// then the container spec itself will contain appropriate sizing information for the entire sandbox (since it is
// a single container).
if toml_config.runtime.static_resource_mgmt {
if toml_config.runtime.static_sandbox_resource_mgmt {
info!(sl!(), "static resource management enabled");
let static_resource_manager = StaticResourceManager::new(spec)
.context("failed to construct static resource manager")?;
@@ -382,6 +399,7 @@ fn load_config(spec: &oci::Spec, option: &Option<Vec<u8>>) -> Result<TomlConfig>
.setup_config(&mut toml_config)
.context("failed to setup static resource mgmt config")?;
}
info!(sl!(), "get config content {:?}", &toml_config);
Ok(toml_config)
}

View File

@@ -37,6 +37,7 @@ pub struct Container {
pid: u32,
pub container_id: ContainerID,
config: ContainerConfig,
spec: oci::Spec,
inner: Arc<RwLock<ContainerInner>>,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
@@ -47,6 +48,7 @@ impl Container {
pub fn new(
pid: u32,
config: ContainerConfig,
spec: oci::Spec,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
) -> Result<Self> {
@@ -67,6 +69,7 @@ impl Container {
pid,
container_id,
config,
spec,
inner: Arc::new(RwLock::new(ContainerInner::new(
agent.clone(),
init_process,
@@ -382,11 +385,31 @@ impl Container {
.context("agent update container")?;
Ok(())
}
pub async fn config(&self) -> ContainerConfig {
self.config.clone()
}
pub async fn spec(&self) -> oci::Spec {
self.spec.clone()
}
}
fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
// hook should be done on host
spec.hooks = None;
// Only the StartContainer hook needs to be reserved for execution in the guest
let start_container_hooks = match spec.hooks.as_ref() {
Some(hooks) => hooks.start_container.clone(),
None => Vec::new(),
};
spec.hooks = if start_container_hooks.is_empty() {
None
} else {
Some(oci::Hooks {
start_container: start_container_hooks,
..Default::default()
})
};
// special process K8s ephemeral volumes.
update_ephemeral_storage_type(spec);

View File

@@ -200,20 +200,22 @@ impl ContainerInner {
return Ok(());
}
self.check_state(vec![ProcessStatus::Running])
self.check_state(vec![ProcessStatus::Running, ProcessStatus::Exited])
.await
.context("check state")?;
// if use force mode to stop container, stop always successful
// send kill signal to container
// ignore the error of sending signal, since the process would
// have been killed and exited yet.
self.signal_process(process, Signal::SIGKILL as u32, false)
.await
.map_err(|e| {
warn!(logger, "failed to signal kill. {:?}", e);
})
.ok();
if state == ProcessStatus::Running {
// if force mode is used to stop the container, stopping always succeeds
// send kill signal to container
// ignore any error from sending the signal, since the process may
// already have been killed and exited.
self.signal_process(process, Signal::SIGKILL as u32, false)
.await
.map_err(|e| {
warn!(logger, "failed to signal kill. {:?}", e);
})
.ok();
}
match process.process_type {
ProcessType::Container => self

View File

@@ -5,11 +5,10 @@
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use std::{collections::HashMap, sync::Arc};
use agent::Agent;
use async_trait::async_trait;
use common::{
error::Error,
types::{
@@ -19,10 +18,14 @@ use common::{
},
ContainerManager,
};
use hypervisor::Hypervisor;
use oci::Process as OCIProcess;
use resource::network::NetnsGuard;
use resource::ResourceManager;
use tokio::sync::RwLock;
use kata_sys_util::hooks::HookStates;
use super::{logger_with_process, Container};
pub struct VirtContainerManager {
@@ -31,6 +34,7 @@ pub struct VirtContainerManager {
containers: Arc<RwLock<HashMap<String, Container>>>,
resource_manager: Arc<ResourceManager>,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
}
impl VirtContainerManager {
@@ -38,6 +42,7 @@ impl VirtContainerManager {
sid: &str,
pid: u32,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>,
) -> Self {
Self {
@@ -46,6 +51,7 @@ impl VirtContainerManager {
containers: Default::default(),
resource_manager,
agent,
hypervisor,
}
}
}
@@ -55,12 +61,37 @@ impl ContainerManager for VirtContainerManager {
async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result<PID> {
let container = Container::new(
self.pid,
config,
config.clone(),
spec.clone(),
self.agent.clone(),
self.resource_manager.clone(),
)
.context("new container")?;
// CreateContainer Hooks:
// * should be run in vmm namespace (hook path in runtime namespace)
// * should be run after the vm is started, before container is created, and after CreateRuntime Hooks
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createcontainer-hooks
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let vmm_netns_path = format!("/proc/{}/task/{}/ns/{}", self.pid, vmm_master_tid, "net");
let state = oci::State {
version: spec.version.clone(),
id: config.container_id.clone(),
status: oci::ContainerState::Creating,
pid: vmm_master_tid as i32,
bundle: config.bundle.clone(),
annotations: spec.annotations.clone(),
};
// new scope: the CreateContainer hooks inside it execute in the vmm's network namespace
{
let _netns_guard = NetnsGuard::new(&vmm_netns_path).context("vmm netns guard")?;
if let Some(hooks) = spec.hooks.as_ref() {
let mut create_container_hook_states = HookStates::new();
create_container_hook_states.execute_hooks(&hooks.create_container, Some(state))?;
}
}
let mut containers = self.containers.write().await;
container.create(spec).await.context("create")?;
containers.insert(container.container_id.to_string(), container);
@@ -87,6 +118,26 @@ impl ContainerManager for VirtContainerManager {
let c = containers
.remove(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?;
// Poststop Hooks:
// * should be run in runtime namespace
// * should be run after the container is deleted but before delete operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststop
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Stopped,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststop_hook_states = HookStates::new();
poststop_hook_states.execute_hooks(&hooks.poststop, Some(state))?;
}
c.state_process(process).await.context("state process")
}
ProcessType::Exec => {
@@ -190,6 +241,26 @@ impl ContainerManager for VirtContainerManager {
.get(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?;
c.start(process).await.context("start")?;
// Poststart Hooks:
// * should be run in runtime namespace
// * should be run after user-specific command is executed but before start operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststart
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Running,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststart_hook_states = HookStates::new();
poststart_hook_states.execute_hooks(&hooks.poststart, Some(state))?;
}
Ok(PID { pid: self.pid })
}

View File

@@ -182,7 +182,7 @@ impl Process {
drop(exit_status);
let mut status = status.write().await;
*status = ProcessStatus::Stopped;
*status = ProcessStatus::Exited;
drop(status);
drop(exit_notifier);

View File

@@ -86,13 +86,18 @@ impl RuntimeHandler for VirtContainer {
sid,
msg_sender,
agent.clone(),
hypervisor,
hypervisor.clone(),
resource_manager.clone(),
)
.await
.context("new virt sandbox")?;
let container_manager =
container_manager::VirtContainerManager::new(sid, pid, agent, resource_manager);
let container_manager = container_manager::VirtContainerManager::new(
sid,
pid,
agent,
hypervisor,
resource_manager,
);
Ok(RuntimeInstance {
sandbox: Arc::new(sandbox),
container_manager: Arc::new(container_manager),

View File

@@ -17,6 +17,7 @@ use common::{
};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::{
default::{DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT},
TomlConfig,
@@ -117,11 +118,50 @@ impl VirtSandbox {
Ok(resource_configs)
}
async fn execute_oci_hook_functions(
&self,
prestart_hooks: &[oci::Hook],
create_runtime_hooks: &[oci::Hook],
state: &oci::State,
) -> Result<()> {
let mut st = state.clone();
// for dragonball, we use vmm_master_tid
let vmm_pid = self
.hypervisor
.get_vmm_master_tid()
.await
.context("get vmm master tid")?;
st.pid = vmm_pid as i32;
// Prestart Hooks [DEPRECATED in newest oci spec]:
// * should be run in runtime namespace
// * should be run after vm is started, but before container is created
// if Prestart Hook and CreateRuntime Hook are both supported
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#prestart
let mut prestart_hook_states = HookStates::new();
prestart_hook_states.execute_hooks(prestart_hooks, Some(st.clone()))?;
// CreateRuntime Hooks:
// * should be run in runtime namespace
// * should be run when creating the runtime
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createruntime-hooks
let mut create_runtime_hook_states = HookStates::new();
create_runtime_hook_states.execute_hooks(create_runtime_hooks, Some(st.clone()))?;
Ok(())
}
}
#[async_trait]
impl Sandbox for VirtSandbox {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()> {
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()> {
let id = &self.sid;
// if sandbox running, return
@@ -149,6 +189,17 @@ impl Sandbox for VirtSandbox {
self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) = match spec.hooks.as_ref() {
Some(hooks) => (hooks.prestart.clone(), hooks.create_runtime.clone()),
None => (Vec::new(), Vec::new()),
};
self.execute_oci_hook_functions(&prestart_hooks, &create_runtime_hooks, state)
.await?;
// TODO: if prestart_hooks is not empty, rescan the network endpoints (relies on hotplug endpoints).
// see: https://github.com/kata-containers/kata-containers/issues/6378
// connect agent
// set agent socket
let address = self
@@ -240,17 +291,7 @@ impl Sandbox for VirtSandbox {
self.stop().await.context("stop")?;
info!(sl!(), "delete cgroup");
self.resource_manager
.delete_cgroups()
.await
.context("delete cgroups")?;
info!(sl!(), "delete hypervisor");
self.hypervisor
.cleanup()
.await
.context("delete hypervisor")?;
self.cleanup().await.context("do the clean up")?;
info!(sl!(), "stop monitor");
self.monitor.stop().await;
@@ -267,9 +308,19 @@ impl Sandbox for VirtSandbox {
Ok(())
}
async fn cleanup(&self, _id: &str) -> Result<()> {
self.resource_manager.delete_cgroups().await?;
self.hypervisor.cleanup().await?;
async fn cleanup(&self) -> Result<()> {
info!(sl!(), "delete hypervisor");
self.hypervisor
.cleanup()
.await
.context("delete hypervisor")?;
info!(sl!(), "resource clean up");
self.resource_manager
.cleanup()
.await
.context("resource clean up")?;
// TODO: clean up other sandbox resources
Ok(())
}

View File

@@ -225,7 +225,7 @@ DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd
ifeq ($(ARCH),ppc64le)
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/kata-qemu/virtiofsd
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/qemu/virtiofsd
endif
DEFVALIDVIRTIOFSDAEMONPATHS := [\"$(DEFVIRTIOFSDAEMON)\"]
# Default DAX mapping cache size in MiB
@@ -789,7 +789,7 @@ install-bin: $(BINLIST)
install-runtime: runtime install-scripts install-completions install-configs install-bin
install-containerd-shim-v2: $(SHIMV2)
install-containerd-shim-v2: $(SHIMV2_OUTPUT)
$(QUIET_INST)$(call INSTALL_EXEC,$<,$(BINDIR))
install-monitor: $(MONITOR)

View File

@@ -307,7 +307,7 @@ func GetSandboxesStoragePath() string {
return "/run/vc/sbs"
}
// GetSandboxesStoragePath returns the storage path where sandboxes info are stored in runtime-rs
// GetSandboxesStoragePathRust returns the storage path where sandboxes info are stored in runtime-rs
func GetSandboxesStoragePathRust() string {
return "/run/kata"
}

View File

@@ -44,6 +44,7 @@ func mountLogger() *logrus.Entry {
}
func isSystemMount(m string) bool {
m = filepath.Clean(m)
for _, p := range systemMountPrefixes {
if m == p || strings.HasPrefix(m, p+"/") {
return true
@@ -54,6 +55,7 @@ func isSystemMount(m string) bool {
}
func isHostDevice(m string) bool {
m = filepath.Clean(m)
if m == "/dev" {
return true
}

View File

@@ -249,6 +249,9 @@ func TestIsHostDevice(t *testing.T) {
{"/dev/zero", true},
{"/dev/block", true},
{"/mnt/dev/block", false},
{"/../dev", true},
{"/../dev/block", true},
{"/../mnt/dev/block", false},
}
for _, test := range tests {

View File

@@ -41,6 +41,10 @@ func TestIsSystemMount(t *testing.T) {
{"/home", false},
{"/dev/block/", false},
{"/mnt/dev/foo", false},
{"/../sys", true},
{"/../sys/", true},
{"/../sys/fs/cgroup", true},
{"/../sysfoo", false},
}
for _, test := range tests {

View File

@@ -1131,6 +1131,9 @@ components:
items:
type: integer
type: array
required:
- host_cpus
- vcpu
type: object
CpuFeatures:
example:

View File

@@ -4,14 +4,14 @@
Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**Vcpu** | Pointer to **int32** | | [optional]
**HostCpus** | Pointer to **[]int32** | | [optional]
**Vcpu** | **int32** | |
**HostCpus** | **[]int32** | |
## Methods
### NewCpuAffinity
`func NewCpuAffinity() *CpuAffinity`
`func NewCpuAffinity(vcpu int32, hostCpus []int32, ) *CpuAffinity`
NewCpuAffinity instantiates a new CpuAffinity object
This constructor will assign default values to properties that have it defined,
@@ -45,11 +45,6 @@ and a boolean to check if the value has been set.
SetVcpu sets Vcpu field to given value.
### HasVcpu
`func (o *CpuAffinity) HasVcpu() bool`
HasVcpu returns a boolean if a field has been set.
### GetHostCpus
@@ -70,11 +65,6 @@ and a boolean to check if the value has been set.
SetHostCpus sets HostCpus field to given value.
### HasHostCpus
`func (o *CpuAffinity) HasHostCpus() bool`
HasHostCpus returns a boolean if a field has been set.
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)

View File

@@ -16,16 +16,18 @@ import (
// CpuAffinity struct for CpuAffinity
type CpuAffinity struct {
Vcpu *int32 `json:"vcpu,omitempty"`
HostCpus *[]int32 `json:"host_cpus,omitempty"`
Vcpu int32 `json:"vcpu"`
HostCpus []int32 `json:"host_cpus"`
}
// NewCpuAffinity instantiates a new CpuAffinity object
// This constructor will assign default values to properties that have it defined,
// and makes sure properties required by API are set, but the set of arguments
// will change when the set of required properties is changed
func NewCpuAffinity() *CpuAffinity {
func NewCpuAffinity(vcpu int32, hostCpus []int32) *CpuAffinity {
this := CpuAffinity{}
this.Vcpu = vcpu
this.HostCpus = hostCpus
return &this
}
@@ -37,76 +39,60 @@ func NewCpuAffinityWithDefaults() *CpuAffinity {
return &this
}
// GetVcpu returns the Vcpu field value if set, zero value otherwise.
// GetVcpu returns the Vcpu field value
func (o *CpuAffinity) GetVcpu() int32 {
if o == nil || o.Vcpu == nil {
if o == nil {
var ret int32
return ret
}
return *o.Vcpu
return o.Vcpu
}
// GetVcpuOk returns a tuple with the Vcpu field value if set, nil otherwise
// GetVcpuOk returns a tuple with the Vcpu field value
// and a boolean to check if the value has been set.
func (o *CpuAffinity) GetVcpuOk() (*int32, bool) {
if o == nil || o.Vcpu == nil {
if o == nil {
return nil, false
}
return o.Vcpu, true
return &o.Vcpu, true
}
// HasVcpu returns a boolean if a field has been set.
func (o *CpuAffinity) HasVcpu() bool {
if o != nil && o.Vcpu != nil {
return true
}
return false
}
// SetVcpu gets a reference to the given int32 and assigns it to the Vcpu field.
// SetVcpu sets field value
func (o *CpuAffinity) SetVcpu(v int32) {
o.Vcpu = &v
o.Vcpu = v
}
// GetHostCpus returns the HostCpus field value if set, zero value otherwise.
// GetHostCpus returns the HostCpus field value
func (o *CpuAffinity) GetHostCpus() []int32 {
if o == nil || o.HostCpus == nil {
if o == nil {
var ret []int32
return ret
}
return *o.HostCpus
return o.HostCpus
}
// GetHostCpusOk returns a tuple with the HostCpus field value if set, nil otherwise
// GetHostCpusOk returns a tuple with the HostCpus field value
// and a boolean to check if the value has been set.
func (o *CpuAffinity) GetHostCpusOk() (*[]int32, bool) {
if o == nil || o.HostCpus == nil {
if o == nil {
return nil, false
}
return o.HostCpus, true
return &o.HostCpus, true
}
// HasHostCpus returns a boolean if a field has been set.
func (o *CpuAffinity) HasHostCpus() bool {
if o != nil && o.HostCpus != nil {
return true
}
return false
}
// SetHostCpus gets a reference to the given []int32 and assigns it to the HostCpus field.
// SetHostCpus sets field value
func (o *CpuAffinity) SetHostCpus(v []int32) {
o.HostCpus = &v
o.HostCpus = v
}
func (o CpuAffinity) MarshalJSON() ([]byte, error) {
toSerialize := map[string]interface{}{}
if o.Vcpu != nil {
if true {
toSerialize["vcpu"] = o.Vcpu
}
if o.HostCpus != nil {
if true {
toSerialize["host_cpus"] = o.HostCpus
}
return json.Marshal(toSerialize)

View File

@@ -578,6 +578,9 @@ components:
description: Virtual machine configuration
CpuAffinity:
required:
- vcpu
- host_cpus
type: object
properties:
vcpu:

File diff suppressed because it is too large

View File

@@ -31,6 +31,14 @@ kata-types = { path = "../../libs/kata-types" }
safe-path = { path = "../../libs/safe-path" }
agent = { path = "../../runtime-rs/crates/agent"}
serial_test = "0.5.1"
vmm-sys-util = "0.11.0"
epoll = "4.0.1"
libc = "0.2.138"
slog = "2.7.0"
slog-scope = "4.4.0"
hyper = "0.14.20"
ttrpc = "0.6.0"
tokio = "1.8.0"
[target.'cfg(target_arch = "s390x")'.dependencies]
reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] }
@@ -42,3 +50,4 @@ reqwest = { version = "0.11", default-features = false, features = ["json", "blo
semver = "1.0.12"
tempfile = "3.1.0"
test-utils = { path = "../../libs/test-utils" }
micro_http = { git = "https://github.com/firecracker-microvm/micro-http", branch = "main" }

View File

@@ -26,7 +26,7 @@ pub enum Commands {
Env,
/// Enter into guest VM by debug console
Exec,
Exec(ExecArguments),
/// Manage VM factory
Factory,
@@ -136,3 +136,12 @@ pub struct DirectVolResizeArgs {
pub volume_path: String,
pub resize_size: u64,
}
#[derive(Debug, Args)]
pub struct ExecArguments {
/// pod sandbox ID.
pub sandbox_id: String,
#[clap(short = 'p', long = "kata-debug-port", default_value_t = 1026)]
/// kata debug console vport; must match the port set in the configuration (default: 1026).
pub vport: u32,
}

View File

@@ -69,7 +69,7 @@ pub fn get_cpu_flags(cpu_info: &str, cpu_flags_tag: &str) -> Result<String> {
}
if cpu_flags_tag.is_empty() {
return Err(anyhow!("cpu flags delimiter string is empty"))?;
return Err(anyhow!("cpu flags delimiter string is empty"));
}
let subcontents: Vec<&str> = cpu_info.split('\n').collect();

View File

@@ -17,9 +17,9 @@ use std::process::exit;
use args::{Commands, KataCtlCli};
use ops::check_ops::{
handle_check, handle_env, handle_exec, handle_factory, handle_iptables, handle_metrics,
handle_version,
handle_check, handle_env, handle_factory, handle_iptables, handle_metrics, handle_version,
};
use ops::exec_ops::handle_exec;
use ops::volume_ops::handle_direct_volume;
fn real_main() -> Result<()> {
@@ -28,8 +28,8 @@ fn real_main() -> Result<()> {
match args.command {
Commands::Check(args) => handle_check(args),
Commands::DirectVolume(args) => handle_direct_volume(args),
Commands::Exec(args) => handle_exec(args),
Commands::Env => handle_env(),
Commands::Exec => handle_exec(),
Commands::Factory => handle_factory(),
Commands::Iptables(args) => handle_iptables(args),
Commands::Metrics(args) => handle_metrics(args),

View File

@@ -4,5 +4,6 @@
//
pub mod check_ops;
pub mod exec_ops;
pub mod version;
pub mod volume_ops;

View File

@@ -108,10 +108,6 @@ pub fn handle_env() -> Result<()> {
Ok(())
}
pub fn handle_exec() -> Result<()> {
Ok(())
}
pub fn handle_factory() -> Result<()> {
Ok(())
}

View File

@@ -0,0 +1,444 @@
// Copyright (c) 2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
// Description:
// Implementation of entering the guest VM via the debug console.
// Ensure that `kata-debug-port` is consistent with the port
// set in the configuration.
use std::{
io::{self, BufRead, BufReader, Read, Write},
os::unix::{
io::{AsRawFd, FromRawFd, RawFd},
net::UnixStream,
},
time::Duration,
};
use anyhow::{anyhow, Context};
use nix::sys::socket::{connect, socket, AddressFamily, SockFlag, SockType, VsockAddr};
use reqwest::StatusCode;
use slog::debug;
use vmm_sys_util::terminal::Terminal;
use crate::args::ExecArguments;
use shim_interface::shim_mgmt::{client::MgmtClient, AGENT_URL};
const CMD_CONNECT: &str = "CONNECT";
const CMD_OK: &str = "OK";
const SCHEME_VSOCK: &str = "VSOCK";
const SCHEME_HYBRID_VSOCK: &str = "HVSOCK";
const EPOLL_EVENTS_LEN: usize = 16;
const KATA_AGENT_VSOCK_TIMEOUT: u64 = 5;
const TIMEOUT: Duration = Duration::from_millis(2000);
type Result<T> = std::result::Result<T, Error>;
// Convenience macro to obtain the scope logger
#[macro_export]
macro_rules! sl {
() => {
slog_scope::logger()
};
}
#[derive(Debug)]
pub enum Error {
EpollWait(io::Error),
EpollCreate(io::Error),
EpollAdd(io::Error),
SocketWrite(io::Error),
StdioErr(io::Error),
}
#[derive(Debug, PartialEq)]
enum EpollDispatch {
Stdin,
ServerSock,
}
struct EpollContext {
epoll_raw_fd: RawFd,
stdin_index: u64,
dispatch_table: Vec<EpollDispatch>,
stdin_handle: io::Stdin,
debug_console_sock: Option<UnixStream>,
}
impl EpollContext {
fn new() -> Result<Self> {
let epoll_raw_fd = epoll::create(true).map_err(Error::EpollCreate)?;
let dispatch_table = Vec::new();
let stdin_index = 0;
Ok(EpollContext {
epoll_raw_fd,
stdin_index,
dispatch_table,
stdin_handle: io::stdin(),
debug_console_sock: None,
})
}
fn init_debug_console_sock(&mut self, sock: UnixStream) -> Result<()> {
let dispatch_index = self.dispatch_table.len() as u64;
epoll::ctl(
self.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
sock.as_raw_fd(),
epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index),
)
.map_err(Error::EpollAdd)?;
self.dispatch_table.push(EpollDispatch::ServerSock);
self.debug_console_sock = Some(sock);
Ok(())
}
fn enable_stdin_event(&mut self) -> Result<()> {
let stdin_index = self.dispatch_table.len() as u64;
epoll::ctl(
self.epoll_raw_fd,
epoll::ControlOptions::EPOLL_CTL_ADD,
libc::STDIN_FILENO,
epoll::Event::new(epoll::Events::EPOLLIN, stdin_index),
)
.map_err(Error::EpollAdd)?;
self.stdin_index = stdin_index;
self.dispatch_table.push(EpollDispatch::Stdin);
Ok(())
}
fn do_exit(&self) {
self.stdin_handle
.lock()
.set_canon_mode()
.expect("Fail to set stdin to RAW mode");
}
fn do_process_handler(&mut self) -> Result<()> {
let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];
let epoll_raw_fd = self.epoll_raw_fd;
let debug_console_sock = self.debug_console_sock.as_mut().unwrap();
loop {
let num_events =
epoll::wait(epoll_raw_fd, -1, &mut events[..]).map_err(Error::EpollWait)?;
for event in events.iter().take(num_events) {
let dispatch_index = event.data as usize;
match self.dispatch_table[dispatch_index] {
EpollDispatch::Stdin => {
let mut out = [0u8; 128];
let stdin_lock = self.stdin_handle.lock();
match stdin_lock.read_raw(&mut out[..]) {
Ok(0) => {
return Ok(());
}
Err(e) => {
println!("error with errno {:?} while reading stdin", e);
return Ok(());
}
Ok(count) => {
debug_console_sock
.write_all(&out[..count])
.map_err(Error::SocketWrite)?;
}
}
}
EpollDispatch::ServerSock => {
let mut out = [0u8; 128];
match debug_console_sock.read(&mut out[..]) {
Ok(0) => {
return Ok(());
}
Err(e) => {
println!("error with errno {:?} while reading server", e);
return Ok(());
}
Ok(count) => {
io::stdout()
.write_all(&out[..count])
.map_err(Error::StdioErr)?;
io::stdout().flush().map_err(Error::StdioErr)?;
}
}
}
}
}
}
}
}
trait SockHandler {
fn setup_sock(&self) -> anyhow::Result<UnixStream>;
}
struct VsockConfig {
sock_cid: u32,
sock_port: u32,
}
impl VsockConfig {
fn new(sock_cid: u32, sock_port: u32) -> VsockConfig {
VsockConfig {
sock_cid,
sock_port,
}
}
}
impl SockHandler for VsockConfig {
fn setup_sock(&self) -> anyhow::Result<UnixStream> {
let sock_addr = VsockAddr::new(self.sock_cid, self.sock_port);
// Create socket fd
let vsock_fd = socket(
AddressFamily::Vsock,
SockType::Stream,
SockFlag::SOCK_CLOEXEC,
None,
)
.context("create vsock socket")?;
// Wrap the socket fd in UnixStream, so that it is closed
// when anything fails.
let stream = unsafe { UnixStream::from_raw_fd(vsock_fd) };
// Connect the socket to vsock server.
connect(stream.as_raw_fd(), &sock_addr)
.with_context(|| format!("failed to connect to server {:?}", &sock_addr))?;
Ok(stream)
}
}
struct HvsockConfig {
sock_addr: String,
sock_port: u32,
}
impl HvsockConfig {
fn new(sock_addr: String, sock_port: u32) -> Self {
HvsockConfig {
sock_addr,
sock_port,
}
}
}
impl SockHandler for HvsockConfig {
fn setup_sock(&self) -> anyhow::Result<UnixStream> {
let mut stream = match UnixStream::connect(self.sock_addr.clone()) {
Ok(s) => s,
Err(e) => return Err(anyhow!(e).context("failed to create UNIX Stream socket")),
};
// Ensure the Unix stream actually connects to the vsock server that
// the Kata agent is listening on inside the VM.
{
let test_msg = format!("{} {}\n", CMD_CONNECT, self.sock_port);
stream.set_read_timeout(Some(Duration::new(KATA_AGENT_VSOCK_TIMEOUT, 0)))?;
stream.set_write_timeout(Some(Duration::new(KATA_AGENT_VSOCK_TIMEOUT, 0)))?;
stream.write_all(test_msg.as_bytes())?;
// Now, see if we get the expected response
let stream_reader = stream.try_clone()?;
let mut reader = BufReader::new(&stream_reader);
let mut msg = String::new();
reader.read_line(&mut msg)?;
if msg.is_empty() {
return Err(anyhow!(
"stream reader get message is empty with port: {:?}",
self.sock_port
));
}
// The server returned the expected response; the connection succeeded.
if msg.starts_with(CMD_OK) {
let response = msg
.strip_prefix(CMD_OK)
.ok_or(format!("invalid response: {:?}", msg))
.map_err(|e| anyhow!(e))?
.trim();
debug!(sl!(), "Hybrid Vsock host-side port: {:?}", response);
// Unset the timeouts in order to return the socket to blocking mode.
stream.set_read_timeout(None)?;
stream.set_write_timeout(None)?;
} else {
return Err(anyhow!(
"failed to setup Hybrid Vsock connection: {:?}",
msg
));
}
}
Ok(stream)
}
}
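// An illustrative sketch of the hybrid-vsock handshake above, assuming the
// default debug console port of 1026:
//
//   client -> "CONNECT 1026\n"
//   server -> "OK <host-side port>\n"   // traffic is now forwarded to guest port 1026
//
// Any other reply, or no reply within the 5 second timeout, aborts the setup.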
fn setup_client(server_url: String, dbg_console_port: u32) -> anyhow::Result<UnixStream> {
// server address format: scheme://[cid|/x/domain.sock]:port
let url_fields: Vec<&str> = server_url.split("://").collect();
if url_fields.len() != 2 {
return Err(anyhow!("invalid URI"));
}
let scheme = url_fields[0].to_uppercase();
let sock_addr: Vec<&str> = url_fields[1].split(':').collect();
if sock_addr.len() != 2 {
return Err(anyhow!("invalid VSOCK server address URI"));
}
match scheme.as_str() {
// Hybrid Vsock: hvsock://<path>:<port>.
// Example: "hvsock:///x/y/z/kata.hvsock:port"
// Firecracker/Dragonball/CLH implement the hybrid vsock device model.
SCHEME_HYBRID_VSOCK => {
let hvsock_path = sock_addr[0].to_string();
if hvsock_path.is_empty() {
return Err(anyhow!("hvsock path cannot be empty"));
}
let hvsock = HvsockConfig::new(hvsock_path, dbg_console_port);
hvsock.setup_sock().context("set up hvsock")
}
// Vsock: vsock://<cid>:<port>
// Example: "vsock://31513974:1024"
// QEMU uses the vsock device model.
SCHEME_VSOCK => {
let sock_cid: u32 = match sock_addr[0] {
"-1" | "" => libc::VMADDR_CID_ANY,
_ => match sock_addr[0].parse::<u32>() {
Ok(cid) => cid,
Err(e) => return Err(anyhow!("vsock addr CID is INVALID: {:?}", e)),
},
};
let vsock = VsockConfig::new(sock_cid, dbg_console_port);
vsock.setup_sock().context("set up vsock")
}
// Anything else is an invalid URI scheme.
_ => {
return Err(anyhow!("invalid URI scheme: {:?}", scheme));
}
}
}
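// Illustrative server_url forms accepted by the match above (paths and CIDs
// are assumed examples):
//
//   "hvsock:///x/y/z/kata.hvsock:1026"  // hybrid vsock (Firecracker/Dragonball/CLH)
//   "vsock://31513974:1026"             // plain vsock (QEMU)
//   "vsock://-1:1026"                   // CID "-1" or "" maps to VMADDR_CID_ANY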
async fn get_agent_socket(sandbox_id: &str) -> anyhow::Result<String> {
let shim_client = MgmtClient::new(sandbox_id, Some(TIMEOUT))?;
// get agent sock from body when status code is OK.
let response = shim_client.get(AGENT_URL).await?;
let status = response.status();
if status != StatusCode::OK {
return Err(anyhow!("shim client get connection failed: {:?} ", status));
}
let body = hyper::body::to_bytes(response.into_body()).await?;
let agent_sock = String::from_utf8(body.to_vec())?;
Ok(agent_sock)
}
fn get_server_socket(sandbox_id: &str) -> anyhow::Result<String> {
let server_url = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?
.block_on(get_agent_socket(sandbox_id))
.context("get connection vsock")?;
Ok(server_url)
}
fn do_run_exec(sandbox_id: &str, dbg_console_vport: u32) -> anyhow::Result<()> {
// sandbox_id MUST be a long ID.
let server_url = get_server_socket(sandbox_id).context("get debug console socket URL")?;
if server_url.is_empty() {
return Err(anyhow!("server url is empty."));
}
let sock_stream = setup_client(server_url, dbg_console_vport)?;
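    // Bridge the local terminal and the console socket: register both with
    // epoll, put the terminal into raw mode so keystrokes pass through
    // unmodified, then pump events until the session ends.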
let mut epoll_context = EpollContext::new().expect("create epoll context");
epoll_context
.enable_stdin_event()
.expect("enable stdin event");
epoll_context
.init_debug_console_sock(sock_stream)
.expect("enable debug console sock");
let stdin_handle = io::stdin();
stdin_handle.lock().set_raw_mode().expect("set raw mode");
epoll_context
.do_process_handler()
.expect("do process handler");
epoll_context.do_exit();
Ok(())
}
// Entry point for the kata-ctl exec subcommand.
pub fn handle_exec(exec_args: ExecArguments) -> anyhow::Result<()> {
do_run_exec(exec_args.sandbox_id.as_str(), exec_args.vport)?;
Ok(())
}
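// Hypothetical call site for handle_exec(), assuming ExecArguments is a plain
// struct with public sandbox_id/vport fields (illustration only).
#[allow(dead_code)]
fn handle_exec_example() -> anyhow::Result<()> {
    let args = ExecArguments {
        // Hypothetical long sandbox ID; kata-ctl requires the full ID.
        sandbox_id: "f6b04bf0a5c4d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6"
            .to_string(),
        // Guest debug console vsock port.
        vport: 1026,
    };
    handle_exec(args)
}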
#[cfg(test)]
mod tests {
use super::*;
use micro_http::HttpServer;
#[test]
fn test_epoll_context_methods() {
let kata_hybrid_addr = "/tmp/kata_hybrid_vsock01.hvsock";
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
let mut server = HttpServer::new(kata_hybrid_addr).unwrap();
server.start_server().unwrap();
let sock_addr: UnixStream = UnixStream::connect(kata_hybrid_addr).unwrap();
let mut epoll_ctx = EpollContext::new().expect("epoll context");
epoll_ctx
.init_debug_console_sock(sock_addr)
.expect("enable debug console sock");
assert_eq!(epoll_ctx.stdin_index, 0);
assert!(epoll_ctx.debug_console_sock.is_some());
assert_eq!(epoll_ctx.dispatch_table[0], EpollDispatch::ServerSock);
assert_eq!(epoll_ctx.dispatch_table.len(), 1);
epoll_ctx.enable_stdin_event().expect("enable stdin event");
assert_eq!(epoll_ctx.stdin_index, 1);
assert_eq!(epoll_ctx.dispatch_table[1], EpollDispatch::Stdin);
assert_eq!(epoll_ctx.dispatch_table.len(), 2);
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
}
#[test]
fn test_setup_hvsock_failed() {
let kata_hybrid_addr = "/tmp/kata_hybrid_vsock02.hvsock";
let hybrid_sock_addr = "hvsock:///tmp/kata_hybrid_vsock02.hvsock:1024";
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
let dbg_console_port: u32 = 1026;
let mut server = HttpServer::new(kata_hybrid_addr).unwrap();
server.start_server().unwrap();
let stream = setup_client(hybrid_sock_addr.to_string(), dbg_console_port);
assert!(stream.is_err());
std::fs::remove_file(kata_hybrid_addr).unwrap_or_default();
}
#[test]
fn test_setup_vsock_client_failed() {
let hybrid_sock_addr = "hvsock://8:1024";
let dbg_console_port: u32 = 1026;
let stream = setup_client(hybrid_sock_addr.to_string(), dbg_console_port);
assert!(stream.is_err());
}
}

View File

@@ -85,6 +85,9 @@ endif
################################################################################
.PHONY: all
all: image initrd
rootfs-%: $(ROOTFS_BUILD_DEST)/.%$(ROOTFS_MARKER_SUFFIX)
	@ # DON'T remove. This is not a cancellation rule.
@@ -97,11 +100,13 @@ $(ROOTFS_BUILD_DEST)/.%$(ROOTFS_MARKER_SUFFIX):: rootfs-builder/%
# extract it in a local folder.
# Notes:
# - assuming an uncompressed initrd.
ifeq (dracut,$(BUILD_METHOD))
.PRECIOUS: $(ROOTFS_BUILD_DEST)/.dracut$(ROOTFS_MARKER_SUFFIX)
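# dracut produces the initrd first; the rootfs tree is then recovered by
# unpacking the (uncompressed) initrd cpio archive below.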
$(ROOTFS_BUILD_DEST)/.dracut$(ROOTFS_MARKER_SUFFIX): $(TARGET_INITRD)
mkdir -p $(TARGET_ROOTFS)
(cd $(TARGET_ROOTFS); cat $< | cpio --extract --preserve-modification-time --make-directories)
@touch $@
endif
image-%: $(IMAGES_BUILD_DEST)/kata-containers-image-%.img
	@ # DON'T remove. This is not a cancellation rule.
@@ -117,9 +122,6 @@ initrd-%: $(IMAGES_BUILD_DEST)/kata-containers-initrd-%.img
$(IMAGES_BUILD_DEST)/kata-containers-initrd-%.img: rootfs-%
$(call silent_run,Creating initrd image for $*,$(INITRD_BUILDER) -o $@ $(ROOTFS_BUILD_DEST)/$*_rootfs)
.PHONY: all
all: image initrd
.PHONY: rootfs
rootfs: $(TARGET_ROOTFS_MARKER)

View File

@@ -17,6 +17,8 @@ RUN CGO_ENABLED=0 DISABLE_DOCS=1 make BUILDTAGS=containers_image_openpgp GO_DYN_
FROM ${IMAGE_REGISTRY}/ubuntu:@OS_VERSION@
@SET_PROXY@
# makedev tries to mknod from postinst
RUN [ -x /usr/bin/systemd-detect-virt ] || ( echo "echo docker" >/usr/bin/systemd-detect-virt && chmod +x /usr/bin/systemd-detect-virt )
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
apt-get --no-install-recommends -y install \
@@ -35,6 +37,7 @@ RUN apt-get update && \
libgpgme-dev \
libssl-dev \
make \
makedev \
multistrap \
musl-tools \
pkg-config \

View File

@@ -60,4 +60,10 @@ EOF
# Reduce image size and memory footprint by removing unnecessary files and directories.
rm -rf $rootfs_dir/usr/share/{bash-completion,bug,doc,info,lintian,locale,man,menu,misc,pixmaps,terminfo,zsh}
# Minimal set of device nodes needed when AGENT_INIT=yes so that the
# kernel can properly set up stdout/stdin/stderr for us
pushd $rootfs_dir/dev
MAKEDEV -v console tty ttyS null zero fd
popd
}

View File

@@ -3,27 +3,27 @@
# SPDX-License-Identifier: Apache-2.0
# Specify alternative base image, e.g. clefos for s390x
ARG IMAGE
FROM ${IMAGE:-registry.centos.org/centos}:7
ARG BASE_IMAGE_NAME=ubuntu
ARG BASE_IMAGE_TAG=20.04
FROM $BASE_IMAGE_NAME:$BASE_IMAGE_TAG
ENV DEBIAN_FRONTEND=noninteractive
ARG KATA_ARTIFACTS=./kata-static.tar.xz
ARG DESTINATION=/opt/kata-artifacts
COPY ${KATA_ARTIFACTS} ${WORKDIR}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN \
yum -y update && \
yum -y install xz && \
yum clean all && \
apt-get update && \
apt-get install -y --no-install-recommends apt-transport-https ca-certificates curl xz-utils systemd && \
mkdir -p /etc/apt/keyrings/ && \
curl -fsSLo /etc/apt/keyrings/kubernetes-archive-keyring.gpg https://packages.cloud.google.com/apt/doc/apt-key.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | tee /etc/apt/sources.list.d/kubernetes.list && \
apt-get update && \
apt-get install -y --no-install-recommends kubectl && \
apt-get clean && rm -rf /var/lib/apt/lists/ && \
mkdir -p ${DESTINATION} && \
tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION}
# hadolint will deny echo -e, heredocs don't work in Dockerfiles, shell substitution doesn't work with $'...'
RUN \
echo "[kubernetes]" >> /etc/yum.repos.d/kubernetes.repo && \
echo "name=Kubernetes" >> /etc/yum.repos.d/kubernetes.repo && \
echo "baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$(uname -m)" >> /etc/yum.repos.d/kubernetes.repo && \
echo "gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg" >> /etc/yum.repos.d/kubernetes.repo && \
yum -y install kubectl && \
yum clean all
COPY scripts ${DESTINATION}/scripts

View File

@@ -27,19 +27,19 @@ spec:
fieldRef:
fieldPath: spec.nodeName
securityContext:
privileged: false
privileged: true
volumeMounts:
- name: dbus
mountPath: /var/run/dbus
mountPath: /var/run/dbus/system_bus_socket
- name: systemd
mountPath: /run/systemd
mountPath: /run/systemd/system
volumes:
- name: dbus
hostPath:
path: /var/run/dbus
path: /var/run/dbus/system_bus_socket
- name: systemd
hostPath:
path: /run/systemd
path: /run/systemd/system
updateStrategy:
rollingUpdate:
maxUnavailable: 1

View File

@@ -31,7 +31,7 @@ spec:
- name: CONFIGURE_CC
value: "yes"
securityContext:
privileged: false
privileged: true
volumeMounts:
- name: crio-conf
mountPath: /etc/crio/
@@ -40,9 +40,9 @@ spec:
- name: kata-artifacts
mountPath: /opt/kata/
- name: dbus
mountPath: /var/run/dbus
mountPath: /var/run/dbus/system_bus_socket
- name: systemd
mountPath: /run/systemd
mountPath: /run/systemd/system
- name: local-bin
mountPath: /usr/local/bin/
volumes:
@@ -58,10 +58,10 @@ spec:
type: DirectoryOrCreate
- name: dbus
hostPath:
path: /var/run/dbus
path: /var/run/dbus/system_bus_socket
- name: systemd
hostPath:
path: /run/systemd
path: /run/systemd/system
- name: local-bin
hostPath:
path: /usr/local/bin/

Some files were not shown because too many files have changed in this diff.