Merge pull request #7134 from stevenhorsman/CCv0-merge-19th-june
CCv0: Merge main into CCv0 branch
.github/workflows/build-kata-static-tarball-amd64.yaml (vendored)

@@ -2,6 +2,10 @@ name: CI | Build kata-static tarball for amd64
 on:
   workflow_call:
     inputs:
+      stage:
+        required: false
+        type: string
+        default: test
       tarball-suffix:
         required: false
         type: string
@@ -15,8 +19,11 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
+        stage:
+        - ${{ inputs.stage }}
         asset:
         - cloud-hypervisor
+        - cloud-hypervisor-glibc
        - firecracker
        - kernel
        - kernel-sev
@@ -32,11 +39,16 @@ jobs:
        - qemu-snp-experimental
        - qemu-tdx-experimental
        - rootfs-image
        - rootfs-image-tdx
        - rootfs-initrd
        - rootfs-initrd-mariner
        - rootfs-initrd-sev
        - shim-v2
        - tdvf
        - virtiofsd
+        exclude:
+        - stage: release
+          asset: cloud-hypervisor-glibc
     steps:
     - name: Login to Kata Containers quay.io
       if: ${{ inputs.push-to-registry == 'yes' }}
.github/workflows/ci-on-push.yaml (vendored, 4 changes)
@@ -52,3 +52,7 @@ jobs:
       registry: ghcr.io
       repo: ${{ github.repository_owner }}/kata-deploy-ci
       tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-amd64
+
+  run-metrics-tests:
+    needs: build-kata-static-tarball-amd64
+    uses: ./.github/workflows/run-launchtimes-metrics.yaml
.github/workflows/release-amd64.yaml (vendored, 2 changes)
@@ -9,6 +9,8 @@ on:
 jobs:
   build-kata-static-tarball-amd64:
     uses: ./.github/workflows/build-kata-static-tarball-amd64.yaml
+    with:
+      stage: release

   kata-deploy:
     needs: build-kata-static-tarball-amd64
.github/workflows/release-arm64.yaml (vendored, 2 changes)
@@ -9,6 +9,8 @@ on:
 jobs:
   build-kata-static-tarball-arm64:
     uses: ./.github/workflows/build-kata-static-tarball-arm64.yaml
+    with:
+      stage: release

   kata-deploy:
     needs: build-kata-static-tarball-arm64
.github/workflows/release-s390x.yaml (vendored, 2 changes)
@@ -9,6 +9,8 @@ on:
 jobs:
   build-kata-static-tarball-s390x:
     uses: ./.github/workflows/build-kata-static-tarball-s390x.yaml
+    with:
+      stage: release

   kata-deploy:
     needs: build-kata-static-tarball-s390x
.github/workflows/release.yaml (vendored, 4 changes)
@@ -83,7 +83,7 @@ jobs:
     - name: push amd64 static tarball to github
       run: |
         tag=$(echo $GITHUB_REF | cut -d/ -f3-)
-        tarball="kata-static-$tag-x86_64.tar.xz"
+        tarball="kata-static-$tag-amd64.tar.xz"
         mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}"
         pushd $GITHUB_WORKSPACE
         echo "uploading asset '${tarball}' for tag: ${tag}"
@@ -97,7 +97,7 @@ jobs:
     - name: push arm64 static tarball to github
       run: |
         tag=$(echo $GITHUB_REF | cut -d/ -f3-)
-        tarball="kata-static-$tag-aarch64.tar.xz"
+        tarball="kata-static-$tag-arm64.tar.xz"
         mv kata-static.tar.xz "$GITHUB_WORKSPACE/${tarball}"
         pushd $GITHUB_WORKSPACE
         echo "uploading asset '${tarball}' for tag: ${tag}"
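The rename only swaps the architecture suffix of the published release assets so it matches the per-arch workflow names. A quick illustrative sketch of the resulting names (the tag value is hypothetical):

```bash
# Illustrative only: asset names produced by the updated release steps
# for a hypothetical tag "3.2.0".
tag="3.2.0"
echo "kata-static-${tag}-amd64.tar.xz"   # previously kata-static-${tag}-x86_64.tar.xz
echo "kata-static-${tag}-arm64.tar.xz"   # previously kata-static-${tag}-aarch64.tar.xz
```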
.github/workflows/run-k8s-tests-on-aks.yaml (vendored, 75 changes)
@@ -17,79 +17,54 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
         host_os:
         - ubuntu
         vmm:
         - clh
         - dragonball
         - qemu
         include:
         - host_os: cbl-mariner
           vmm: clh
     runs-on: ubuntu-latest
     env:
       DOCKER_REGISTRY: ${{ inputs.registry }}
       DOCKER_REPO: ${{ inputs.repo }}
       DOCKER_TAG: ${{ inputs.tag }}
       GH_PR_NUMBER: ${{ github.event.pull_request.number }}
       KATA_HOST_OS: ${{ matrix.host_os }}
       KATA_HYPERVISOR: ${{ matrix.vmm }}
     steps:
     - uses: actions/checkout@v3
       with:
         ref: ${{ github.event.pull_request.head.sha }}

     - name: Download Azure CLI
-      run: |
-        curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+      run: bash tests/integration/gha-run.sh install-azure-cli

     - name: Log into the Azure account
-      run: |
-        az login \
-          --service-principal \
-          -u "${{ secrets.AZ_APPID }}" \
-          -p "${{ secrets.AZ_PASSWORD }}" \
-          --tenant "${{ secrets.AZ_TENANT_ID }}"
+      run: bash tests/integration/gha-run.sh login-azure
+      env:
+        AZ_APPID: ${{ secrets.AZ_APPID }}
+        AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }}
+        AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}

     - name: Create AKS cluster
-      run: |
-        az aks create \
-          -g "kataCI" \
-          -n "${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-${{ matrix.vmm }}-amd64" \
-          -s "Standard_D4s_v5" \
-          --node-count 1 \
-          --generate-ssh-keys
+      run: bash tests/integration/gha-run.sh create-cluster

     - name: Install `bats`
-      run: |
-        sudo apt-get update
-        sudo apt-get -y install bats
+      run: bash tests/integration/gha-run.sh install-bats

     - name: Install `kubectl`
-      run: |
-        sudo az aks install-cli
+      run: bash tests/integration/gha-run.sh install-kubectl

     - name: Download credentials for the Kubernetes CLI to use them
-      run: |
-        az aks get-credentials -g "kataCI" -n ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-${{ matrix.vmm }}-amd64
+      run: bash tests/integration/gha-run.sh get-cluster-credentials

     - name: Run tests
-      timeout-minutes: 35
-      run: |
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-
-        kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl apply -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
-        kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
-
-        # This is needed as the kata-deploy pod will be set to "Ready" when it starts running,
-        # which may cause issues like not having the node properly labeled or the artefacts
-        # properly deployed when the tests actually start running.
-        sleep 150s
-
-        pushd tests/integration/kubernetes
-        sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml
-        bash run_kubernetes_tests.sh
-        popd
-      env:
-        KATA_HYPERVISOR: ${{ matrix.vmm }}
+      timeout-minutes: 60
+      run: bash tests/integration/gha-run.sh run-tests-aks

     - name: Delete AKS cluster
       if: always()
-      run: |
-        az aks delete \
-          -g "kataCI" \
-          -n "${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-${{ matrix.vmm }}-amd64" \
-          --yes \
-          --no-wait
+      run: bash tests/integration/gha-run.sh delete-cluster
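The removed inline script papers over kata-deploy reporting "Ready" before node labelling finishes by sleeping a fixed 150s. Not part of this PR, but a sketch of a label-based wait that could replace the fixed sleep, assuming kata-deploy applies its usual `katacontainers.io/kata-runtime=true` node label once the artifacts are installed:

```bash
# Sketch: poll for the node label kata-deploy sets instead of sleeping a
# fixed amount of time. Assumes the katacontainers.io/kata-runtime=true
# label that kata-deploy applies after installing its artifacts.
for _ in $(seq 1 30); do
    kubectl get nodes -l katacontainers.io/kata-runtime=true \
        --no-headers 2>/dev/null | grep -q . && break
    sleep 10
done
```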
.github/workflows/run-k8s-tests-on-sev.yaml (vendored, 40 changes)
@@ -21,6 +21,10 @@ jobs:
         - qemu-sev
     runs-on: sev
     env:
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      KATA_HYPERVISOR: ${{ matrix.vmm }}
       KUBECONFIG: /home/kata/.kube/config
     steps:
     - uses: actions/checkout@v3
@@ -29,40 +33,8 @@ jobs:

     - name: Run tests
       timeout-minutes: 30
-      run: |
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-
-        kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl apply -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
-        kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
-
-        # This is needed as the kata-deploy pod will be set to "Ready" when it starts running,
-        # which may cause issues like not having the node properly labeled or the artefacts
-        # properly deployed when the tests actually start running.
-        sleep 60s
-
-        pushd tests/integration/kubernetes
-        sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml
-        bash run_kubernetes_tests.sh
-        popd
-      env:
-        KATA_HYPERVISOR: ${{ matrix.vmm }}
+      run: bash tests/integration/gha-run.sh run-tests-sev

     - name: Delete kata-deploy
       if: always()
-      run: |
-        kubectl delete -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
-
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-        kubectl apply -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        sleep 180s
-
-        kubectl delete -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        kubectl delete -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl delete -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
+      run: bash tests/integration/gha-run.sh cleanup-sev
.github/workflows/run-k8s-tests-on-snp.yaml (vendored, 40 changes)
@@ -21,6 +21,10 @@ jobs:
         - qemu-snp
     runs-on: sev-snp
     env:
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      KATA_HYPERVISOR: ${{ matrix.vmm }}
       KUBECONFIG: /home/kata/.kube/config
     steps:
     - uses: actions/checkout@v3
@@ -29,40 +33,8 @@ jobs:

     - name: Run tests
       timeout-minutes: 30
-      run: |
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-
-        kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl apply -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
-        kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
-
-        # This is needed as the kata-deploy pod will be set to "Ready" when it starts running,
-        # which may cause issues like not having the node properly labeled or the artefacts
-        # properly deployed when the tests actually start running.
-        sleep 60s
-
-        pushd tests/integration/kubernetes
-        sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml
-        bash run_kubernetes_tests.sh
-        popd
-      env:
-        KATA_HYPERVISOR: ${{ matrix.vmm }}
+      run: bash tests/integration/gha-run.sh run-tests-snp

     - name: Delete kata-deploy
       if: always()
-      run: |
-        kubectl delete -f tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
-
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-        kubectl apply -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        sleep 180s
-
-        kubectl delete -f tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        kubectl delete -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl delete -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
+      run: bash tests/integration/gha-run.sh cleanup-snp
.github/workflows/run-k8s-tests-on-tdx.yaml (vendored, 40 changes)
@@ -21,6 +21,10 @@ jobs:
         - qemu-tdx
     runs-on: tdx
     env:
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      KATA_HYPERVISOR: ${{ matrix.vmm }}
       KUBECONFIG: /etc/rancher/k3s/k3s.yaml
     steps:
     - uses: actions/checkout@v3
@@ -29,40 +33,8 @@ jobs:

     - name: Run tests
       timeout-minutes: 30
-      run: |
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
-        cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-
-        kubectl apply -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl apply -k tools/packaging/kata-deploy/kata-deploy/overlays/k3s
-        kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod
-        kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
-
-        # This is needed as the kata-deploy pod will be set to "Ready" when it starts running,
-        # which may cause issues like not having the node properly labeled or the artefacts
-        # properly deployed when the tests actually start running.
-        sleep 60s
-
-        pushd tests/integration/kubernetes
-        sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml
-        bash run_kubernetes_tests.sh
-        popd
-      env:
-        KATA_HYPERVISOR: ${{ matrix.vmm }}
+      run: bash tests/integration/gha-run.sh run-tests-tdx

     - name: Delete kata-deploy
       if: always()
-      run: |
-        kubectl delete -k tools/packaging/kata-deploy/kata-deploy/overlays/k3s
-        kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
-
-        sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
-        cat tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml | grep "${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}" || die "Failed to setup the tests image"
-        kubectl apply -k tools/packaging/kata-deploy/kata-cleanup/overlays/k3s
-        sleep 180s
-
-        kubectl delete -k tools/packaging/kata-deploy/kata-cleanup/overlays/k3s
-        kubectl delete -f tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml
-        kubectl delete -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml
+      run: bash tests/integration/gha-run.sh cleanup-tdx
.github/workflows/run-launchtimes-metrics.yaml (vendored, new file, 19 lines)
@@ -0,0 +1,19 @@
name: CI | Run launch-times metrics
on:
  workflow_call:

jobs:
  launch-times-tests:
    runs-on: metrics
    env:
      GOPATH: ${{ github.workspace }}
    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: run launch times on qemu
        run: bash tests/metrics/gha-run.sh run-test-launchtimes-qemu

      - name: run launch times on clh
        run: bash tests/metrics/gha-run.sh run-test-launchtimes-clh
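Because the workflow body is just two script invocations, the same metrics can be exercised outside CI; assuming a checkout of the repository on a host provisioned like the self-hosted `metrics` runner:

```bash
# From a checkout of the repository, run the same entry points the
# workflow calls:
bash tests/metrics/gha-run.sh run-test-launchtimes-qemu
bash tests/metrics/gha-run.sh run-test-launchtimes-clh
```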
.github/workflows/snap-release.yaml (vendored, deleted, 53 lines)
@@ -1,53 +0,0 @@
name: Release Kata in snapcraft store
on:
  push:
    tags:
      - '[0-9]+.[0-9]+.[0-9]+*'

env:
  SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.snapcraft_token }}

jobs:
  release-snap:
    runs-on: ubuntu-20.04
    steps:
      - name: Check out Git repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Install Snapcraft
        run: |
          # Required to avoid snapcraft install failure
          sudo chown root:root /

          # "--classic" is needed for the GitHub action runner
          # environment.
          sudo snap install snapcraft --classic

          # Allow other parts to access snap binaries
          echo /snap/bin >> "$GITHUB_PATH"

      - name: Build snap
        run: |
          # Removing man-db, workflow kept failing, fixes: #4480
          sudo apt -y remove --purge man-db
          sudo apt-get update
          sudo apt-get install -y git git-extras
          kata_url="https://github.com/kata-containers/kata-containers"
          latest_version=$(git ls-remote --tags ${kata_url} | egrep -o "refs.*" | egrep -v "\-alpha|\-rc|{}" | egrep -o "[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+" | sort -V -r | head -1)
          current_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
          # Check semantic versioning format (x.y.z) and if the current tag is the latest tag
          if echo "${current_version}" | grep -q "^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+$" && echo -e "$latest_version\n$current_version" | sort -C -V; then
            # Current version is the latest version, build it
            snapcraft snap --debug --destructive-mode
          fi

      - name: Upload snap
        run: |
          snap_version="$(echo ${GITHUB_REF} | cut -d/ -f3)"
          snap_file="kata-containers_${snap_version}_amd64.snap"
          # Upload the snap if it exists
          if [ -f ${snap_file} ]; then
            snapcraft upload --release=stable ${snap_file}
          fi
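The removed workflow's latest-tag guard relies on `sort -C -V`, which exits 0 only when its input is already in ascending version order; a minimal illustration of that behaviour:

```bash
# sort -C -V exits 0 when the lines are already in ascending version
# order, so "latest\ncurrent" passing the check means current >= latest.
printf '3.1.0\n3.2.0\n' | sort -C -V && echo "3.2.0 is the newer version"
printf '3.2.0\n3.1.0\n' | sort -C -V || echo "3.1.0 is older, skip the build"
```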
.github/workflows/snap.yaml (vendored, deleted, 37 lines)
@@ -1,37 +0,0 @@
name: snap CI
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - edited
    paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ]

jobs:
  test:
    runs-on: ubuntu-20.04
    steps:
      - name: Check out
        if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Install Snapcraft
        if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
        run: |
          # Required to avoid snapcraft install failure
          sudo chown root:root /

          # "--classic" is needed for the GitHub action runner
          # environment.
          sudo snap install snapcraft --classic

          # Allow other parts to access snap binaries
          echo /snap/bin >> "$GITHUB_PATH"

      - name: Build snap
        if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }}
        run: |
          snapcraft snap --debug --destructive-mode
README.md:
@@ -1,4 +1,6 @@
-<img src="https://www.openstack.org/assets/kata/kata-vertical-on-white.png" width="150">
+<img src="https://object-storage-ca-ymq-1.vexxhost.net/swift/v1/6e4619c416ff4bd19e1c087f27a43eea/www-images-prod/openstack-logo/kata/SVG/kata-1.svg" width="900">
+
+[](https://github.com/kata-containers/kata-containers/actions/workflows/payload-after-push.yaml)

 # Kata Containers

@@ -144,8 +146,6 @@ The table below lists the remaining parts of the project:

 Kata Containers is now
 [available natively for most distributions](docs/install/README.md#packaged-installation-methods).
 However, packaging scripts and metadata are still used to generate [snap](snap/local) and GitHub releases. See
 the [components](#components) section for further details.

 ## Glossary of Terms
docs/Developer-Guide.md:
@@ -587,10 +587,15 @@ $ sudo kata-monitor

 #### Connect to debug console

 Command `kata-runtime exec` is used to connect to the debug console.
+You need to start a container for example:
+```bash
+$ sudo ctr run --runtime io.containerd.kata.v2 -d docker.io/library/ubuntu:latest testdebug
+```
+
+Then, you can use the command `kata-runtime exec <sandbox id>` to connect to the debug console.

 ```
-$ kata-runtime exec 1a9ab65be63b8b03dfd0c75036d27f0ed09eab38abb45337fea83acd3cd7bacd
+$ kata-runtime exec testdebug
 bash-4.2# id
 uid=0(root) gid=0(root) groups=0(root)
 bash-4.2# pwd
docs/Limitations.md:
@@ -147,7 +147,8 @@ these commands is potentially challenging.

 See issue https://github.com/clearcontainers/runtime/issues/341 and [the constraints challenge](#the-constraints-challenge) for more information.

 For CPUs resource management see
-[CPU constraints](design/vcpu-handling.md).
+[CPU constraints(in runtime-go)](design/vcpu-handling-runtime-go.md).
+[CPU constraints(in runtime-rs)](design/vcpu-handling-runtime-rs.md).

 # Architectural limitations
docs/design/README.md:
@@ -6,7 +6,8 @@ Kata Containers design documents:

 - [API Design of Kata Containers](kata-api-design.md)
 - [Design requirements for Kata Containers](kata-design-requirements.md)
 - [VSocks](VSocks.md)
-- [VCPU handling](vcpu-handling.md)
+- [VCPU handling(in runtime-go)](vcpu-handling-runtime-go.md)
+- [VCPU handling(in runtime-rs)](vcpu-handling-runtime-rs.md)
 - [VCPU threads pinning](vcpu-threads-pinning.md)
 - [Host cgroups](host-cgroups.md)
 - [Agent systemd cgroup](agent-systemd-cgroup.md)
docs/design/VSocks.md:
@@ -78,4 +78,4 @@ with the containers is if the VM itself or the `containerd-shim-kata-v2` dies, i
 the containers are removed automatically.

 [1]: https://wiki.qemu.org/Features/VirtioVsock
-[2]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods
+[2]: ./vcpu-handling-runtime-go.md#virtual-cpus-and-kubernetes-pods
docs/design/vcpu-handling-runtime-rs.md (new file, 51 lines)
@@ -0,0 +1,51 @@
# Virtual machine vCPU sizing in Kata Containers 3.0

> Preview:
> [Kubernetes (since 1.23)][1] and [containerd (since 1.6.0-beta4)][2] help calculate the `Sandbox Size` info and pass it to Kata Containers through annotations.
> To adapt to this beneficial change while remaining compatible with the past, we have implemented a new way of handling vCPUs in `runtime-rs`, which differs slightly from the original `runtime-go` design.

## When do we need to handle vCPUs size?

vCPU sizing should be determined by the container workloads, so throughout the life cycle of Kata Containers there are several points in time at which we need to decide how many vCPUs are appropriate: mainly `CreateVM`, `CreateContainer`, `UpdateContainer`, and `DeleteContainer`.

* `CreateVM`: When creating a sandbox, we need to know how many vCPUs to start the VM with.
* `CreateContainer`: When creating a new container in the VM, we may need to hot-plug vCPUs according to the requirements in the container's spec.
* `UpdateContainer`: When receiving an `UpdateContainer` request, we may need to update the vCPU resources according to the new requirements of the container.
* `DeleteContainer`: When a container is removed from the VM, we may need to hot-unplug vCPUs to reclaim the vCPU resources introduced by the container.

## On what basis do we calculate the number of vCPUs?

When Kata calculates the number of vCPUs, there are three data sources: the `default_vcpus` and `default_maxvcpus` specified in the configuration file (named `TomlConfig` later in the doc), the `io.kubernetes.cri.sandbox-cpu-quota` and `io.kubernetes.cri.sandbox-cpu-period` annotations passed by the upper-layer runtime, and the corresponding CPU resource part of the container's spec when `CreateContainer`/`UpdateContainer`/`DeleteContainer` is requested.

Our understanding and prioritization of these sources are as follows, and they affect how we calculate the number of vCPUs later.

* From `TomlConfig`:
  * `default_vcpus`: default number of vCPUs when starting a VM.
  * `default_maxvcpus`: maximum number of vCPUs.
* From `Annotation`:
  * `InitialSize`: we call the size of the resource passed from the annotations the `InitialSize`. Kubernetes calculates the sandbox size according to the Pod's statement, which is the `InitialSize` here. This is the size we want to prioritize.
* From `Container Spec`:
  * The amount of CPU resources that the container wants to use is declared through its spec. Together with the aforementioned annotations, we mainly consider `cpu quota` and `cpuset` when calculating the number of vCPUs; a worked quota example follows this list.
  * `cpu quota`: `cpu quota` is the most common way to declare the amount of CPU resources. The number of vCPUs introduced by the `cpu quota` declared in a container's spec is: `vCPUs = ceiling( quota / period )`.
  * `cpuset`: `cpuset` is often used to bind the CPUs that tasks can run on. The number of vCPUs that may be introduced by the `cpuset` declared in a container's spec is the number of CPUs specified in the set that do not overlap with other containers.
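A worked example of the quota calculation above, using illustrative values:

```bash
# vCPUs = ceiling(quota / period), computed with integer arithmetic.
quota=150000    # e.g. from io.kubernetes.cri.sandbox-cpu-quota
period=100000   # e.g. from io.kubernetes.cri.sandbox-cpu-period
vcpus=$(( (quota + period - 1) / period ))
echo "${vcpus}"   # prints 2: 1.5 CPUs worth of quota rounds up to 2 vCPUs
```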
## How to calculate and adjust the vCPUs size

There are two vCPU sizes we need to consider: the number of vCPUs when starting the VM (named `Boot Size` in the doc), and the number of vCPUs when a `CreateContainer`/`UpdateContainer`/`DeleteContainer` request is received (`Real-time Size` in the doc).

### `Boot Size`

The main considerations are `InitialSize` and `default_vcpus`, with the following principles:
`InitialSize` has priority over the `default_vcpus` declared in `TomlConfig`.
1. When there is such an annotation statement, the original `default_vcpus` is replaced by the number of vCPUs in the `InitialSize` as the `Boot Size`. (Because not all runtimes support this annotation for the time being, we still keep `default_vcpus` in `TomlConfig`.)
2. When the specs of all containers are aggregated for the sandbox size calculation, the method is consistent with the calculation of `InitialSize` here.

### `Real-time Size`

When we receive an OCI request, it may be for a single container, but what we have to consider is the number of vCPUs for the entire VM. So we maintain a list, and every time there is a demand for adjustment, the entire list is traversed to calculate a new value for the number of vCPUs. In addition, the following principles apply:
1. Do not cut computing power: try to keep the number of vCPUs specified by `InitialSize`.
   * So the resulting number of vCPUs will never be less than the `Boot Size`.
2. `cpu quota` takes precedence over `cpuset`, and the setting history is taken into account.
   * We think quota describes the CPU time slice that a cgroup can use, while `cpuset` describes the actual CPU numbers that a cgroup can use. Quota better describes the size of the CPU time slice a cgroup actually wants to use; `cpuset` only describes which CPUs the cgroup may run on, and a cgroup can use the specified CPUs while consuming a smaller time slice, so quota takes precedence over `cpuset`.
   * On the one hand, when both `cpu quota` and `cpuset` are specified, we calculate the number of vCPUs based on `cpu quota` and ignore `cpuset`. On the other hand, if `cpu quota` was used to control the number of vCPUs in the past and only `cpuset` was updated during `UpdateContainer`, we do not adjust the number of vCPUs at that time.
3. `StaticSandboxResourceMgmt` controls hotplug.
   * Some VMMs and the kernels of some architectures do not support hotplugging. We accommodate this situation through `StaticSandboxResourceMgmt`: when `StaticSandboxResourceMgmt = true` is set, we make no further attempts to update the number of vCPUs after booting.

[1]: https://github.com/kubernetes/kubernetes/pull/104886
[2]: https://github.com/containerd/containerd/pull/6155
docs/how-to/README.md:
@@ -45,6 +45,8 @@
 - [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md)
 - [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md)
 - [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md)
+- [How to run Kata Containers with kinds of Block Volumes](how-to-run-kata-containers-with-kinds-of-Block-Volumes.md)

 ## Confidential Containers
 - [How to use build and test the Confidential Containers `CCv0` proof of concept](how-to-build-and-test-ccv0.md)
 - [How to generate a Kata Containers payload for the Confidential Containers Operator](how-to-generate-a-kata-containers-payload-for-the-confidential-containers-operator.md)
docs/how-to/how-to-run-kata-containers-with-kinds-of-Block-Volumes.md (new file, 78 lines):
@@ -0,0 +1,78 @@
# A new way for Kata Containers to use kinds of Block Volumes

> **Note:** This guide is only available for runtime-rs with the default hypervisor Dragonball.
> Support for other hypervisors is still ongoing, and this guide will be updated when they're ready.

## Background

Currently, there is no widely applicable and convenient method available for users to use some kinds of backend storage, such as file-on-host based block volumes, SPDK based volumes or VFIO device based volumes, with Kata Containers, so we adopt [Proposal: Direct Block Device Assignment](https://github.com/kata-containers/kata-containers/blob/main/docs/design/direct-blk-device-assignment.md) to address this.

## Solution

According to the proposal, the `kata-ctl direct-volume` command is used to add a directly assigned block volume device to the Kata Containers runtime.

Then, with the help of the method [get_volume_mount_info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L95), information is read from the JSON file (`mountinfo.json`) and parsed into the structure [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70), which is used to save device-related information.

To describe a device, we only fill in `mountinfo.json` the fields `device`, `volume_type`, `fs_type`, `metadata` and `options`, which correspond to the fields in [Direct Volume Info](https://github.com/kata-containers/kata-containers/blob/099b4b0d0e3db31b9054e7240715f0d7f51f9a1c/src/libs/kata-types/src/mount.rs#L70).

The JSON file `mountinfo.json` is placed in a sub-path, `/kubelet/kata-test-vol-001/volume001`, under the fixed path `/run/kata-containers/shared/direct-volumes/`.
The full path would look like `/run/kata-containers/shared/direct-volumes/kubelet/kata-test-vol-001/volume001`, but for security reasons the sub-path is
encoded, giving `/run/kata-containers/shared/direct-volumes/L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx`.
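The encoded directory name shown above is simply the base64 form of the volume sub-path, which can be checked from a shell:

```bash
# The sub-path is the base64 encoding of the volume path:
echo -n "/kubelet/kata-test-vol-001/volume001" | base64
# L2t1YmVsZXQva2F0YS10ZXN0LXZvbC0wMDEvdm9sdW1lMDAx
```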
Finally, when running a Kata Container with `ctr run --mount type=X,src=Y,dst=Z,options=rbind:rw`, the `type=X` should specify the proprietary type designed for that kind of volume.

The supported types are:

- `directvol` for direct volume
- `spdkvol` for SPDK volume (TBD)
- `vfiovol` for VFIO device based volume (TBD)

## Setup Device and Run a Kata-Containers

### Direct Block Device Based Volume

#### Create raw block based backend storage

> **Tip:** raw block based backend storage MUST be formatted with `mkfs`.

```bash
$ sudo dd if=/dev/zero of=/tmp/stor/rawdisk01.20g bs=1M count=20480
$ sudo mkfs.ext4 /tmp/stor/rawdisk01.20g
```
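As an optional sanity check before handing the image to Kata (not part of the guide itself), the freshly formatted image can be mounted through a loop device:

```bash
# Optional sanity check: loop-mount the raw image to confirm the
# filesystem was created correctly, then unmount it again.
sudo mkdir -p /mnt/rawdisk01
sudo mount -o loop /tmp/stor/rawdisk01.20g /mnt/rawdisk01
df -h /mnt/rawdisk01
sudo umount /mnt/rawdisk01
```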
#### Set up the direct block device for Kata Containers

```json
{
  "device": "/tmp/stor/rawdisk01.20g",
  "volume_type": "directvol",
  "fs_type": "ext4",
  "metadata": "{}",
  "options": []
}
```

```bash
$ sudo ./kata-ctl direct-volume add /kubelet/kata-direct-vol-002/directvol002 "{\"device\": \"/tmp/stor/rawdisk01.20g\", \"volume_type\": \"directvol\", \"fs_type\": \"ext4\", \"metadata\": \"{}\", \"options\": []}"
$ # /kubelet/kata-direct-vol-002/directvol002 <==> /run/kata-containers/shared/direct-volumes/W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx
$ cat W1lMa2F0ZXQva2F0YS10a2F0DAxvbC0wMDEvdm9sdW1lMDAx/mountInfo.json
{"volume_type":"directvol","device":"/tmp/stor/rawdisk01.20g","fs_type":"ext4","metadata":{},"options":[]}
```

#### Run a Kata Container with the direct block device volume

```bash
$ # type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw
$ sudo ctr run -t --rm --runtime io.containerd.kata.v2 --mount type=directvol,src=/kubelet/kata-direct-vol-002/directvol002,dst=/disk002,options=rbind:rw "$image" kata-direct-vol-xx05302045 /bin/bash
```

### SPDK Device Based Volume

TBD

### VFIO Device Based Volume

TBD
docs/install/README.md:
@@ -19,7 +19,6 @@ Packaged installation methods uses your distribution's native package format (such as RPM or DEB).

 |------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|
 | [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. |
 | [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. |
-| ~~[Using snap](#snap-installation)~~ | ~~Easy to install~~ | ~~yes~~ | **Snap is unmaintained!** ~~Good alternative to official distro packages.~~ |
 | [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. |
 | [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. |
 | [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. |
@@ -42,27 +41,6 @@ Kata packages are provided by official distribution repositories for:
 | [CentOS](centos-installation-guide.md) | 8 |
 | [Fedora](fedora-installation-guide.md) | 34 |

-### Snap Installation
-
-> **WARNING:**
->
-> The Snap package method is **unmaintained** and only provides an old
-> version of Kata Containers:
-> The [latest Kata Containers snap](https://snapcraft.io/kata-containers)
-> provides Kata Containers
-> [version 2.4.2](https://github.com/kata-containers/kata-containers/releases/tag/2.4.2)
-> but the latest stable Kata Containers release at the time of writing is
-> [version 3.1.0](https://github.com/kata-containers/kata-containers/releases/tag/3.1.0).
->
-> We recommend strongly that you switch to an alternative Kata Containers installation method.
->
-> See: https://github.com/kata-containers/kata-containers/issues/6769
-> for further details.
-
-~~The snap installation is available for all distributions which support `snapd`.~~
-
-~~[Use snap](snap-installation-guide.md) to install Kata Containers from https://snapcraft.io.~~

 ### Automatic Installation

 [Use `kata-manager`](/utils/README.md) to automatically install a working Kata Containers system.
@@ -26,7 +26,6 @@ architectures:

 |------------------------------------------------------|----------------------------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------------------------------|----------- |
 | [Using kata-deploy](#kata-deploy-installation) | The preferred way to deploy the Kata Containers distributed binaries on a Kubernetes cluster | **No!** | Best way to give it a try on kata-containers on an already up and running Kubernetes cluster. | Yes |
 | [Using official distro packages](#official-packages) | Kata packages provided by Linux distributions official repositories | yes | Recommended for most users. | No |
-| [Using snap](#snap-installation) | Easy to install | yes | Good alternative to official distro packages. | No |
 | [Automatic](#automatic-installation) | Run a single command to install a full system | **No!** | For those wanting the latest release quickly. | No |
 | [Manual](#manual-installation) | Follow a guide step-by-step to install a working system | **No!** | For those who want the latest release with more control. | No |
 | [Build from source](#build-from-source-installation) | Build the software components manually | **No!** | Power users and developers only. | Yes |
@@ -36,8 +35,6 @@ architectures:

 Follow the [`kata-deploy`](../../tools/packaging/kata-deploy/README.md).
 ### Official packages
 `ToDo`
-### Snap Installation
-`ToDo`
 ### Automatic Installation
 `ToDo`
 ### Manual Installation
docs/install/snap-installation-guide.md (deleted, 82 lines):
@@ -1,82 +0,0 @@
# Kata Containers snap package

> **WARNING:**
>
> The Snap package method is **unmaintained** and only provides an old
> version of Kata Containers:
> The [latest Kata Containers snap](https://snapcraft.io/kata-containers)
> provides Kata Containers
> [version 2.4.2](https://github.com/kata-containers/kata-containers/releases/tag/2.4.2)
> but the latest stable Kata Containers release at the time of writing is
> [version 3.1.0](https://github.com/kata-containers/kata-containers/releases/tag/3.1.0).
>
> We recommend strongly that you switch to an alternative Kata Containers installation method.
>
> See: https://github.com/kata-containers/kata-containers/issues/6769
> for further details.

## Install Kata Containers

Kata Containers can be installed in any Linux distribution that supports
[snapd](https://docs.snapcraft.io/installing-snapd).

Run the following command to install **Kata Containers**:

> **WARNING:**
>
> The Snap package method is **unmaintained** and only provides an old
> version of Kata Containers:
> The [latest Kata Containers snap](https://snapcraft.io/kata-containers)
> provides Kata Containers
> [version 2.4.2](https://github.com/kata-containers/kata-containers/releases/tag/2.4.2)
> but the latest stable Kata Containers release at the time of writing is
> [version 3.1.0](https://github.com/kata-containers/kata-containers/releases/tag/3.1.0).
>
> We recommend strongly that you switch to an alternative Kata Containers installation method.
>
> See: https://github.com/kata-containers/kata-containers/issues/6769
> for further details.

```sh
$ sudo snap install kata-containers --stable --classic
```

## Configure Kata Containers

By default the Kata Containers snap image is mounted at `/snap/kata-containers` as a
read-only file system, therefore the default configuration file can not be edited.
Fortunately, Kata Containers supports loading a configuration file from a
path other than the default.

```sh
$ sudo mkdir -p /etc/kata-containers
$ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers/
$ $EDITOR /etc/kata-containers/configuration.toml
```

## Integration with shim v2 Container Engines

The container engine daemon (`cri-o`, `containerd`, etc.) needs to be able to find the
`containerd-shim-kata-v2` binary to allow Kata Containers to be created.
Run the following command to create a symbolic link to the shim v2 binary.

```sh
$ sudo ln -sf /snap/kata-containers/current/usr/bin/containerd-shim-kata-v2 /usr/local/bin/containerd-shim-kata-v2
```

Once the symbolic link has been created and the engine daemon configured, `io.containerd.kata.v2`
can be used as the runtime.

Read the following documents to learn how to run Kata Containers 2.x with `containerd`:

* [How to use Kata Containers and Containerd](../how-to/containerd-kata.md)
* [Install Kata Containers with containerd](./container-manager/containerd/containerd-install.md)

## Remove Kata Containers snap package

Run the following command to remove the Kata Containers snap:

```sh
$ sudo snap remove kata-containers
```
Kata Containers snap image README (deleted, 101 lines):
@@ -1,101 +0,0 @@
# Kata Containers snap image

This directory contains the resources needed to build the Kata Containers
[snap][1] image.

## Initial setup

Kata Containers can be installed in any Linux distribution that supports
[snapd](https://docs.snapcraft.io/installing-snapd). For this example, we
assume Ubuntu as your base distro.

```sh
$ sudo apt-get --no-install-recommends install -y apt-utils ca-certificates snapd snapcraft
```

## Install snap

You can install the Kata Containers snap from the [snapcraft store][8] or by running the following command:

```sh
$ sudo snap install kata-containers --classic
```

## Build and install snap image

Run the command below, which uses the packaging Makefile to build the snap image:

```sh
$ make -C tools/packaging snap
```

> **Warning:**
>
> By default, `snapcraft` will create a clean virtual machine
> environment to build the snap in using the `multipass` tool.
>
> However, `multipass` is silently disabled when `--destructive-mode` is
> used.
>
> Since building the Kata Containers package currently requires
> `--destructive-mode`, the snap will be built using the host
> environment. To avoid parts of the build auto-detecting additional
> features to enable (for example for QEMU), we recommend that you
> only run the snap build in a minimal host environment.

To install the resulting snap image, snap must be put in [classic mode][3] and the
security confinement must be disabled (`--classic`). Also, since the resulting snap
has not been signed, the verification of its signature must be omitted (`--dangerous`).

```sh
$ sudo snap install --classic --dangerous "kata-containers_${version}_${arch}.snap"
```

Replace `${version}` with the current version of Kata Containers and `${arch}` with
the system architecture.

## Configure Kata Containers

By default the Kata Containers snap image is mounted at `/snap/kata-containers` as a
read-only file system, therefore the default configuration file can not be edited.
Fortunately, [`kata-runtime`][4] supports loading a configuration file from a
path other than the default.

```sh
$ sudo mkdir -p /etc/kata-containers
$ sudo cp /snap/kata-containers/current/usr/share/defaults/kata-containers/configuration.toml /etc/kata-containers/
$ $EDITOR /etc/kata-containers/configuration.toml
```

## Integration with docker and Kubernetes

The path to the runtime provided by the Kata Containers snap image is
`/snap/kata-containers/current/usr/bin/kata-runtime`. You should use it to
run Kata Containers with [docker][9] and [Kubernetes][10].

## Remove snap

You can remove the Kata Containers snap by running the following command:

```sh
$ sudo snap remove kata-containers
```

## Limitations

The [miniOS image][2] is not included in the snap image as it is not possible for
QEMU to open a guest RAM backing store on a read-only filesystem. Fortunately,
you can start Kata Containers with a Linux initial RAM disk (initrd) that is
included in the snap image. If you want to use the miniOS image instead of initrd,
then a new configuration file can be [created](#configure-kata-containers)
and [configured][7].

[1]: https://docs.snapcraft.io/snaps/intro
[2]: ../../docs/design/architecture/README.md#root-filesystem-image
[3]: https://docs.snapcraft.io/reference/confinement#classic
[4]: https://github.com/kata-containers/kata-containers/tree/main/src/runtime#configuration
[5]: https://docs.docker.com/engine/reference/commandline/dockerd
[6]: ../../docs/install/docker/ubuntu-docker-install.md
[7]: ../../docs/Developer-Guide.md#configure-to-use-initrd-or-rootfs-image
[8]: https://snapcraft.io/kata-containers
[9]: ../../docs/Developer-Guide.md#run-kata-containers-with-docker
[10]: ../../docs/Developer-Guide.md#run-kata-containers-with-kubernetes
snap/local/snap-common.sh (deleted, 114 lines):
@@ -1,114 +0,0 @@
#!/usr/bin/env bash
#
# Copyright (c) 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Description: Idempotent script to be sourced by all parts in a
#   snapcraft config file.

set -o errexit
set -o nounset
set -o pipefail

# XXX: Bash-specific code. zsh doesn't support this option and that *does*
# matter if this script is run sourced... since it'll be using zsh! ;)
[ -n "$BASH_VERSION" ] && set -o errtrace

[ -n "${DEBUG:-}" ] && set -o xtrace

die()
{
    echo >&2 "ERROR: $0: $*"
}

[ -n "${SNAPCRAFT_STAGE:-}" ] ||\
    die "must be sourced from a snapcraft config file"

snap_yq_version=3.4.1

snap_common_install_yq()
{
    export yq="${SNAPCRAFT_STAGE}/bin/yq"

    local yq_pkg
    yq_pkg="github.com/mikefarah/yq"

    local yq_url
    yq_url="https://${yq_pkg}/releases/download/${snap_yq_version}/yq_${goos}_${goarch}"
    curl -o "${yq}" -L "${yq_url}"
    chmod +x "${yq}"
}

# Function that should be called for each snap "part" in
# snapcraft.yaml.
snap_common_main()
{
    # Architecture
    arch="$(uname -m)"

    case "${arch}" in
        aarch64)
            goarch="arm64"
            qemu_arch="${arch}"
            ;;

        ppc64le)
            goarch="ppc64le"
            qemu_arch="ppc64"
            ;;

        s390x)
            goarch="${arch}"
            qemu_arch="${arch}"
            ;;

        x86_64)
            goarch="amd64"
            qemu_arch="${arch}"
            ;;

        *) die "unsupported architecture: ${arch}" ;;
    esac

    dpkg_arch=$(dpkg --print-architecture)

    # golang
    #
    # We need the O/S name in golang format, but since we don't
    # know if the godeps part has run, we don't know if golang is
    # available yet, hence fall back to a standard system command.
    goos="$(go env GOOS &>/dev/null || true)"
    [ -z "$goos" ] && goos=$(uname -s|tr '[A-Z]' '[a-z]')

    export GOROOT="${SNAPCRAFT_STAGE}"
    export GOPATH="${GOROOT}/gopath"
    export GO111MODULE="auto"

    mkdir -p "${GOPATH}/bin"
    export PATH="${GOPATH}/bin:${PATH}"

    # Proxy
    export http_proxy="${http_proxy:-}"
    export https_proxy="${https_proxy:-}"

    # Binaries
    mkdir -p "${SNAPCRAFT_STAGE}/bin"

    export PATH="$PATH:${SNAPCRAFT_STAGE}/bin"

    # YAML query tool
    export yq="${SNAPCRAFT_STAGE}/bin/yq"

    # Kata paths
    export kata_dir=$(printf "%s/src/github.com/%s/%s" \
        "${GOPATH}" \
        "${SNAPCRAFT_PROJECT_NAME}" \
        "${SNAPCRAFT_PROJECT_NAME}")

    export versions_file="${kata_dir}/versions.yaml"

    [ -n "${yq:-}" ] && [ -x "${yq:-}" ] || snap_common_install_yq
}

snap_common_main
snapcraft.yaml (deleted, 170 lines):
@@ -1,170 +0,0 @@
name: kata-containers
website: https://github.com/kata-containers/kata-containers
summary: Build lightweight VMs that seamlessly plug into the containers ecosystem
description: |
  Kata Containers is an open source project and community working to build a
  standard implementation of lightweight Virtual Machines (VMs) that feel and
  perform like containers, but provide the workload isolation and security
  advantages of VMs
confinement: classic
adopt-info: metadata
base: core20

parts:
  metadata:
    plugin: nil
    prime:
      - -*
    build-packages:
      - git
      - git-extras
    override-pull: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      version="9999"

      if echo "${GITHUB_REF:-}" | grep -q -E "^refs/tags"; then
        version=$(echo ${GITHUB_REF:-} | cut -d/ -f3)
        git checkout ${version}
      fi

      snapcraftctl set-grade "stable"
      snapcraftctl set-version "${version}"

      mkdir -p $(dirname ${kata_dir})
      ln -sf $(realpath "${SNAPCRAFT_STAGE}/..") ${kata_dir}

  docker:
    after: [metadata]
    plugin: nil
    prime:
      - -*
    build-packages:
      - ca-certificates
      - containerd
      - curl
      - gnupg
      - lsb-release
      - runc
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      curl -fsSL https://download.docker.com/linux/ubuntu/gpg |\
        sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
      distro_codename=$(lsb_release -cs)
      echo "deb [arch=${dpkg_arch} signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu ${distro_codename} stable" |\
        sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
      sudo apt-get -y update
      sudo apt-get -y install docker-ce docker-ce-cli containerd.io

      echo "Unmasking docker service"
      sudo -E systemctl unmask docker.service || true
      sudo -E systemctl unmask docker.socket || true
      echo "Adding $USER into docker group"
      sudo -E gpasswd -a $USER docker
      echo "Starting docker"
      # docker may fail to start using "fd://" in docker.service
      sudo sed -i 's/fd:\/\//unix:\/\//g' /lib/systemd/system/docker.service
      sudo systemctl daemon-reload
      sudo -E systemctl start docker || true

  image:
    after: [docker]
    plugin: nil
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      cd "${SNAPCRAFT_PROJECT_DIR}"
      sudo -E NO_TTY=true make rootfs-image-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-rootfs-image.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

      sudo -E NO_TTY=true make rootfs-initrd-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-rootfs-initrd.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

  runtime:
    after: [docker]
    plugin: nil
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      cd "${SNAPCRAFT_PROJECT_DIR}"
      sudo -E NO_TTY=true make shim-v2-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-shim-v2.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

      mkdir -p "${SNAPCRAFT_PART_INSTALL}/usr/bin"
      ln -sf "${SNAPCRAFT_PART_INSTALL}/opt/kata/bin/containerd-shim-kata-v2" "${SNAPCRAFT_PART_INSTALL}/usr/bin/containerd-shim-kata-v2"
      ln -sf "${SNAPCRAFT_PART_INSTALL}/opt/kata/bin/kata-runtime" "${SNAPCRAFT_PART_INSTALL}/usr/bin/kata-runtime"
      ln -sf "${SNAPCRAFT_PART_INSTALL}/opt/kata/bin/kata-collect-data.sh" "${SNAPCRAFT_PART_INSTALL}/usr/bin/kata-collect-data.sh"

  kernel:
    after: [docker]
    plugin: nil
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      cd "${SNAPCRAFT_PROJECT_DIR}"
      sudo -E NO_TTY=true make kernel-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-kernel.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

  qemu:
    plugin: make
    after: [docker]
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      cd "${SNAPCRAFT_PROJECT_DIR}"
      sudo -E NO_TTY=true make qemu-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-qemu.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

  virtiofsd:
    plugin: nil
    after: [docker]
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      cd "${SNAPCRAFT_PROJECT_DIR}"
      sudo -E NO_TTY=true make virtiofsd-tarball

      tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-virtiofsd.tar.xz"

      tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"

  cloud-hypervisor:
    plugin: nil
    after: [docker]
    override-build: |
      source "${SNAPCRAFT_PROJECT_DIR}/snap/local/snap-common.sh"

      if [ "${arch}" == "aarch64" ] || [ "${arch}" == "x86_64" ]; then
        cd "${SNAPCRAFT_PROJECT_DIR}"
        sudo -E NO_TTY=true make cloud-hypervisor-tarball

        tarfile="${SNAPCRAFT_PROJECT_DIR}/tools/packaging/kata-deploy/local-build/build/kata-static-cloud-hypervisor.tar.xz"

        tar -xvJpf "${tarfile}" -C "${SNAPCRAFT_PART_INSTALL}"
      fi

apps:
  runtime:
    command: usr/bin/kata-runtime
  shim:
    command: usr/bin/containerd-shim-kata-v2
  collect-data:
    command: usr/bin/kata-collect-data.sh
33
src/agent/Cargo.lock
generated
@@ -2146,6 +2146,7 @@ dependencies = [
 "num_cpus",
 "oci",
 "regex",
 "safe-path",
 "serde",
 "serde_json",
 "slog",
@@ -2285,6 +2286,7 @@ dependencies = [
 "slog-async",
 "slog-json",
 "slog-scope",
 "slog-term",
]

[[package]]
@@ -2645,6 +2647,15 @@ dependencies = [
 "libc",
]

[[package]]
name = "num_threads"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
dependencies = [
 "libc",
]

[[package]]
name = "objc"
version = "0.2.7"
@@ -3887,6 +3898,13 @@ version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"

[[package]]
name = "safe-path"
version = "0.1.0"
dependencies = [
 "libc",
]

[[package]]
name = "salsa20"
version = "0.10.2"
@@ -4384,6 +4402,19 @@ dependencies = [
 "slog-scope",
]

[[package]]
name = "slog-term"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c"
dependencies = [
 "atty",
 "slog",
 "term",
 "thread_local",
 "time 0.3.20",
]

[[package]]
name = "smallvec"
version = "1.10.0"
@@ -4646,6 +4677,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
dependencies = [
 "itoa",
 "libc",
 "num_threads",
 "serde",
 "time-core",
 "time-macros",

@@ -161,7 +161,7 @@ impl Process {

    pub fn notify_term_close(&mut self) {
        let notify = self.term_exit_notifier.clone();
        notify.notify_one();
        notify.notify_waiters();
    }

    pub fn close_stdin(&mut self) {
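The hunk above swaps `notify_one()` for `notify_waiters()`, which changes the wakeup semantics: `notify_one` wakes a single waiter (or stores a permit for the next one), while `notify_waiters` wakes every task already waiting and stores no permit. A minimal self-contained sketch of the difference, assuming only the tokio crate with default full features (an illustration, not code from this change):

    use tokio::sync::Notify;

    #[tokio::main]
    async fn main() {
        let notify = Notify::new();

        // Register interest *before* the notification is sent, so that
        // notify_waiters() (which stores no permit) still wakes us.
        let notified = notify.notified();
        tokio::pin!(notified);
        notified.as_mut().enable();

        notify.notify_waiters(); // wakes all currently-registered waiters
        notified.await;          // completes immediately

        // By contrast, notify_one() stores a permit even with no waiter:
        notify.notify_one();
        notify.notified().await; // consumes the stored permit without blocking
    }

For a terminal-close event with potentially several readers waiting, waking all registered waiters is the behavior the change is after.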
@@ -81,7 +81,8 @@ cfg_if! {
// sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00
pub const ACPI_DEV_PATH: &str = "/devices/LNXSYSTM";

pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu";
pub const SYSFS_CPU_PATH: &str = "/sys/devices/system/cpu";
pub const SYSFS_CPU_ONLINE_PATH: &str = "/sys/devices/system/cpu/online";

pub const SYSFS_MEMORY_BLOCK_SIZE_PATH: &str = "/sys/devices/system/memory/block_size_bytes";
pub const SYSFS_MEMORY_HOTPLUG_PROBE_PATH: &str = "/sys/devices/system/memory/probe";

@@ -89,6 +89,27 @@ impl Handle {
            .await?;
        }

        // We need to update the link's interface name, so first rename any
        // existing link that already owns the requested name; otherwise the
        // update would fail with a name conflict.
        let mut new_link = None;
        if link.name() != iface.name {
            if let Ok(link) = self.find_link(LinkFilter::Name(iface.name.as_str())).await {
                // Move the existing interface to a temporary name; otherwise
                // updating this interface to an already-used name would fail.
                let mut request = self.handle.link().set(link.index());
                request.message_mut().header = link.header.clone();

                request
                    .name(format!("{}_temp", link.name()))
                    .up()
                    .execute()
                    .await?;

                new_link = Some(link);
            }
        }

        // Update link
        let mut request = self.handle.link().set(link.index());
        request.message_mut().header = link.header.clone();
@@ -101,6 +122,14 @@ impl Handle {
            .execute()
            .await?;

        // Swap the updated iface's name.
        if let Some(nlink) = new_link {
            let mut request = self.handle.link().set(nlink.index());
            request.message_mut().header = nlink.header.clone();

            request.name(link.name()).up().execute().await?;
        }

        Ok(())
    }
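The two hunks above implement a three-step swap: the conflicting link is parked under a `_temp` name (the kernel's interface namespace cannot hold duplicates), the target link takes the requested name, and the parked link then receives the target's old name. A self-contained sketch of the same swap over a toy name-to-index table rather than the netlink API (illustration only; in the map, removal does the parking that the temporary name does in the kernel):

    use std::collections::HashMap;

    /// Rename `target` to `wanted`, parking any link that already owns
    /// `wanted` and then giving it `target`'s old name.
    fn rename_with_swap(links: &mut HashMap<String, u32>, target: &str, wanted: &str) {
        if target == wanted {
            return;
        }
        let parked = links.remove(wanted); // step 1: park the conflicting link
        let idx = links.remove(target).expect("target link must exist");
        links.insert(wanted.to_string(), idx); // step 2: target takes the wanted name
        if let Some(other) = parked {
            links.insert(target.to_string(), other); // step 3: swap in the old name
        }
    }

    fn main() {
        let mut links = HashMap::from([("eth0".to_string(), 2), ("eth1".to_string(), 3)]);
        rename_with_swap(&mut links, "eth1", "eth0");
        assert_eq!(links["eth0"], 3);
        assert_eq!(links["eth1"], 2);
    }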
@@ -673,15 +673,16 @@ impl AgentService {
        let cid = req.container_id;
        let eid = req.exec_id;

        let mut term_exit_notifier = Arc::new(tokio::sync::Notify::new());
        let term_exit_notifier;
        let reader = {
            let s = self.sandbox.clone();
            let mut sandbox = s.lock().await;

            let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;

            term_exit_notifier = p.term_exit_notifier.clone();

            if p.term_master.is_some() {
                term_exit_notifier = p.term_exit_notifier.clone();
                p.get_reader(StreamType::TermMaster)
            } else if stdout {
                if p.parent_stdout.is_some() {

@@ -12,6 +12,7 @@ use crate::pci;
use crate::uevent::{Uevent, UeventMatcher};
use crate::watcher::BindWatcher;
use anyhow::{anyhow, Context, Result};
use kata_types::cpu::CpuSet;
use libc::pid_t;
use oci::{Hook, Hooks};
use protocols::agent::OnlineCPUMemRequest;
@@ -25,6 +26,7 @@ use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::{thread, time};
use tokio::sync::mpsc::{channel, Receiver, Sender};
@@ -265,12 +267,12 @@ impl Sandbox {
    pub fn online_cpu_memory(&self, req: &OnlineCPUMemRequest) -> Result<()> {
        if req.nb_cpus > 0 {
            // online cpus
            online_cpus(&self.logger, req.nb_cpus as i32)?;
            online_cpus(&self.logger, req.nb_cpus as i32).context("online cpus")?;
        }

        if !req.cpu_only {
            // online memory
            online_memory(&self.logger)?;
            online_memory(&self.logger).context("online memory")?;
        }

        if req.nb_cpus == 0 {
@@ -434,23 +436,33 @@ fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Res

// The max wait for all CPUs to come online is 50ms * 100 = 5 seconds.
const ONLINE_CPUMEM_WATI_MILLIS: u64 = 50;
const ONLINE_CPUMEM_MAX_RETRIES: u32 = 100;
const ONLINE_CPUMEM_MAX_RETRIES: i32 = 100;

#[instrument]
fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {
    let mut onlined_count: i32 = 0;
    let mut onlined_cpu_count = onlined_cpus().context("onlined cpu count")?;
    // Some VMMs, like Dragonball, online the CPUs for us, so first check
    // whether the agent needs to do the online operation at all.
    if onlined_cpu_count >= num {
        return Ok(num);
    }

    for i in 0..ONLINE_CPUMEM_MAX_RETRIES {
        let r = online_resources(
        // online num resources
        online_resources(
            logger,
            SYSFS_CPU_ONLINE_PATH,
            SYSFS_CPU_PATH,
            r"cpu[0-9]+",
            num - onlined_count,
        );
            num - onlined_cpu_count,
        )
        .context("online cpu resource")?;

        onlined_count += r?;
        if onlined_count == num {
            info!(logger, "online {} CPU(s) after {} retries", num, i);
        onlined_cpu_count = onlined_cpus().context("onlined cpu count")?;
        if onlined_cpu_count >= num {
            info!(
                logger,
                "Currently {} onlined CPU(s) after {} retries", onlined_cpu_count, i
            );
            return Ok(num);
        }
        thread::sleep(time::Duration::from_millis(ONLINE_CPUMEM_WATI_MILLIS));
@@ -465,10 +477,18 @@ fn online_cpus(logger: &Logger, num: i32) -> Result<i32> {

#[instrument]
fn online_memory(logger: &Logger) -> Result<()> {
    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)?;
    online_resources(logger, SYSFS_MEMORY_ONLINE_PATH, r"memory[0-9]+", -1)
        .context("online memory resource")?;
    Ok(())
}

fn onlined_cpus() -> Result<i32> {
    let content =
        fs::read_to_string(SYSFS_CPU_ONLINE_PATH).context("read sysfs cpu online file")?;
    let online_cpu_set = CpuSet::from_str(content.trim())?;
    Ok(online_cpu_set.len() as i32)
}

#[cfg(test)]
mod tests {
    use super::*;
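The new `onlined_cpus()` above counts CPUs by parsing the kernel's cpu-list format from /sys/devices/system/cpu/online (e.g. "0-3,5"). A standalone sketch of that parse, assuming the list format itself rather than the actual `kata_types::cpu::CpuSet` implementation:

    /// Count CPUs in a kernel cpu-list string such as "0-3,5" -> 5.
    fn count_cpus(list: &str) -> Result<u32, std::num::ParseIntError> {
        let mut count = 0;
        for part in list.trim().split(',').filter(|p| !p.is_empty()) {
            match part.split_once('-') {
                Some((lo, hi)) => {
                    // Inclusive range, e.g. "0-3" contributes four CPUs.
                    let (lo, hi): (u32, u32) = (lo.parse()?, hi.parse()?);
                    count += hi - lo + 1;
                }
                None => {
                    part.parse::<u32>()?; // single CPU id
                    count += 1;
                }
            }
        }
        Ok(count)
    }

    fn main() {
        assert_eq!(count_cpus("0-3,5").unwrap(), 5);
        assert_eq!(count_cpus("0").unwrap(), 1);
    }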
143
src/dragonball/Cargo.lock
generated
@@ -180,9 +180,9 @@ dependencies = [

[[package]]
name = "crossbeam-channel"
version = "0.5.7"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
dependencies = [
 "cfg-if",
 "crossbeam-utils",
@@ -209,11 +209,12 @@ dependencies = [

[[package]]
name = "dbs-address-space"
version = "0.2.2"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bcc37dc0b8ffae1c5911d13ae630dc7a9020fa0de0edd178d6ab71daf56c8fc"
checksum = "95e20d28a9cd13bf00d0ecd1bd073d242242b04f0acb663d7adfc659f8879322"
dependencies = [
 "arc-swap",
 "lazy_static",
 "libc",
 "nix 0.23.2",
 "thiserror",
@@ -247,9 +248,9 @@ dependencies = [

[[package]]
name = "dbs-boot"
version = "0.3.1"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a74a8c05a1674d3032e610b4f201c7440c345559bad3dfe6b455ce195785108"
checksum = "5466a92f75aa928a9103dcb2088f6d1638ef9da8945fad7389a73864dfa0182c"
dependencies = [
 "dbs-arch",
 "kvm-bindings",
@@ -300,9 +301,9 @@ dependencies = [

[[package]]
name = "dbs-upcall"
version = "0.2.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "699e62afa444ae4b00d474fd91bc37785ba050acdfbe179731c81898e32efc3f"
checksum = "ea3a78128fd0be8b8b10257675c262b378dc5d00b1e18157736a6c27e45ce4fb"
dependencies = [
 "anyhow",
 "dbs-utils",
@@ -330,9 +331,9 @@ dependencies = [

[[package]]
name = "dbs-virtio-devices"
version = "0.2.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88e5c6c48b766afb95851b04b6b193871a59d0b2a3ed19990d4f8f651ae5c668"
checksum = "24d671cc3e5f98b84ef6b6bed007d28f72f16d3aea8eb38e2d42b00b2973c1d8"
dependencies = [
 "byteorder",
 "caps",
@@ -346,7 +347,7 @@ dependencies = [
 "kvm-ioctls",
 "libc",
 "log",
 "nix 0.23.2",
 "nix 0.24.3",
 "nydus-api",
 "nydus-blobfs",
 "nydus-rafs",
@@ -444,13 +445,13 @@ dependencies = [

[[package]]
name = "errno"
version = "0.2.8"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
dependencies = [
 "errno-dragonfly",
 "libc",
 "winapi",
 "windows-sys 0.48.0",
]

[[package]]
@@ -482,7 +483,7 @@ dependencies = [
 "cfg-if",
 "libc",
 "redox_syscall",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -618,9 +619,9 @@ dependencies = [

[[package]]
name = "getrandom"
version = "0.2.8"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
dependencies = [
 "cfg-if",
 "libc",
@@ -670,7 +671,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3"
dependencies = [
 "libc",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -841,7 +842,7 @@ dependencies = [
 "libc",
 "log",
 "wasi",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -1044,7 +1045,7 @@ dependencies = [
 "libc",
 "redox_syscall",
 "smallvec",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -1120,16 +1121,16 @@ checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"

[[package]]
name = "rustix"
version = "0.36.8"
version = "0.36.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644"
checksum = "14e4d67015953998ad0eb82887a0eb0129e18a7e2f3b7b0f6c422fddcd503d62"
dependencies = [
 "bitflags",
 "errno",
 "io-lifetimes",
 "libc",
 "linux-raw-sys",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -1167,18 +1168,18 @@ dependencies = [

[[package]]
name = "serde"
version = "1.0.152"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.152"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d"
dependencies = [
 "proc-macro2",
 "quote",
@@ -1187,9 +1188,9 @@ dependencies = [

[[package]]
name = "serde_json"
version = "1.0.93"
version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
 "itoa",
 "ryu",
@@ -1422,7 +1423,7 @@ dependencies = [
 "pin-project-lite",
 "socket2",
 "tokio-macros",
 "windows-sys",
 "windows-sys 0.45.0",
]

[[package]]
@@ -1610,7 +1611,16 @@ version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
 "windows-targets",
 "windows-targets 0.42.1",
]

[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
 "windows-targets 0.48.0",
]

[[package]]
@@ -1619,13 +1629,28 @@ version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
dependencies = [
 "windows_aarch64_gnullvm",
 "windows_aarch64_msvc",
 "windows_i686_gnu",
 "windows_i686_msvc",
 "windows_x86_64_gnu",
 "windows_x86_64_gnullvm",
 "windows_x86_64_msvc",
 "windows_aarch64_gnullvm 0.42.1",
 "windows_aarch64_msvc 0.42.1",
 "windows_i686_gnu 0.42.1",
 "windows_i686_msvc 0.42.1",
 "windows_x86_64_gnu 0.42.1",
 "windows_x86_64_gnullvm 0.42.1",
 "windows_x86_64_msvc 0.42.1",
]

[[package]]
name = "windows-targets"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
dependencies = [
 "windows_aarch64_gnullvm 0.48.0",
 "windows_aarch64_msvc 0.48.0",
 "windows_i686_gnu 0.48.0",
 "windows_i686_msvc 0.48.0",
 "windows_x86_64_gnu 0.48.0",
 "windows_x86_64_gnullvm 0.48.0",
 "windows_x86_64_msvc 0.48.0",
]

[[package]]
@@ -1634,42 +1659,84 @@ version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"

[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"

[[package]]
name = "windows_aarch64_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"

[[package]]
name = "windows_aarch64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"

[[package]]
name = "windows_i686_gnu"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"

[[package]]
name = "windows_i686_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"

[[package]]
name = "windows_i686_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"

[[package]]
name = "windows_i686_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"

[[package]]
name = "windows_x86_64_gnu"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"

[[package]]
name = "windows_x86_64_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"

[[package]]
name = "windows_x86_64_msvc"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"

[[package]]
name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

[[package]]
name = "xattr"
version = "0.2.3"
@@ -12,16 +12,16 @@ edition = "2018"
|
||||
[dependencies]
|
||||
arc-swap = "1.5.0"
|
||||
bytes = "1.1.0"
|
||||
dbs-address-space = "0.2.0"
|
||||
dbs-address-space = "0.3.0"
|
||||
dbs-allocator = "0.1.0"
|
||||
dbs-arch = "0.2.0"
|
||||
dbs-boot = "0.3.0"
|
||||
dbs-boot = "0.4.0"
|
||||
dbs-device = "0.2.0"
|
||||
dbs-interrupt = { version = "0.2.0", features = ["kvm-irq"] }
|
||||
dbs-legacy-devices = "0.1.0"
|
||||
dbs-upcall = { version = "0.2.0", optional = true }
|
||||
dbs-upcall = { version = "0.3.0", optional = true }
|
||||
dbs-utils = "0.2.0"
|
||||
dbs-virtio-devices = { version = "0.2.0", optional = true, features = ["virtio-mmio"] }
|
||||
dbs-virtio-devices = { version = "0.3.1", optional = true, features = ["virtio-mmio"] }
|
||||
kvm-bindings = "0.6.0"
|
||||
kvm-ioctls = "0.12.0"
|
||||
lazy_static = "1.2"
|
||||
@@ -55,3 +55,5 @@ virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"]
|
||||
virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"]
|
||||
# virtio-fs only work on atomic-guest-memory
|
||||
virtio-fs = ["dbs-virtio-devices/virtio-fs", "virtio-queue", "atomic-guest-memory"]
|
||||
virtio-mem = ["dbs-virtio-devices/virtio-mem", "virtio-queue", "atomic-guest-memory"]
|
||||
virtio-balloon = ["dbs-virtio-devices/virtio-balloon", "virtio-queue"]
|
||||
|
||||
@@ -19,6 +19,8 @@ use crate::vmm::Vmm;
|
||||
use self::VmConfigError::*;
|
||||
use self::VmmActionError::MachineConfig;
|
||||
|
||||
#[cfg(feature = "virtio-balloon")]
|
||||
pub use crate::device_manager::balloon_dev_mgr::{BalloonDeviceConfigInfo, BalloonDeviceError};
|
||||
#[cfg(feature = "virtio-blk")]
|
||||
pub use crate::device_manager::blk_dev_mgr::{
|
||||
BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr,
|
||||
@@ -27,6 +29,8 @@ pub use crate::device_manager::blk_dev_mgr::{
|
||||
pub use crate::device_manager::fs_dev_mgr::{
|
||||
FsDeviceConfigInfo, FsDeviceConfigUpdateInfo, FsDeviceError, FsDeviceMgr, FsMountConfigInfo,
|
||||
};
|
||||
#[cfg(feature = "virtio-mem")]
|
||||
pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError};
|
||||
#[cfg(feature = "virtio-net")]
|
||||
pub use crate::device_manager::virtio_net_dev_mgr::{
|
||||
VirtioNetDeviceConfigInfo, VirtioNetDeviceConfigUpdateInfo, VirtioNetDeviceError,
|
||||
@@ -34,7 +38,6 @@ pub use crate::device_manager::virtio_net_dev_mgr::{
|
||||
};
|
||||
#[cfg(feature = "virtio-vsock")]
|
||||
pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError};
|
||||
|
||||
#[cfg(feature = "hotplug")]
|
||||
pub use crate::vcpu::{VcpuResizeError, VcpuResizeInfo};
|
||||
|
||||
@@ -97,6 +100,20 @@ pub enum VmmActionError {
|
||||
/// The action `ResizeVcpu` Failed
|
||||
#[error("vcpu resize error : {0}")]
|
||||
ResizeVcpu(#[source] VcpuResizeError),
|
||||
|
||||
/// Cannot access address space.
|
||||
#[error("Cannot access address space.")]
|
||||
AddressSpaceNotInitialized,
|
||||
|
||||
#[cfg(feature = "virtio-mem")]
|
||||
/// Mem device related errors.
|
||||
#[error("virtio-mem device error: {0}")]
|
||||
Mem(#[source] MemDeviceError),
|
||||
|
||||
#[cfg(feature = "virtio-balloon")]
|
||||
/// Balloon device related errors.
|
||||
#[error("virtio-balloon device error: {0}")]
|
||||
Balloon(#[source] BalloonDeviceError),
|
||||
}
|
||||
|
||||
/// This enum represents the public interface of the VMM. Each action contains various
|
||||
@@ -172,6 +189,15 @@ pub enum VmmAction {
|
||||
#[cfg(feature = "hotplug")]
|
||||
/// Resize Vcpu number in the guest.
|
||||
ResizeVcpu(VcpuResizeInfo),
|
||||
|
||||
#[cfg(feature = "virtio-mem")]
|
||||
/// Add a new mem device or update one that already exists using the `MemDeviceConfig` as input.
|
||||
InsertMemDevice(MemDeviceConfigInfo),
|
||||
|
||||
#[cfg(feature = "virtio-balloon")]
|
||||
/// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig`
|
||||
/// as input.
|
||||
InsertBalloonDevice(BalloonDeviceConfigInfo),
|
||||
}
|
||||
|
||||
/// The enum represents the response sent by the VMM in case of success. The response is either
|
||||
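Note how the new variants follow the file's existing pattern: both the enum variant and (in the dispatch hunk below) its match arm are feature-gated, so a build without the feature compiles neither. A minimal self-contained sketch of that pattern with toy types (not the dragonball API; "balloon" stands in for a Cargo feature):

    #[derive(Debug)]
    enum Action {
        Noop,
        // Variant exists only when the feature is enabled.
        #[cfg(feature = "balloon")]
        InsertBalloon { size_mib: u64 },
    }

    fn dispatch(action: Action) -> &'static str {
        match action {
            Action::Noop => "nothing to do",
            // The matching arm is gated the same way, keeping the match
            // exhaustive in both configurations.
            #[cfg(feature = "balloon")]
            Action::InsertBalloon { .. } => "balloon inserted",
        }
    }

    fn main() {
        println!("{}", dispatch(Action::Noop));
    }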
@@ -274,6 +300,12 @@ impl VmmService {
            }
            #[cfg(feature = "hotplug")]
            VmmAction::ResizeVcpu(vcpu_resize_cfg) => self.resize_vcpu(vmm, vcpu_resize_cfg),
            #[cfg(feature = "virtio-mem")]
            VmmAction::InsertMemDevice(mem_cfg) => self.add_mem_device(vmm, event_mgr, mem_cfg),
            #[cfg(feature = "virtio-balloon")]
            VmmAction::InsertBalloonDevice(balloon_cfg) => {
                self.add_balloon_device(vmm, event_mgr, balloon_cfg)
            }
        };

        debug!("send vmm response: {:?}", response);
@@ -626,12 +658,6 @@ impl VmmService {

    #[cfg(feature = "hotplug")]
    fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult {
        if !cfg!(target_arch = "x86_64") {
            // TODO: Arm needs to support vcpu hotplug. Issue: #6010
            warn!("This arch does not support vm resize!");
            return Ok(VmmData::Empty);
        }

        if !cfg!(feature = "dbs-upcall") {
            warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature.");
            return Ok(VmmData::Empty);
@@ -654,6 +680,62 @@ impl VmmService {

        Ok(VmmData::Empty)
    }

    #[cfg(feature = "virtio-mem")]
    fn add_mem_device(
        &mut self,
        vmm: &mut Vmm,
        event_mgr: &mut EventManager,
        config: MemDeviceConfigInfo,
    ) -> VmmRequestResult {
        let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?;

        let ctx = vm
            .create_device_op_context(Some(event_mgr.epoll_manager()))
            .map_err(|e| {
                if let StartMicroVmError::UpcallServerNotReady = e {
                    VmmActionError::UpcallServerNotReady
                } else {
                    VmmActionError::StartMicroVm(e)
                }
            })?;

        vm.device_manager_mut()
            .mem_manager
            .insert_or_update_device(ctx, config)
            .map(|_| VmmData::Empty)
            .map_err(VmmActionError::Mem)
    }

    #[cfg(feature = "virtio-balloon")]
    fn add_balloon_device(
        &mut self,
        vmm: &mut Vmm,
        event_mgr: &mut EventManager,
        config: BalloonDeviceConfigInfo,
    ) -> VmmRequestResult {
        let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?;

        if config.size_mib != 0 {
            info!("add_balloon_device: wait prealloc");
            vm.stop_prealloc().map_err(VmmActionError::StartMicroVm)?;
        }
        let ctx = vm
            .create_device_op_context(Some(event_mgr.epoll_manager()))
            .map_err(|e| {
                if let StartMicroVmError::UpcallServerNotReady = e {
                    VmmActionError::UpcallServerNotReady
                } else {
                    VmmActionError::StartMicroVm(e)
                }
            })?;

        vm.device_manager_mut()
            .balloon_manager
            .insert_or_update_device(ctx, config)
            .map(|_| VmmData::Empty)
            .map_err(VmmActionError::Balloon)
    }
}

fn handle_cpu_topology(
@@ -1462,4 +1544,84 @@ mod tests {
            t.check_request();
        }
    }

    #[cfg(feature = "virtio-mem")]
    #[test]
    fn test_vmm_action_insert_mem_device() {
        skip_if_not_root!();

        let tests = &mut [
            // hotplug unready
            TestData::new(
                VmmAction::InsertMemDevice(MemDeviceConfigInfo::default()),
                InstanceState::Running,
                &|result| {
                    assert!(matches!(
                        result,
                        Err(VmmActionError::StartMicroVm(
                            StartMicroVmError::UpcallMissVsock
                        ))
                    ));
                    let err_string = format!("{}", result.unwrap_err());
                    let expected_err = String::from(
                        "failed to boot the VM: \
                        the upcall client needs a virtio-vsock device for communication",
                    );
                    assert_eq!(err_string, expected_err);
                },
            ),
            // success
            TestData::new(
                VmmAction::InsertMemDevice(MemDeviceConfigInfo::default()),
                InstanceState::Uninitialized,
                &|result| {
                    assert!(result.is_ok());
                },
            ),
        ];

        for t in tests.iter_mut() {
            t.check_request();
        }
    }

    #[cfg(feature = "virtio-balloon")]
    #[test]
    fn test_vmm_action_insert_balloon_device() {
        skip_if_not_root!();

        let tests = &mut [
            // hotplug unready
            TestData::new(
                VmmAction::InsertBalloonDevice(BalloonDeviceConfigInfo::default()),
                InstanceState::Running,
                &|result| {
                    assert!(matches!(
                        result,
                        Err(VmmActionError::StartMicroVm(
                            StartMicroVmError::UpcallMissVsock
                        ))
                    ));
                    let err_string = format!("{}", result.unwrap_err());
                    let expected_err = String::from(
                        "failed to boot the VM: \
                        the upcall client needs a virtio-vsock device for communication",
                    );
                    assert_eq!(err_string, expected_err);
                },
            ),
            // success
            TestData::new(
                VmmAction::InsertBalloonDevice(BalloonDeviceConfigInfo::default()),
                InstanceState::Uninitialized,
                &|result| {
                    assert!(result.is_ok());
                },
            ),
        ];

        for t in tests.iter_mut() {
            t.check_request();
        }
    }
}
419
src/dragonball/src/device_manager/balloon_dev_mgr.rs
Normal file
@@ -0,0 +1,419 @@
// Copyright 2020 Alibaba Cloud. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use dbs_virtio_devices as virtio;
use serde_derive::{Deserialize, Serialize};
use slog::{error, info};
use virtio::balloon::{Balloon, BalloonConfig};
use virtio::Error as VirtIoError;

use crate::address_space_manager::GuestAddressSpaceImpl;
use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos};
use crate::device_manager::DbsMmioV2Device;
use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext};

// The flag of whether to use the shared irq.
const USE_SHARED_IRQ: bool = true;
// The flag of whether to use the generic irq.
const USE_GENERIC_IRQ: bool = false;

/// Errors associated with `BalloonDeviceConfig`.
#[derive(Debug, thiserror::Error)]
pub enum BalloonDeviceError {
    /// The balloon device was already used.
    #[error("the virtio-balloon ID was already added to a different device")]
    BalloonDeviceAlreadyExists,

    /// Cannot perform the requested operation after booting the microVM.
    #[error("the update operation is not allowed after boot")]
    UpdateNotAllowedPostBoot,

    /// guest memory error
    #[error("failed to access guest memory, {0}")]
    GuestMemoryError(#[source] vm_memory::mmap::Error),

    /// create balloon device error
    #[error("failed to create virtio-balloon device, {0}")]
    CreateBalloonDevice(#[source] virtio::Error),

    /// hotplug balloon device error
    #[error("cannot hotplug virtio-balloon device, {0}")]
    HotplugDeviceFailed(#[source] DeviceMgrError),

    /// create mmio device error
    #[error("cannot create virtio-balloon mmio device, {0}")]
    CreateMmioDevice(#[source] DeviceMgrError),

    /// Cannot initialize a balloon device or add a device to the MMIO Bus.
    #[error("failure while registering balloon device: {0}")]
    RegisterBalloonDevice(#[source] DeviceMgrError),

    /// resize balloon device error
    #[error("failure while resizing virtio-balloon device, {0}")]
    ResizeFailed(#[source] VirtIoError),

    /// The balloon device id doesn't exist.
    #[error("invalid balloon device id '{0}'")]
    InvalidDeviceId(String),

    /// balloon device does not exist
    #[error("balloon device does not exist")]
    NotExist,

    /// The device manager errors.
    #[error("DeviceManager error: {0}")]
    DeviceManager(#[source] DeviceMgrError),
}

/// Configuration information for a virtio-balloon device.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub struct BalloonDeviceConfigInfo {
    /// Unique identifier of the balloon device
    pub balloon_id: String,
    /// Resize balloon size in mib
    pub size_mib: u64,
    /// Use shared irq
    pub use_shared_irq: Option<bool>,
    /// Use generic irq
    pub use_generic_irq: Option<bool>,
    /// VIRTIO_BALLOON_F_DEFLATE_ON_OOM
    pub f_deflate_on_oom: bool,
    /// VIRTIO_BALLOON_F_REPORTING
    pub f_reporting: bool,
}

impl ConfigItem for BalloonDeviceConfigInfo {
    type Err = BalloonDeviceError;

    fn id(&self) -> &str {
        &self.balloon_id
    }

    fn check_conflicts(&self, other: &Self) -> Result<(), BalloonDeviceError> {
        if self.balloon_id.as_str() == other.balloon_id.as_str() {
            Err(BalloonDeviceError::BalloonDeviceAlreadyExists)
        } else {
            Ok(())
        }
    }
}

/// Balloon Device Info
pub type BalloonDeviceInfo = DeviceConfigInfo<BalloonDeviceConfigInfo>;

impl ConfigItem for BalloonDeviceInfo {
    type Err = BalloonDeviceError;

    fn id(&self) -> &str {
        &self.config.balloon_id
    }

    fn check_conflicts(&self, other: &Self) -> Result<(), BalloonDeviceError> {
        if self.config.balloon_id.as_str() == other.config.balloon_id.as_str() {
            Err(BalloonDeviceError::BalloonDeviceAlreadyExists)
        } else {
            Ok(())
        }
    }
}

/// Wrapper for the collection that holds all the Balloon Devices Configs
#[derive(Clone)]
pub struct BalloonDeviceMgr {
    /// A list of `BalloonDeviceConfig` objects.
    info_list: DeviceConfigInfos<BalloonDeviceConfigInfo>,
    pub(crate) use_shared_irq: bool,
}

impl BalloonDeviceMgr {
    /// Inserts `balloon_cfg` in the virtio-balloon device configuration list.
    /// If an entry with the same id already exists, it will attempt to update
    /// the existing entry.
    pub fn insert_or_update_device(
        &mut self,
        mut ctx: DeviceOpContext,
        balloon_cfg: BalloonDeviceConfigInfo,
    ) -> std::result::Result<(), BalloonDeviceError> {
        if !cfg!(feature = "hotplug") && ctx.is_hotplug {
            error!(ctx.logger(), "hotplug feature has been disabled.";
                "subsystem" => "balloon_dev_mgr",);
            return Err(BalloonDeviceError::UpdateNotAllowedPostBoot);
        }

        let epoll_mgr = ctx
            .get_epoll_mgr()
            .map_err(BalloonDeviceError::DeviceManager)?;

        // If the id of the drive already exists in the list, the operation is an update.
        if let Some(index) = self.get_index_of_balloon_dev(&balloon_cfg.balloon_id) {
            // Update an existing balloon device
            if ctx.is_hotplug {
                info!(ctx.logger(), "resize virtio balloon size to {:?}", balloon_cfg.size_mib; "subsystem" => "balloon_dev_mgr");
                self.update_balloon_size(index, balloon_cfg.size_mib)?;
            }
            self.info_list.insert_or_update(&balloon_cfg)?;
        } else {
            // Create a new balloon device
            if !self.info_list.is_empty() {
                error!(ctx.logger(), "only support one balloon device!"; "subsystem" => "balloon_dev_mgr");
                return Err(BalloonDeviceError::BalloonDeviceAlreadyExists);
            }

            if !ctx.is_hotplug {
                self.info_list.insert_or_update(&balloon_cfg)?;
                return Ok(());
            }

            info!(ctx.logger(), "hotplug balloon device: {}", balloon_cfg.balloon_id; "subsystem" => "balloon_dev_mgr");
            let device = Box::new(
                virtio::balloon::Balloon::new(
                    epoll_mgr,
                    BalloonConfig {
                        f_deflate_on_oom: balloon_cfg.f_deflate_on_oom,
                        f_reporting: balloon_cfg.f_reporting,
                    },
                )
                .map_err(BalloonDeviceError::CreateBalloonDevice)?,
            );

            let mmio_dev =
                DeviceManager::create_mmio_virtio_device_with_device_change_notification(
                    device,
                    &mut ctx,
                    balloon_cfg.use_shared_irq.unwrap_or(self.use_shared_irq),
                    balloon_cfg.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
                )
                .map_err(BalloonDeviceError::CreateMmioDevice)?;
            ctx.insert_hotplug_mmio_device(&mmio_dev, None)
                .map_err(|e| {
                    error!(
                        ctx.logger(),
                        "hotplug balloon device {} error: {}",
                        &balloon_cfg.balloon_id, e;
                        "subsystem" => "balloon_dev_mgr"
                    );
                    BalloonDeviceError::HotplugDeviceFailed(e)
                })?;
            let index = self.info_list.insert_or_update(&balloon_cfg)?;
            self.info_list[index].set_device(mmio_dev);
        }
        Ok(())
    }

    /// Attaches all virtio-balloon devices from the BalloonDevicesConfig.
    pub fn attach_devices(
        &mut self,
        ctx: &mut DeviceOpContext,
    ) -> std::result::Result<(), BalloonDeviceError> {
        let epoll_mgr = ctx
            .get_epoll_mgr()
            .map_err(BalloonDeviceError::DeviceManager)?;

        for info in self.info_list.iter_mut() {
            info!(ctx.logger(), "attach balloon device: {}", info.config.balloon_id; "subsystem" => "balloon_dev_mgr");

            let device = Balloon::new(
                epoll_mgr.clone(),
                BalloonConfig {
                    f_deflate_on_oom: info.config.f_deflate_on_oom,
                    f_reporting: info.config.f_reporting,
                },
            )
            .map_err(BalloonDeviceError::CreateBalloonDevice)?;
            let mmio_dev =
                DeviceManager::create_mmio_virtio_device_with_device_change_notification(
                    Box::new(device),
                    ctx,
                    info.config.use_shared_irq.unwrap_or(self.use_shared_irq),
                    info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
                )
                .map_err(BalloonDeviceError::RegisterBalloonDevice)?;
            info.set_device(mmio_dev);
        }

        Ok(())
    }

    fn update_balloon_size(
        &self,
        index: usize,
        size_mib: u64,
    ) -> std::result::Result<(), BalloonDeviceError> {
        let device = self.info_list[index]
            .device
            .as_ref()
            .ok_or_else(|| BalloonDeviceError::NotExist)?;
        if let Some(mmio_dev) = device.as_any().downcast_ref::<DbsMmioV2Device>() {
            let guard = mmio_dev.state();
            let inner_dev = guard.get_inner_device();
            if let Some(balloon_dev) = inner_dev
                .as_any()
                .downcast_ref::<Balloon<GuestAddressSpaceImpl>>()
            {
                return balloon_dev
                    .set_size(size_mib)
                    .map_err(BalloonDeviceError::ResizeFailed);
            }
        }
        Ok(())
    }

    fn get_index_of_balloon_dev(&self, balloon_id: &str) -> Option<usize> {
        self.info_list
            .iter()
            .position(|info| info.config.balloon_id.eq(balloon_id))
    }
}

impl Default for BalloonDeviceMgr {
    /// Create a new `BalloonDeviceMgr` object.
    fn default() -> Self {
        BalloonDeviceMgr {
            info_list: DeviceConfigInfos::new(),
            use_shared_irq: USE_SHARED_IRQ,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::tests::create_vm_for_test;

    impl Default for BalloonDeviceConfigInfo {
        fn default() -> Self {
            BalloonDeviceConfigInfo {
                balloon_id: "".to_string(),
                size_mib: 0,
                use_generic_irq: None,
                use_shared_irq: None,
                f_deflate_on_oom: false,
                f_reporting: false,
            }
        }
    }

    #[test]
    fn test_balloon_config_check_conflicts() {
        let config = BalloonDeviceConfigInfo::default();
        let mut config2 = BalloonDeviceConfigInfo::default();
        assert!(config.check_conflicts(&config2).is_err());
        config2.balloon_id = "dummy_balloon".to_string();
        assert!(config.check_conflicts(&config2).is_ok());
    }

    #[test]
    fn test_create_balloon_devices_configs() {
        let mgr = BalloonDeviceMgr::default();
        assert_eq!(mgr.info_list.len(), 0);
        assert_eq!(mgr.get_index_of_balloon_dev(""), None);
    }

    #[test]
    fn test_balloon_insert_or_update_device() {
        // Init vm for test.
        let mut vm = create_vm_for_test();

        // Test for standard config
        let device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        let dummy_balloon_device = BalloonDeviceConfigInfo::default();
        vm.device_manager_mut()
            .balloon_manager
            .insert_or_update_device(device_op_ctx, dummy_balloon_device)
            .unwrap();
        assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1);
    }

    #[test]
    fn test_balloon_attach_device() {
        // Init vm and insert balloon config for test.
        let mut vm = create_vm_for_test();
        let device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        let dummy_balloon_device = BalloonDeviceConfigInfo::default();
        vm.device_manager_mut()
            .balloon_manager
            .insert_or_update_device(device_op_ctx, dummy_balloon_device)
            .unwrap();
        assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1);

        // Test for standard config
        let mut device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );
        assert!(vm
            .device_manager_mut()
            .balloon_manager
            .attach_devices(&mut device_op_ctx)
            .is_ok());
        assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1);
    }

    #[test]
    fn test_balloon_update_device() {
        // Init vm for test.
        let mut vm = create_vm_for_test();
        let device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        let dummy_balloon_device = BalloonDeviceConfigInfo::default();
        vm.device_manager_mut()
            .balloon_manager
            .insert_or_update_device(device_op_ctx, dummy_balloon_device)
            .unwrap();
        assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1);

        let mut device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        assert!(vm
            .device_manager_mut()
            .balloon_manager
            .attach_devices(&mut device_op_ctx)
            .is_ok());
        assert_eq!(vm.device_manager().balloon_manager.info_list.len(), 1);

        assert!(vm
            .device_manager()
            .balloon_manager
            .update_balloon_size(0, 200)
            .is_ok());
    }
}
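The `update_balloon_size` method above reaches the concrete `Balloon` through two `as_any()`/`downcast_ref` hops on trait objects. The underlying mechanism is `std::any::Any`; a self-contained sketch of the same downcast pattern with a toy device trait (not the dragonball types):

    use std::any::Any;

    trait Device {
        fn as_any(&self) -> &dyn Any;
    }

    struct Balloon {
        size_mib: u64,
    }

    impl Device for Balloon {
        fn as_any(&self) -> &dyn Any {
            self
        }
    }

    fn balloon_size(dev: &dyn Device) -> Option<u64> {
        // downcast_ref returns Some only if the trait object's concrete
        // type really is Balloon, mirroring the checks in update_balloon_size.
        dev.as_any().downcast_ref::<Balloon>().map(|b| b.size_mib)
    }

    fn main() {
        let dev: Box<dyn Device> = Box::new(Balloon { size_mib: 256 });
        assert_eq!(balloon_size(dev.as_ref()), Some(256));
    }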
@@ -871,6 +871,8 @@ mod tests {
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        let dummy_file = TempFile::new().unwrap();
@@ -907,6 +909,8 @@ mod tests {
            Some(vm.vm_as().unwrap().clone()),
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );

        vm.device_manager_mut()
733
src/dragonball/src/device_manager/mem_dev_mgr.rs
Normal file
@@ -0,0 +1,733 @@
// Copyright 2020 Alibaba Cloud. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::io;
use std::sync::{Arc, Mutex};

use dbs_address_space::{
    AddressSpace, AddressSpaceError, AddressSpaceRegion, MPOL_MF_MOVE, MPOL_PREFERRED, USABLE_END,
};
use dbs_utils::epoll_manager::EpollManager;
use dbs_virtio_devices as virtio;
use kvm_bindings::kvm_userspace_memory_region;
use kvm_ioctls::VmFd;
use nix::sys::mman;
use serde_derive::{Deserialize, Serialize};
use slog::{debug, error, info, warn};
use virtio::mem::{Mem, MemRegionFactory};
use virtio::Error as VirtIoError;
use vm_memory::{
    Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestRegionMmap, GuestUsize, MmapRegion,
};

use crate::address_space_manager::GuestAddressSpaceImpl;
use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos};
use crate::device_manager::DbsMmioV2Device;
use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext};
use crate::vm::VmConfigInfo;

// The flag of whether to use the shared irq.
const USE_SHARED_IRQ: bool = true;
// The flag of whether to use the generic irq.
const USE_GENERIC_IRQ: bool = false;

const HUGE_PAGE_2M: usize = 0x200000;

// max numa node ids on host
const MAX_NODE: u32 = 64;

/// Errors associated with `MemDeviceConfig`.
#[derive(Debug, thiserror::Error)]
pub enum MemDeviceError {
    /// The mem device was already used.
    #[error("the virtio-mem ID was already added to a different device")]
    MemDeviceAlreadyExists,

    /// Cannot perform the requested operation after booting the microVM.
    #[error("the update operation is not allowed after boot")]
    UpdateNotAllowedPostBoot,

    /// insert mem device error
    #[error("cannot add virtio-mem device, {0}")]
    InsertDeviceFailed(#[source] DeviceMgrError),

    /// create mem device error
    #[error("cannot create virtio-mem device, {0}")]
    CreateMemDevice(#[source] DeviceMgrError),

    /// create mmio device error
    #[error("cannot create virtio-mem mmio device, {0}")]
    CreateMmioDevice(#[source] DeviceMgrError),

    /// resize mem device error
    #[error("failure while resizing virtio-mem device, {0}")]
    ResizeFailed(#[source] VirtIoError),

    /// mem device does not exist
    #[error("mem device does not exist")]
    DeviceNotExist,

    /// address space region error
    #[error("address space region error, {0}")]
    AddressSpaceRegion(#[source] AddressSpaceError),

    /// Cannot initialize a mem device or add a device to the MMIO Bus.
    #[error("failure while registering mem device: {0}")]
    RegisterMemDevice(#[source] DeviceMgrError),

    /// The mem device id doesn't exist.
    #[error("invalid mem device id '{0}'")]
    InvalidDeviceId(String),

    /// The device manager errors.
    #[error("DeviceManager error: {0}")]
    DeviceManager(#[source] DeviceMgrError),
}

/// Configuration information for a virtio-mem device.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub struct MemDeviceConfigInfo {
    /// Unique identifier of the mem device
    pub mem_id: String,
    /// Memory size mib
    pub size_mib: u64,
    /// Memory capacity mib
    pub capacity_mib: u64,
    /// Use multi_region or not
    pub multi_region: bool,
    /// host numa node id
    pub host_numa_node_id: Option<u32>,
    /// guest numa node id
    pub guest_numa_node_id: Option<u16>,
    /// Use shared irq
    pub use_shared_irq: Option<bool>,
    /// Use generic irq
    pub use_generic_irq: Option<bool>,
}

impl ConfigItem for MemDeviceConfigInfo {
    type Err = MemDeviceError;

    fn id(&self) -> &str {
        &self.mem_id
    }

    fn check_conflicts(&self, other: &Self) -> Result<(), MemDeviceError> {
        if self.mem_id.as_str() == other.mem_id.as_str() {
            Err(MemDeviceError::MemDeviceAlreadyExists)
        } else {
            Ok(())
        }
    }
}

/// Mem Device Info
pub type MemDeviceInfo = DeviceConfigInfo<MemDeviceConfigInfo>;

impl ConfigItem for MemDeviceInfo {
    type Err = MemDeviceError;

    fn id(&self) -> &str {
        &self.config.mem_id
    }

    fn check_conflicts(&self, other: &Self) -> Result<(), MemDeviceError> {
        if self.config.mem_id.as_str() == other.config.mem_id.as_str() {
            Err(MemDeviceError::MemDeviceAlreadyExists)
        } else {
            Ok(())
        }
    }
}

/// Wrapper for the collection that holds all the Mem Devices Configs
#[derive(Clone)]
pub struct MemDeviceMgr {
    /// A list of `MemDeviceConfig` objects.
    info_list: DeviceConfigInfos<MemDeviceConfigInfo>,
    pub(crate) use_shared_irq: bool,
}

impl MemDeviceMgr {
    /// Inserts `mem_cfg` in the virtio-mem device configuration list.
    /// If an entry with the same id already exists, it will attempt to update
    /// the existing entry.
    pub fn insert_or_update_device(
        &mut self,
        mut ctx: DeviceOpContext,
        mem_cfg: MemDeviceConfigInfo,
    ) -> std::result::Result<(), MemDeviceError> {
        if !cfg!(feature = "hotplug") && ctx.is_hotplug {
            error!(ctx.logger(), "hotplug feature has been disabled.";
                "subsystem" => "virtio-mem");
            return Err(MemDeviceError::UpdateNotAllowedPostBoot);
        }

        let epoll_mgr = ctx.get_epoll_mgr().map_err(MemDeviceError::DeviceManager)?;

        // If the id of the drive already exists in the list, the operation is an update.
        if let Some(index) = self.get_index_of_mem_dev(&mem_cfg.mem_id) {
            // Update an existing memory device
            if ctx.is_hotplug {
                info!(
                    ctx.logger(),
                    "update memory device: {}, size: 0x{:x}MB.",
                    mem_cfg.mem_id,
                    mem_cfg.size_mib;
                    "subsystem" => "virtio-mem"
                );
                self.update_memory_size(index, mem_cfg.size_mib)?;
            }
            self.info_list.insert_or_update(&mem_cfg)?;
        } else {
            // Create a new memory device
            if !ctx.is_hotplug {
                self.info_list.insert_or_update(&mem_cfg)?;
                return Ok(());
            }

            info!(
                ctx.logger(),
                "hot-add memory device: {}, size: 0x{:x}MB.", mem_cfg.mem_id, mem_cfg.size_mib;
                "subsystem" => "virtio-mem"
            );

            let device = Self::create_memory_device(&mem_cfg, &ctx, &epoll_mgr)
                .map_err(MemDeviceError::CreateMemDevice)?;
            let mmio_device =
                DeviceManager::create_mmio_virtio_device_with_device_change_notification(
                    Box::new(device),
                    &mut ctx,
                    mem_cfg.use_shared_irq.unwrap_or(self.use_shared_irq),
                    mem_cfg.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
                )
                .map_err(MemDeviceError::CreateMmioDevice)?;

            #[cfg(not(test))]
            ctx.insert_hotplug_mmio_device(&mmio_device, None)
                .map_err(|e| {
                    error!(
                        ctx.logger(),
                        "failed to hot-add virtio-mem device {}, {}", &mem_cfg.mem_id, e;
                        "subsystem" => "virtio-mem"
                    );
                    MemDeviceError::InsertDeviceFailed(e)
                })?;

            let index = self.info_list.insert_or_update(&mem_cfg)?;
            self.info_list[index].set_device(mmio_device);
        }

        Ok(())
    }

    /// Attaches all virtio-mem devices from the MemDevicesConfig.
    pub fn attach_devices(
        &mut self,
        ctx: &mut DeviceOpContext,
    ) -> std::result::Result<(), MemDeviceError> {
        let epoll_mgr = ctx.get_epoll_mgr().map_err(MemDeviceError::DeviceManager)?;

        for info in self.info_list.iter_mut() {
            let config = &info.config;
            info!(
                ctx.logger(),
                "attach virtio-mem device {}, size 0x{:x}.", config.mem_id, config.size_mib;
                "subsystem" => "virtio-mem"
            );
            // Ignore virtio-mem devices with zero memory capacity.
            if config.size_mib == 0 {
                debug!(
                    ctx.logger(),
                    "ignore zero-sized memory device {}.", config.mem_id;
                    "subsystem" => "virtio-mem"
                );
                continue;
            }

            let device = Self::create_memory_device(config, ctx, &epoll_mgr)
                .map_err(MemDeviceError::CreateMemDevice)?;
            let mmio_device =
                DeviceManager::create_mmio_virtio_device_with_device_change_notification(
                    Box::new(device),
                    ctx,
                    config.use_shared_irq.unwrap_or(self.use_shared_irq),
                    config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ),
                )
                .map_err(MemDeviceError::RegisterMemDevice)?;

            info.set_device(mmio_device);
        }

        Ok(())
    }

    fn get_index_of_mem_dev(&self, mem_id: &str) -> Option<usize> {
        self.info_list
            .iter()
            .position(|info| info.config.mem_id.eq(mem_id))
    }

    fn create_memory_device(
        config: &MemDeviceConfigInfo,
        ctx: &DeviceOpContext,
        epoll_mgr: &EpollManager,
    ) -> std::result::Result<virtio::mem::Mem<GuestAddressSpaceImpl>, DeviceMgrError> {
        let factory = Arc::new(Mutex::new(MemoryRegionFactory::new(
            ctx,
            config.mem_id.clone(),
            config.host_numa_node_id,
        )?));

        let mut capacity_mib = config.capacity_mib;
        if capacity_mib == 0 {
            capacity_mib = *USABLE_END >> 20;
        }
        // Get the boot memory size, used to calculate the alignment.
        let boot_mem_size = {
            let boot_size = (ctx.get_vm_config()?.mem_size_mib << 20) as u64;
            // Add 1 GiB to the boot memory to avoid the MMIO hole.
            match boot_size {
                x if x > dbs_boot::layout::MMIO_LOW_START => x + (1 << 30),
                _ => boot_size,
            }
        };

        virtio::mem::Mem::new(
            config.mem_id.clone(),
            capacity_mib,
            config.size_mib,
            config.multi_region,
            config.guest_numa_node_id,
            epoll_mgr.clone(),
            factory,
            boot_mem_size,
        )
        .map_err(DeviceMgrError::Virtio)
    }
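The `boot_mem_size` computation in `create_memory_device` above credits an extra 1 GiB once boot memory crosses the low MMIO hole, so hotplug regions are placed past the hole. A standalone sketch of the arithmetic, assuming a hypothetical 3 GiB `MMIO_LOW_START` purely for illustration (the real constant comes from `dbs_boot::layout`):

    // Hypothetical stand-in for dbs_boot::layout::MMIO_LOW_START.
    const MMIO_LOW_START: u64 = 3 << 30; // 3 GiB

    /// Mirror of the boot-memory adjustment: memory above the MMIO hole
    /// is credited an extra 1 GiB so hotplug regions land past the hole.
    fn boot_mem_size(mem_size_mib: u64) -> u64 {
        let boot_size = mem_size_mib << 20;
        if boot_size > MMIO_LOW_START {
            boot_size + (1 << 30)
        } else {
            boot_size
        }
    }

    fn main() {
        assert_eq!(boot_mem_size(2048), 2 << 30); // below the hole: unchanged
        assert_eq!(boot_mem_size(4096), (4u64 << 30) + (1 << 30)); // above: +1 GiB
    }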
/// Removes all virtio-mem devices
|
||||
pub fn remove_devices(&self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> {
|
||||
for info in self.info_list.iter() {
|
||||
if let Some(device) = &info.device {
|
||||
DeviceManager::destroy_mmio_virtio_device(device.clone(), ctx)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_memory_size(
|
||||
&self,
|
||||
index: usize,
|
||||
size_mib: u64,
|
||||
) -> std::result::Result<(), MemDeviceError> {
|
||||
let device = self.info_list[index]
|
||||
.device
|
||||
.as_ref()
|
||||
.ok_or_else(|| MemDeviceError::DeviceNotExist)?;
|
||||
if let Some(mmio_dev) = device.as_any().downcast_ref::<DbsMmioV2Device>() {
|
||||
let guard = mmio_dev.state();
|
||||
let inner_dev = guard.get_inner_device();
|
||||
if let Some(mem_dev) = inner_dev
|
||||
.as_any()
|
||||
.downcast_ref::<Mem<GuestAddressSpaceImpl>>()
|
||||
{
|
||||
return mem_dev
|
||||
.set_requested_size(size_mib)
|
||||
.map_err(MemDeviceError::ResizeFailed);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MemDeviceMgr {
|
||||
/// Create a new `MemDeviceMgr` object..
|
||||
fn default() -> Self {
|
||||
MemDeviceMgr {
|
||||
info_list: DeviceConfigInfos::new(),
|
||||
use_shared_irq: USE_SHARED_IRQ,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MemoryRegionFactory {
|
||||
mem_id: String,
|
||||
vm_as: GuestAddressSpaceImpl,
|
||||
address_space: AddressSpace,
|
||||
vm_config: VmConfigInfo,
|
||||
vm_fd: Arc<VmFd>,
|
||||
logger: Arc<slog::Logger>,
|
||||
host_numa_node_id: Option<u32>,
|
||||
instance_id: String,
|
||||
}
|
||||
|
||||
impl MemoryRegionFactory {
|
||||
fn new(
|
||||
ctx: &DeviceOpContext,
|
||||
mem_id: String,
|
||||
host_numa_node_id: Option<u32>,
|
||||
) -> Result<Self, DeviceMgrError> {
|
||||
let vm_as = ctx.get_vm_as()?;
|
||||
let address_space = ctx.get_address_space()?;
|
||||
let vm_config = ctx.get_vm_config()?;
|
||||
let logger = Arc::new(ctx.logger().new(slog::o!()));
|
||||
|
||||
let shared_info = ctx.shared_info.read().unwrap();
|
||||
let instance_id = shared_info.id.clone();
|
||||
|
||||
Ok(MemoryRegionFactory {
|
||||
mem_id,
|
||||
vm_as,
|
||||
address_space,
|
||||
vm_config,
|
||||
vm_fd: ctx.vm_fd.clone(),
|
||||
logger,
|
||||
host_numa_node_id,
|
||||
instance_id,
|
||||
})
|
||||
}
|
||||
|
||||
fn configure_anon_mem(&self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> {
|
||||
unsafe {
|
||||
mman::madvise(
|
||||
mmap_reg.as_ptr() as *mut libc::c_void,
|
||||
mmap_reg.size(),
|
||||
mman::MmapAdvise::MADV_DONTFORK,
|
||||
)
|
||||
}
|
||||
.map_err(VirtIoError::Madvise)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn configure_numa(&self, mmap_reg: &MmapRegion, node_id: u32) -> Result<(), VirtIoError> {
|
||||
let nodemask = 1_u64
|
||||
.checked_shl(node_id)
|
||||
.ok_or(VirtIoError::InvalidInput)?;
|
||||
let res = unsafe {
|
||||
libc::syscall(
|
||||
libc::SYS_mbind,
|
||||
mmap_reg.as_ptr() as *mut libc::c_void,
|
||||
mmap_reg.size(),
|
||||
MPOL_PREFERRED,
|
||||
&nodemask as *const u64,
|
||||
MAX_NODE,
|
||||
MPOL_MF_MOVE,
|
||||
)
|
||||
};
|
||||
if res < 0 {
|
||||
warn!(
|
||||
self.logger,
|
||||
"failed to mbind memory to host_numa_node_id {}: this may affect performance",
|
||||
node_id;
|
||||
"subsystem" => "virito-mem"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
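    // Illustration (not in the original patch): for node_id = 3 the computed
    // nodemask is 1u64 << 3 = 0b1000, so mbind() prefers NUMA node 3 for this
    // mapping.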

    fn configure_thp(&mut self, mmap_reg: &MmapRegion) -> Result<(), VirtIoError> {
        debug!(
            self.logger,
            "Setting MADV_HUGEPAGE on AddressSpaceRegion addr {:x?} len {:x?}",
            mmap_reg.as_ptr(),
            mmap_reg.size();
            "subsystem" => "virito-mem"
        );

        // Safe because we just created the MmapRegion.
        unsafe {
            mman::madvise(
                mmap_reg.as_ptr() as *mut libc::c_void,
                mmap_reg.size(),
                mman::MmapAdvise::MADV_HUGEPAGE,
            )
        }
        .map_err(VirtIoError::Madvise)?;

        Ok(())
    }

    fn map_to_kvm(
        &mut self,
        slot: u32,
        reg: &Arc<AddressSpaceRegion>,
        mmap_reg: &MmapRegion,
    ) -> Result<(), VirtIoError> {
        let host_addr = mmap_reg.as_ptr() as u64;

        let flags = 0u32;

        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: reg.start_addr().raw_value(),
            memory_size: reg.len(),
            userspace_addr: host_addr,
            flags,
        };

        // Safe because the user mem region was just created, and the kvm slot
        // was allocated by the resource allocator.
        unsafe { self.vm_fd.set_user_memory_region(mem_region) }
            .map_err(VirtIoError::SetUserMemoryRegion)?;

        Ok(())
    }
}

impl MemRegionFactory for MemoryRegionFactory {
    fn create_region(
        &mut self,
        guest_addr: GuestAddress,
        region_len: GuestUsize,
        kvm_slot: u32,
    ) -> std::result::Result<Arc<GuestRegionMmap>, VirtIoError> {
        // Create the address space region.
        let mem_type = self.vm_config.mem_type.as_str();
        let mut mem_file_path = self.vm_config.mem_file_path.clone();
        let mem_file_name = format!(
            "/virtiomem_{}_{}",
            self.instance_id.as_str(),
            self.mem_id.as_str()
        );
        mem_file_path.push_str(mem_file_name.as_str());
        let region = Arc::new(
            AddressSpaceRegion::create_default_memory_region(
                guest_addr,
                region_len,
                self.host_numa_node_id,
                mem_type,
                mem_file_path.as_str(),
                false,
                true,
            )
            .map_err(|e| {
                error!(self.logger, "failed to insert address space region: {}", e);
                // dbs-virtio-devices should not depend on dbs-address-space,
                // so io::Error is used here instead of AddressSpaceError directly.
                VirtIoError::IOError(io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "invalid address space region ({0:#x}, {1:#x})",
                        guest_addr.0, region_len
                    ),
                ))
            })?,
        );
        info!(
            self.logger,
            "VM: mem_type: {} mem_file_path: {}, numa_node_id: {:?} file_offset: {:?}",
            mem_type,
            mem_file_path,
            self.host_numa_node_id,
            region.file_offset();
            "subsystem" => "virito-mem"
        );

        let mmap_region = MmapRegion::build(
            region.file_offset().cloned(),
            region_len as usize,
            region.prot_flags(),
            region.perm_flags(),
        )
        .map_err(VirtIoError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        // Transparent huge pages.
        if mem_type == "hugeanon" || mem_type == "hugeshmem" {
            self.configure_thp(&mmap_region)?;
        }

        // Handle NUMA.
        if let Some(numa_node_id) = self.host_numa_node_id {
            self.configure_numa(&mmap_region, numa_node_id)?;
        }

        // Add to the guest memory mapping.
        self.map_to_kvm(kvm_slot, &region, &mmap_region)?;

        info!(
            self.logger,
            "kvm set user memory region: slot: {}, flags: {}, guest_phys_addr: {:X}, memory_size: {}, userspace_addr: {:X}",
            kvm_slot,
            0,
            guest_addr.raw_value(),
            region_len,
            host_addr;
            "subsystem" => "virito-mem"
        );

        // All values should be valid.
        let memory_region = Arc::new(
            GuestRegionMmap::new(mmap_region, guest_addr).map_err(VirtIoError::InsertMmap)?,
        );

        let vm_as_new = self
            .vm_as
            .memory()
            .insert_region(memory_region.clone())
            .map_err(VirtIoError::InsertMmap)?;
        self.vm_as.lock().unwrap().replace(vm_as_new);
        self.address_space.insert_region(region).map_err(|e| {
            error!(self.logger, "failed to insert address space region: {}", e);
            // dbs-virtio-devices should not depend on dbs-address-space,
            // so io::Error is used here instead of AddressSpaceError directly.
            VirtIoError::IOError(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "invalid address space region ({0:#x}, {1:#x})",
                    guest_addr.0, region_len
                ),
            ))
        })?;

        Ok(memory_region)
    }

    fn restore_region_addr(
        &self,
        guest_addr: GuestAddress,
    ) -> std::result::Result<*mut u8, VirtIoError> {
        let memory = self.vm_as.memory();
        // NOTE: We must not clone a `GuestRegionMmap` reference directly!
        //
        // The `mapping` member (of type `MmapRegion`) in `GuestRegionMmap`
        // mmaps the memory during construction and munmaps it on drop. When
        // the cloned value's lifetime ends, its drop munmaps the original
        // mapping, making that memory inaccessible to dragonball. The original
        // value is still alive at that point, and when its own lifetime ends
        // it munmaps the same region a second time, causing a memory
        // exception!
        memory
            .get_host_address(guest_addr)
            .map_err(VirtIoError::GuestMemory)
    }

    fn get_host_numa_node_id(&self) -> Option<u32> {
        self.host_numa_node_id
    }

    fn set_host_numa_node_id(&mut self, host_numa_node_id: Option<u32>) {
        self.host_numa_node_id = host_numa_node_id;
    }
}

#[cfg(test)]
mod tests {
    use vm_memory::GuestMemoryRegion;

    use super::*;
    use crate::test_utils::tests::create_vm_for_test;

    impl Default for MemDeviceConfigInfo {
        fn default() -> Self {
            MemDeviceConfigInfo {
                mem_id: "".to_string(),
                size_mib: 0,
                capacity_mib: 1024,
                multi_region: true,
                host_numa_node_id: None,
                guest_numa_node_id: None,
                use_generic_irq: None,
                use_shared_irq: None,
            }
        }
    }

    #[test]
    fn test_mem_config_check_conflicts() {
        let config = MemDeviceConfigInfo::default();
        let mut config2 = MemDeviceConfigInfo::default();
        assert!(config.check_conflicts(&config2).is_err());
        config2.mem_id = "dummy_mem".to_string();
        assert!(config.check_conflicts(&config2).is_ok());
    }

    #[test]
    fn test_create_mem_devices_configs() {
        let mgr = MemDeviceMgr::default();
        assert_eq!(mgr.info_list.len(), 0);
        assert_eq!(mgr.get_index_of_mem_dev(""), None);
    }

    #[test]
    fn test_mem_insert_or_update_device() {
        // Init a vm for the test.
        let mut vm = create_vm_for_test();

        // We don't need to use virtio-mem before starting the vm.
        // Test a standard config with hotplug.
        let device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            vm.vm_address_space().cloned(),
            true,
            Some(VmConfigInfo::default()),
            vm.shared_info().clone(),
        );

        let dummy_mem_device = MemDeviceConfigInfo::default();
        vm.device_manager_mut()
            .mem_manager
            .insert_or_update_device(device_op_ctx, dummy_mem_device)
            .unwrap();
        assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1);
    }

    #[test]
    fn test_mem_attach_device() {
        // Init a vm and insert a mem config for the test.
        let mut vm = create_vm_for_test();
        let dummy_mem_device = MemDeviceConfigInfo::default();
        vm.device_manager_mut()
            .mem_manager
            .info_list
            .insert_or_update(&dummy_mem_device)
            .unwrap();
        assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1);

        // Test a standard config.
        let mut device_op_ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            vm.vm_address_space().cloned(),
            false,
            Some(VmConfigInfo::default()),
            vm.shared_info().clone(),
        );
        vm.device_manager_mut()
            .mem_manager
            .attach_devices(&mut device_op_ctx)
            .unwrap();
        assert_eq!(vm.device_manager().mem_manager.info_list.len(), 1);
    }

    #[test]
    fn test_mem_create_region() {
        let vm = create_vm_for_test();
        let ctx = DeviceOpContext::new(
            Some(vm.epoll_manager().clone()),
            vm.device_manager(),
            Some(vm.vm_as().unwrap().clone()),
            vm.vm_address_space().cloned(),
            true,
            Some(VmConfigInfo::default()),
            vm.shared_info().clone(),
        );
        let mem_id = String::from("mem0");
        let guest_addr = GuestAddress(0x1_0000_0000);
        let region_len = 0x1000_0000;
        let kvm_slot = 2;

        // No vfio manager, no numa node.
        let mut factory = MemoryRegionFactory::new(&ctx, mem_id, None).unwrap();
        let region_opt = factory.create_region(guest_addr, region_len, kvm_slot);
        assert_eq!(region_opt.unwrap().len(), region_len);
    }
}
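A minimal usage sketch may help orient readers. It assumes the caller already holds a valid `DeviceOpContext` (`ctx`), and it calls `update_memory_size` directly even though that method is private, purely for illustration:

// Hedged sketch, not part of the patch: hot-add a 512 MiB virtio-mem device,
// then ask the guest driver to grow it to 1024 MiB.
let cfg = MemDeviceConfigInfo {
    mem_id: "mem0".to_string(),
    size_mib: 512,
    capacity_mib: 2048,
    multi_region: true,
    host_numa_node_id: None,
    guest_numa_node_id: None,
    use_generic_irq: None,
    use_shared_irq: None,
};
mem_mgr.insert_or_update_device(ctx, cfg)?;
mem_mgr.update_memory_size(0, 1024)?; // index 0: the device just inserted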

@@ -55,6 +55,7 @@ impl DeviceVirtioRegionHandler {
            None,
            file_offset,
            region.flags(),
            region.prot(),
            false,
        ));


@@ -7,7 +7,7 @@
use std::collections::HashMap;

use std::io;
use std::sync::{Arc, Mutex, MutexGuard};
use std::sync::{Arc, Mutex, MutexGuard, RwLock};

use arc_swap::ArcSwap;
use dbs_address_space::AddressSpace;
@@ -45,9 +45,10 @@ use dbs_upcall::{
use dbs_virtio_devices::vsock::backend::VsockInnerConnector;

use crate::address_space_manager::GuestAddressSpaceImpl;
use crate::api::v1::InstanceInfo;
use crate::error::StartMicroVmError;
use crate::resource_manager::ResourceManager;
use crate::vm::{KernelConfigInfo, Vm};
use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo};
use crate::IoManagerCached;

/// Virtual machine console device manager.
@@ -89,6 +90,18 @@ mod memory_region_handler;
#[cfg(feature = "virtio-fs")]
pub use self::memory_region_handler::*;

#[cfg(feature = "virtio-mem")]
/// Device manager for virtio-mem devices.
pub mod mem_dev_mgr;
#[cfg(feature = "virtio-mem")]
use self::mem_dev_mgr::MemDeviceMgr;

#[cfg(feature = "virtio-balloon")]
/// Device manager for virtio-balloon devices.
pub mod balloon_dev_mgr;
#[cfg(feature = "virtio-balloon")]
use self::balloon_dev_mgr::BalloonDeviceMgr;

macro_rules! info(
    ($l:expr, $($args:tt)+) => {
        slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager"))
@@ -248,6 +261,8 @@ pub struct DeviceOpContext {
    upcall_client: Option<Arc<UpcallClient<DevMgrService>>>,
    #[cfg(feature = "dbs-virtio-devices")]
    virtio_devices: Vec<Arc<DbsMmioV2Device>>,
    vm_config: Option<VmConfigInfo>,
    shared_info: Arc<RwLock<InstanceInfo>>,
}

impl DeviceOpContext {
@@ -257,6 +272,8 @@ impl DeviceOpContext {
        vm_as: Option<GuestAddressSpaceImpl>,
        address_space: Option<AddressSpace>,
        is_hotplug: bool,
        vm_config: Option<VmConfigInfo>,
        shared_info: Arc<RwLock<InstanceInfo>>,
    ) -> Self {
        let irq_manager = device_mgr.irq_manager.clone();
        let res_manager = device_mgr.res_manager.clone();
@@ -282,11 +299,21 @@ impl DeviceOpContext {
            upcall_client: None,
            #[cfg(feature = "dbs-virtio-devices")]
            virtio_devices: Vec::new(),
            vm_config,
            shared_info,
        }
    }

    pub(crate) fn create_boot_ctx(vm: &Vm, epoll_mgr: Option<EpollManager>) -> Self {
        Self::new(epoll_mgr, vm.device_manager(), None, None, false)
        Self::new(
            epoll_mgr,
            vm.device_manager(),
            None,
            None,
            false,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        )
    }

    pub(crate) fn get_vm_as(&self) -> Result<GuestAddressSpaceImpl> {
@@ -296,6 +323,27 @@ impl DeviceOpContext {
        }
    }

    pub(crate) fn get_vm_config(&self) -> Result<VmConfigInfo> {
        match self.vm_config.as_ref() {
            Some(v) => Ok(v.clone()),
            None => Err(DeviceMgrError::InvalidOperation),
        }
    }

    pub(crate) fn get_address_space(&self) -> Result<AddressSpace> {
        match self.address_space.as_ref() {
            Some(v) => Ok(v.clone()),
            None => Err(DeviceMgrError::InvalidOperation),
        }
    }

    pub(crate) fn get_epoll_mgr(&self) -> Result<EpollManager> {
        match self.epoll_mgr.as_ref() {
            Some(v) => Ok(v.clone()),
            None => Err(DeviceMgrError::InvalidOperation),
        }
    }

    pub(crate) fn logger(&self) -> &slog::Logger {
        &self.logger
    }
@@ -386,6 +434,8 @@ impl DeviceOpContext {
            Some(vm_as),
            vm.vm_address_space().cloned(),
            true,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );
        ctx.upcall_client = vm.upcall_client().clone();
        ctx
@@ -463,7 +513,7 @@ pub struct DeviceManager {
    res_manager: Arc<ResourceManager>,
    vm_fd: Arc<VmFd>,
    pub(crate) logger: slog::Logger,

    pub(crate) shared_info: Arc<RwLock<InstanceInfo>>,
    pub(crate) con_manager: ConsoleManager,
    pub(crate) legacy_manager: Option<LegacyDeviceManager>,
    #[cfg(target_arch = "aarch64")]
@@ -481,6 +531,12 @@ pub struct DeviceManager {

    #[cfg(feature = "virtio-fs")]
    fs_manager: Arc<Mutex<FsDeviceMgr>>,

    #[cfg(feature = "virtio-mem")]
    pub(crate) mem_manager: MemDeviceMgr,

    #[cfg(feature = "virtio-balloon")]
    pub(crate) balloon_manager: BalloonDeviceMgr,
}

impl DeviceManager {
@@ -490,6 +546,7 @@ impl DeviceManager {
        res_manager: Arc<ResourceManager>,
        epoll_manager: EpollManager,
        logger: &slog::Logger,
        shared_info: Arc<RwLock<InstanceInfo>>,
    ) -> Self {
        DeviceManager {
            io_manager: Arc::new(ArcSwap::new(Arc::new(IoManager::new()))),
@@ -498,6 +555,7 @@ impl DeviceManager {
            res_manager,
            vm_fd,
            logger: logger.new(slog::o!()),
            shared_info,

            con_manager: ConsoleManager::new(epoll_manager, logger),
            legacy_manager: None,
@@ -511,6 +569,10 @@ impl DeviceManager {
            virtio_net_manager: VirtioNetDeviceMgr::default(),
            #[cfg(feature = "virtio-fs")]
            fs_manager: Arc::new(Mutex::new(FsDeviceMgr::default())),
            #[cfg(feature = "virtio-mem")]
            mem_manager: MemDeviceMgr::default(),
            #[cfg(feature = "virtio-balloon")]
            balloon_manager: BalloonDeviceMgr::default(),
        }
    }

@@ -636,9 +698,9 @@ impl DeviceManager {
        vm_as: GuestAddressSpaceImpl,
        epoll_mgr: EpollManager,
        kernel_config: &mut KernelConfigInfo,
        com1_sock_path: Option<String>,
        dmesg_fifo: Option<Box<dyn io::Write + Send>>,
        address_space: Option<&AddressSpace>,
        vm_config: &VmConfigInfo,
    ) -> std::result::Result<(), StartMicroVmError> {
        let mut ctx = DeviceOpContext::new(
            Some(epoll_mgr),
@@ -646,8 +708,12 @@ impl DeviceManager {
            Some(vm_as),
            address_space.cloned(),
            false,
            Some(vm_config.clone()),
            self.shared_info.clone(),
        );

        let com1_sock_path = vm_config.serial_path.clone();

        self.create_legacy_devices(&mut ctx)?;
        self.init_legacy_devices(dmesg_fifo, com1_sock_path, &mut ctx)?;

@@ -710,6 +776,8 @@ impl DeviceManager {
            Some(vm_as),
            address_space.cloned(),
            true,
            None,
            self.shared_info.clone(),
        );

        #[cfg(feature = "virtio-blk")]
@@ -874,6 +942,24 @@ impl DeviceManager {
        )
    }

    /// Create a Virtio MMIO transport layer device for the virtio backend device, with
    /// configuration change notification enabled.
    pub fn create_mmio_virtio_device_with_device_change_notification(
        device: DbsVirtioDevice,
        ctx: &mut DeviceOpContext,
        use_shared_irq: bool,
        use_generic_irq: bool,
    ) -> std::result::Result<Arc<DbsMmioV2Device>, DeviceMgrError> {
        let features = DRAGONBALL_FEATURE_PER_QUEUE_NOTIFY;
        DeviceManager::create_mmio_virtio_device_with_features(
            device,
            ctx,
            Some(features),
            use_shared_irq,
            use_generic_irq,
        )
    }

    /// Create a Virtio MMIO transport layer device for the virtio backend device, with the
    /// specified features.
    pub fn create_mmio_virtio_device_with_features(
@@ -1019,6 +1105,7 @@ mod tests {
    use vm_memory::{GuestAddress, MmapRegion};

    use super::*;
    #[cfg(target_arch = "x86_64")]
    use crate::vm::CpuTopology;

    impl DeviceManager {
@@ -1029,6 +1116,10 @@ mod tests {
        let epoll_manager = EpollManager::default();
        let res_manager = Arc::new(ResourceManager::new(None));
        let logger = slog_scope::logger().new(slog::o!());
        let shared_info = Arc::new(RwLock::new(InstanceInfo::new(
            String::from("dragonball"),
            String::from("1"),
        )));

        DeviceManager {
            vm_fd: Arc::clone(&vm_fd),
@@ -1047,10 +1138,15 @@ mod tests {
            virtio_net_manager: VirtioNetDeviceMgr::default(),
            #[cfg(feature = "virtio-vsock")]
            vsock_manager: VsockDeviceMgr::default(),
            #[cfg(feature = "virtio-mem")]
            mem_manager: MemDeviceMgr::default(),
            #[cfg(feature = "virtio-balloon")]
            balloon_manager: BalloonDeviceMgr::default(),
            #[cfg(target_arch = "aarch64")]
            mmio_device_info: HashMap::new(),

            logger,
            shared_info,
        }
    }
}
@@ -1090,7 +1186,7 @@ mod tests {
        },
        vpmu_feature: 0,
    };
    vm.set_vm_config(vm_config);
    vm.set_vm_config(vm_config.clone());
    vm.init_guest_memory().unwrap();
    vm.setup_interrupt_controller().unwrap();
    let vm_as = vm.vm_as().cloned().unwrap();
@@ -1116,8 +1212,8 @@ mod tests {
            event_mgr.epoll_manager(),
            &mut cmdline,
            None,
            None,
            address_space.as_ref(),
            &vm_config,
        )
        .unwrap();
        let guard = mgr.io_manager.load();
@@ -1141,6 +1237,8 @@ mod tests {
            Some(vm.vm_as().unwrap().clone()),
            vm.vm_address_space().cloned(),
            true,
            Some(vm.vm_config().clone()),
            vm.shared_info().clone(),
        );
        let guest_addr = GuestAddress(0x200000000000);

@@ -192,6 +192,11 @@ pub enum StartMicroVmError {
    /// Virtio-fs errors.
    #[error("virtio-fs errors: {0}")]
    FsDeviceError(#[source] device_manager::fs_dev_mgr::FsDeviceError),

    #[cfg(feature = "virtio-balloon")]
    /// Virtio-balloon errors.
    #[error("virtio-balloon errors: {0}")]
    BalloonDeviceError(#[source] device_manager::balloon_dev_mgr::BalloonDeviceError),
}

/// Errors associated with starting the instance.

@@ -374,11 +374,17 @@ impl VcpuManager {
        entry_addr: GuestAddress,
    ) -> Result<()> {
        info!("create boot vcpus");
        self.create_vcpus(
            self.vcpu_config.boot_vcpu_count,
            Some(request_ts),
            Some(entry_addr),
        )?;
        let boot_vcpu_count = if cfg!(target_arch = "aarch64") {
            // On aarch64, KVM doesn't allow calling the KVM_CREATE_VCPU ioctl after the
            // VM has been booted, because of the vgic check. To support the vCPU
            // hotplug/hotunplug feature, we have to create all the vcpu fds during boot.
            // The SetVmConfiguration API ensures max_vcpu_count >= boot_vcpu_count, so it
            // is safe to use max_vcpu_count directly here.
            self.vcpu_config.max_vcpu_count
        } else {
            self.vcpu_config.boot_vcpu_count
        };
        self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?;

        Ok(())
    }
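The renumbered assertions in the tests below follow directly from this change; for illustration (values hypothetical, matching the test fixtures):

// boot_vcpu_count = 1, max_vcpu_count = 3
// aarch64: create_boot_vcpus() now creates 3 vcpu fds (3 present-but-unstarted,
//          2 left unstarted once the single boot vCPU starts).
// x86_64:  unchanged, only 1 vcpu fd is created.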
@@ -1213,7 +1219,10 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        // test start boot vcpus
        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
@@ -1267,8 +1276,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // invalid cpuid for pause
        let cpu_indexes = vec![2];
@@ -1304,9 +1319,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // invalid cpuid for exit
        let cpu_indexes = vec![2];
@@ -1330,9 +1350,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // exit all success
        assert!(vcpu_manager.exit_all_vcpus().is_ok());
@@ -1351,9 +1376,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // invalid cpuid for exit
        let cpu_indexes = vec![2];
@@ -1377,9 +1407,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // revalidate all success
        assert!(vcpu_manager.revalidate_all_vcpus_cache().is_ok());
@@ -1395,9 +1430,14 @@ mod tests {
        assert!(vcpu_manager
            .create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
            .is_ok());
        #[cfg(target_arch = "x86_64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);

        assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
        #[cfg(target_arch = "aarch64")]
        assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);

        // set vcpus in hotplug action
        let cpu_ids = vec![0];

@@ -6,70 +6,35 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::collections::HashMap;
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::MutexGuard;

use dbs_arch::gic::GICDevice;
use dbs_arch::pmu::initialize_pmu;
use dbs_arch::{DeviceInfoForFDT, DeviceType, VpmuFeatureLevel};
use dbs_arch::{MMIODeviceInfo, VpmuFeatureLevel};
use dbs_boot::fdt_utils::*;
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use linux_loader::loader::Cmdline;
use vm_memory::{GuestAddressSpace, GuestMemory};
use linux_loader::cmdline::{Cmdline, Error as CmdlineError};
use vm_memory::GuestAddressSpace;
use vmm_sys_util::eventfd::EventFd;

use super::{Vm, VmError};
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::error::{Error, StartMicroVmError};
use crate::event_manager::EventManager;
use crate::vcpu::VcpuManager;

/// Configures the system; should be called once per VM before starting the vcpu threads.
/// For aarch64, we only set up the FDT.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline` - The kernel commandline.
/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu.
/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes.
/// * `gic_device` - The GIC device.
/// * `initrd` - Information about an optional initrd.
#[allow(clippy::borrowed_box)]
fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
    guest_mem: &M,
    cmdline: &str,
    vcpu_mpidr: Vec<u64>,
    device_info: Option<&HashMap<(DeviceType, String), T>>,
    gic_device: &Box<dyn GICDevice>,
    initrd: &Option<super::InitrdConfig>,
    vpmu_feature: &VpmuFeatureLevel,
) -> super::Result<()> {
    dbs_boot::fdt::create_fdt(
        guest_mem,
        vcpu_mpidr,
        cmdline,
        device_info,
        gic_device,
        initrd,
        vpmu_feature,
    )
    .map_err(Error::BootSystem)?;
    Ok(())
}

#[cfg(target_arch = "aarch64")]
impl Vm {
    /// Gets a reference to the irqchip of the VM.
    #[allow(clippy::borrowed_box)]
    pub fn get_irqchip(&self) -> &Box<dyn GICDevice> {
        self.irqchip_handle.as_ref().unwrap()
    pub fn get_irqchip(&self) -> &dyn GICDevice {
        self.irqchip_handle.as_ref().unwrap().as_ref()
    }

    /// Creates the irq chip in-kernel device model.
    pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicroVmError> {
        let vcpu_count = self.vm_config.vcpu_count;
        let vcpu_count = self.vm_config.max_vcpu_count;

        self.irqchip_handle = Some(
            dbs_arch::gic::create_gic(&self.vm_fd, vcpu_count.into())
@@ -99,12 +64,11 @@ impl Vm {
    /// Initialize the virtual machine instance.
    ///
    /// It initializes the virtual machine instance by:
    /// 1) initialize virtual machine global state and configuration.
    /// 2) create system devices, such as interrupt controller.
    /// 3) create and start IO devices, such as serial, console, block, net, vsock etc.
    /// 4) create and initialize vCPUs.
    /// 5) configure CPU power management features.
    /// 6) load guest kernel image.
    /// 1) Initialize virtual machine reset event fd.
    /// 2) Create and initialize vCPUs.
    /// 3) Create and initialize interrupt controller.
    /// 4) Create and initialize vPMU device.
    /// 5) Create and initialize devices, such as virtio, block, net, vsock, vfio etc.
    pub fn init_microvm(
        &mut self,
        epoll_mgr: EpollManager,
@@ -139,6 +103,52 @@ impl Vm {
        Ok(())
    }

    /// Generate fdt information about the VM.
    fn get_fdt_vm_info<'a>(
        &'a self,
        vm_memory: &'a GuestMemoryImpl,
        cmdline: &'a str,
        initrd_config: Option<&'a InitrdConfig>,
        vcpu_manager: &'a MutexGuard<VcpuManager>,
    ) -> FdtVmInfo {
        let guest_memory = vm_memory.memory();
        let vcpu_mpidr = vcpu_manager
            .vcpus()
            .into_iter()
            .map(|cpu| cpu.get_mpidr())
            .collect();
        let vm_config = self.vm_config();
        let mut vcpu_boot_onlined = vec![1; vm_config.vcpu_count as usize];
        vcpu_boot_onlined.resize(vm_config.max_vcpu_count as usize, 0);
        let vpmu_feature = vcpu_manager.vpmu_feature();
        // This configuration is used for passing cache information into the guest.
        // TODO: dragonball-sandbox #274; kata-containers #6969
        let cache_passthrough_enabled = false;
        let fdt_vcpu_info = FdtVcpuInfo::new(
            vcpu_mpidr,
            vcpu_boot_onlined,
            vpmu_feature,
            cache_passthrough_enabled,
        );

        FdtVmInfo::new(guest_memory, cmdline, initrd_config, fdt_vcpu_info)
    }
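    // Illustration (not in the original patch): with vcpu_count = 2 and
    // max_vcpu_count = 4, vcpu_boot_onlined is [1, 1, 0, 0]: boot vCPUs are
    // marked online in the FDT, hotpluggable ones offline.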

    // This method is used for passing cache/numa information into guest
    // TODO: dragonball-sandbox #274,#275; kata-containers #6969
    /// Generate fdt information about cache/numa
    fn get_fdt_numa_info(&self) -> FdtNumaInfo {
        FdtNumaInfo::default()
    }

    /// Generate fdt information about devices
    fn get_fdt_device_info(&self) -> FdtDeviceInfo<MMIODeviceInfo> {
        FdtDeviceInfo::new(
            self.device_manager().get_mmio_device_info(),
            self.get_irqchip(),
        )
    }

    /// Execute system architecture specific configurations.
    ///
    /// 1) set guest kernel boot parameters
@@ -150,24 +160,23 @@ impl Vm {
        initrd: Option<InitrdConfig>,
    ) -> std::result::Result<(), StartMicroVmError> {
        let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
        let vpmu_feature = vcpu_manager.vpmu_feature();
        let vcpu_mpidr = vcpu_manager
            .vcpus()
            .into_iter()
            .map(|cpu| cpu.get_mpidr())
            .collect();
        let guest_memory = vm_memory.memory();
        let cmdline_cstring = cmdline
            .as_cstring()
            .map_err(StartMicroVmError::ProcessCommandlne)?;
        let fdt_vm_info = self.get_fdt_vm_info(
            vm_memory,
            cmdline_cstring
                .to_str()
                .map_err(|_| StartMicroVmError::ProcessCommandlne(CmdlineError::InvalidAscii))?,
            initrd.as_ref(),
            &vcpu_manager,
        );
        let fdt_numa_info = self.get_fdt_numa_info();
        let fdt_device_info = self.get_fdt_device_info();

        configure_system(
            guest_memory,
            cmdline.as_cstring().unwrap().to_str().unwrap(),
            vcpu_mpidr,
            self.device_manager.get_mmio_device_info(),
            self.get_irqchip(),
            &initrd,
            &vpmu_feature,
        )
        .map_err(StartMicroVmError::ConfigureSystem)
        dbs_boot::fdt::create_fdt(fdt_vm_info, fdt_numa_info, fdt_device_info)
            .map(|_| ())
            .map_err(|e| StartMicroVmError::ConfigureSystem(Error::BootSystem(e)))
    }

    pub(crate) fn register_events(

@@ -241,6 +241,7 @@ impl Vm {
            resource_manager.clone(),
            epoll_manager.clone(),
            &logger,
            api_shared_info.clone(),
        );

        Ok(Vm {
@@ -413,6 +414,19 @@ impl Vm {

        (dragonball_version, instance_id)
    }

    pub(crate) fn stop_prealloc(&mut self) -> std::result::Result<(), StartMicroVmError> {
        if self.address_space.is_initialized() {
            return self
                .address_space
                .wait_prealloc(true)
                .map_err(StartMicroVmError::AddressManagerError);
        }

        Err(StartMicroVmError::AddressManagerError(
            AddressManagerError::GuestMemoryNotInitialized,
        ))
    }
}

impl Vm {
@@ -481,7 +495,6 @@ impl Vm {
    ) -> std::result::Result<(), StartMicroVmError> {
        info!(self.logger, "VM: initializing devices ...");

        let com1_sock_path = self.vm_config.serial_path.clone();
        let kernel_config = self
            .kernel_config
            .as_mut()
@@ -503,9 +516,9 @@ impl Vm {
            vm_as.clone(),
            epoll_manager,
            kernel_config,
            com1_sock_path,
            self.dmesg_fifo.take(),
            self.address_space.address_space(),
            &self.vm_config,
        )?;

        info!(self.logger, "VM: start devices");
@@ -888,6 +901,7 @@ impl Vm {

#[cfg(test)]
pub mod tests {
    #[cfg(target_arch = "x86_64")]
    use kvm_ioctls::VcpuExit;
    use linux_loader::cmdline::Cmdline;
    use test_utils::skip_if_not_root;

1 src/libs/Cargo.lock generated
@@ -508,6 +508,7 @@ dependencies = [
 "num_cpus",
 "oci",
 "regex",
 "safe-path",
 "serde",
 "serde_json",
 "slog",

@@ -27,6 +27,7 @@ thiserror = "1.0"
toml = "0.5.8"

oci = { path = "../oci" }
safe-path = { path = "../safe-path" }

[dev-dependencies]
tempfile = "3"


@@ -316,6 +316,10 @@ pub const KATA_ANNO_CFG_VFIO_MODE: &str = "io.katacontainers.config.runtime.vfio
pub const KATA_ANNO_CFG_HYPERVISOR_PREFETCH_FILES_LIST: &str =
    "io.katacontainers.config.hypervisor.prefetch_files.list";

/// A sandbox annotation for sandbox-level volume sharing with the host.
pub const KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS: &str =
    "io.katacontainers.config.runtime.sandbox_bind_mounts";

/// A helper structure to query configuration information by checking annotations.
#[derive(Debug, Default, Deserialize)]
pub struct Annotation {
@@ -950,6 +954,16 @@ impl Annotation {
            KATA_ANNO_CFG_VFIO_MODE => {
                config.runtime.vfio_mode = value.to_string();
            }
            KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS => {
                let args: Vec<String> = value
                    .to_string()
                    .split_ascii_whitespace()
                    .map(str::to_string)
                    .collect();
                for arg in args {
                    config.runtime.sandbox_bind_mounts.push(arg.to_string());
                }
            }
            _ => {
                warn!(sl!(), "Annotation {} not enabled", key);
            }

@@ -8,7 +8,8 @@ use std::path::Path;

use super::default;
use crate::config::{ConfigOps, TomlConfig};
use crate::{eother, resolve_path, validate_path};
use crate::mount::split_bind_mounts;
use crate::{eother, validate_path};

/// Type of runtime: VirtContainer.
pub const RUNTIME_NAME_VIRTCONTAINER: &str = "virt_container";
@@ -146,7 +147,14 @@ impl ConfigOps for Runtime {
        }

        for bind in conf.runtime.sandbox_bind_mounts.iter_mut() {
            resolve_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
            // Split the bind mount, canonicalize the path, then re-append the mode to it.
            let (real_path, mode) = split_bind_mounts(bind);
            match Path::new(real_path).canonicalize() {
                Err(e) => return Err(eother!("sandbox bind mount `{}` is invalid: {}", bind, e)),
                Ok(path) => {
                    *bind = format!("{}{}", path.display(), mode);
                }
            }
        }

        Ok(())
@@ -176,7 +184,12 @@ impl ConfigOps for Runtime {
        }

        for bind in conf.runtime.sandbox_bind_mounts.iter() {
            validate_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
            // Just validate the real_path.
            let (real_path, _mode) = split_bind_mounts(bind);
            validate_path!(
                real_path.to_owned(),
                "sandbox bind mount `{}` is invalid: {}"
            )?;
        }

        Ok(())

@@ -26,7 +26,7 @@ pub enum Error {
}

/// Assigned CPU resources for a Linux container.
#[derive(Default, Debug)]
#[derive(Clone, Default, Debug)]
pub struct LinuxContainerCpuResources {
    shares: u64,
    period: u64,

@@ -5,7 +5,7 @@
//

use anyhow::{anyhow, Context, Result};
use std::{collections::HashMap, path::PathBuf};
use std::{collections::HashMap, fs, path::PathBuf};

/// Prefix to mark a volume as Kata special.
pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:";
@@ -25,6 +25,15 @@ pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path.
pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";

/// SANDBOX_BIND_MOUNTS_DIR is for sandbox bindmounts.
pub const SANDBOX_BIND_MOUNTS_DIR: &str = "sandbox-mounts";

/// SANDBOX_BIND_MOUNTS_RO is for readonly sandbox bindmounts.
pub const SANDBOX_BIND_MOUNTS_RO: &str = ":ro";

/// SANDBOX_BIND_MOUNTS_RW is for readwrite sandbox bindmounts.
pub const SANDBOX_BIND_MOUNTS_RW: &str = ":rw";

/// Information about a mount.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct Mount {
@@ -71,6 +80,27 @@ pub struct DirectVolumeMountInfo {
    pub options: Vec<String>,
}

/// join_path joins the user-provided volume path with the kata direct-volume root path.
/// The volume_path is base64-encoded and then safely joined to the end of the path prefix.
pub fn join_path(prefix: &str, volume_path: &str) -> Result<PathBuf> {
    if volume_path.is_empty() {
        return Err(anyhow!("volume path must not be empty"));
    }
    let b64_encoded_path = base64::encode(volume_path.as_bytes());

    Ok(safe_path::scoped_join(prefix, b64_encoded_path)?)
}
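// Example (illustrative, not part of the patch):
// join_path("/run/kata-containers/shared/direct-volumes", "/dev/sdx") returns
// "/run/kata-containers/shared/direct-volumes/L2Rldi9zZHg=", since
// base64::encode("/dev/sdx") == "L2Rldi9zZHg=".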

/// Get DirectVolume mountInfo from mountInfo.json.
pub fn get_volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
    let mount_info_file_path =
        join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?.join(KATA_MOUNT_INFO_FILE_NAME);
    let mount_info_file = fs::read_to_string(mount_info_file_path)?;
    let mount_info: DirectVolumeMountInfo = serde_json::from_str(&mount_info_file)?;

    Ok(mount_info)
}

/// Check whether a mount type is a marker for a Kata specific volume.
pub fn is_kata_special_volume(ty: &str) -> bool {
    ty.len() > KATA_VOLUME_TYPE_PREFIX.len() && ty.starts_with(KATA_VOLUME_TYPE_PREFIX)
@@ -128,6 +158,28 @@ impl NydusExtraOptions {
        serde_json::from_slice(&extra_options_buf).context("deserialize nydus's extraoption")
    }
}

/// Sandbox bindmount formats: "/path/to/dir", "/path/to/dir:ro", or "/path/to/dir:rw";
/// the real path is the part without the ":ro" or ":rw" suffix.
pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
    let (real_path, mode) = if bindmount.ends_with(SANDBOX_BIND_MOUNTS_RO) {
        (
            bindmount.trim_end_matches(SANDBOX_BIND_MOUNTS_RO),
            SANDBOX_BIND_MOUNTS_RO,
        )
    } else if bindmount.ends_with(SANDBOX_BIND_MOUNTS_RW) {
        (
            bindmount.trim_end_matches(SANDBOX_BIND_MOUNTS_RW),
            SANDBOX_BIND_MOUNTS_RW,
        )
    } else {
        // default bindmount format
        (bindmount, "")
    };

    (real_path, mode)
}

#[cfg(test)]
mod tests {
    use super::*;
@@ -137,6 +189,18 @@ mod tests {
        assert!(!is_kata_special_volume("kata:"));
    }

    #[test]
    fn test_split_bind_mounts() {
        let test01 = "xxx0:ro";
        let test02 = "xxx2:rw";
        let test03 = "xxx3:is";
        let test04 = "xxx4";
        assert_eq!(split_bind_mounts(test01), ("xxx0", ":ro"));
        assert_eq!(split_bind_mounts(test02), ("xxx2", ":rw"));
        assert_eq!(split_bind_mounts(test03), ("xxx3:is", ""));
        assert_eq!(split_bind_mounts(test04), ("xxx4", ""));
    }

    #[test]
    fn test_is_kata_guest_mount_volume() {
        assert!(is_kata_guest_mount_volume("kata:guest-mount:nfs"));


@@ -13,7 +13,7 @@ use crate::Error;
///
/// The `U32Set` may be used to save CPUs parsed from a CPU list file or NUMA nodes parsed from
/// a NUMA node list file.
#[derive(Default, Debug)]
#[derive(Clone, Default, Debug)]
pub struct U32Set(Vec<u32>);

impl U32Set {


@@ -15,6 +15,7 @@ serde_json = "1.0.73"
# (by stopping the compiler from removing log calls).
slog = { version = "2.5.2", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug"] }
slog-json = "2.4.0"
slog-term = "2.9.0"
slog-async = "2.7.0"
slog-scope = "4.4.0"


@@ -39,6 +39,28 @@ const LOG_LEVELS: &[(&str, slog::Level)] = &[

const DEFAULT_SUBSYSTEM: &str = "root";

// Creates a logger which prints output as human-readable text to the terminal.
pub fn create_term_logger(level: slog::Level) -> (slog::Logger, slog_async::AsyncGuard) {
    let term_drain = slog_term::term_compact().fuse();

    // Ensure only a unique set of key/value fields is logged.
    let unique_drain = UniqueDrain::new(term_drain).fuse();

    // Allow runtime filtering of records by log level.
    let filter_drain = RuntimeLevelFilter::new(unique_drain, level).fuse();

    // Ensure the logger is thread-safe.
    let (async_drain, guard) = slog_async::Async::new(filter_drain)
        .thread_name("slog-async-logger".into())
        .build_with_guard();

    // Add some "standard" fields.
    let logger = slog::Logger::root(async_drain.fuse(), o!("subsystem" => DEFAULT_SUBSYSTEM));

    (logger, guard)
}
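// Illustrative usage (not part of the patch):
//     let (logger, _guard) = create_term_logger(slog::Level::Debug);
//     slog::info!(logger, "ready"); // tagged with "subsystem" => "root"
// Keep the returned guard alive; dropping it shuts down the async drain and
// buffered records may be lost.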

// Creates a logger which prints output as JSON.
// XXX: the 'writer' param is used to make testing possible.
pub fn create_logger<W>(
    name: &str,


@@ -366,7 +366,8 @@ message OnlineCPUMemRequest {
    // resources are connected asynchronously and the agent returns immediately.
    bool wait = 1;

    // NbCpus specifies the number of CPUs that were added and the agent has to online.
    // NbCpus specifies the number of CPUs that should be onlined in the guest.
    // The special value 0 means the agent will skip this check.
    uint32 nb_cpus = 2;

    // CpuOnly specifies whether to online only CPUs, or memory as well.

110 src/runtime-rs/Cargo.lock generated
@@ -241,6 +241,17 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "debc29dde2e69f9e47506b525f639ed42300fc014a3e007832592448fa8e4599"

[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
 "hermit-abi 0.1.19",
 "libc",
 "winapi",
]

[[package]]
name = "autocfg"
version = "1.1.0"
@@ -644,11 +655,12 @@ dependencies = [

[[package]]
name = "dbs-address-space"
version = "0.2.2"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bcc37dc0b8ffae1c5911d13ae630dc7a9020fa0de0edd178d6ab71daf56c8fc"
checksum = "95e20d28a9cd13bf00d0ecd1bd073d242242b04f0acb663d7adfc659f8879322"
dependencies = [
 "arc-swap",
 "lazy_static",
 "libc",
 "nix 0.23.2",
 "thiserror",
@@ -682,9 +694,9 @@ dependencies = [

[[package]]
name = "dbs-boot"
version = "0.3.1"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a74a8c05a1674d3032e610b4f201c7440c345559bad3dfe6b455ce195785108"
checksum = "5466a92f75aa928a9103dcb2088f6d1638ef9da8945fad7389a73864dfa0182c"
dependencies = [
 "dbs-arch",
 "kvm-bindings",
@@ -735,9 +747,9 @@ dependencies = [

[[package]]
name = "dbs-upcall"
version = "0.2.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "699e62afa444ae4b00d474fd91bc37785ba050acdfbe179731c81898e32efc3f"
checksum = "ea3a78128fd0be8b8b10257675c262b378dc5d00b1e18157736a6c27e45ce4fb"
dependencies = [
 "anyhow",
 "dbs-utils",
@@ -765,9 +777,9 @@ dependencies = [

[[package]]
name = "dbs-virtio-devices"
version = "0.2.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88e5c6c48b766afb95851b04b6b193871a59d0b2a3ed19990d4f8f651ae5c668"
checksum = "24d671cc3e5f98b84ef6b6bed007d28f72f16d3aea8eb38e2d42b00b2973c1d8"
dependencies = [
 "byteorder",
 "caps",
@@ -781,7 +793,7 @@ dependencies = [
 "kvm-ioctls",
 "libc",
 "log",
 "nix 0.23.2",
 "nix 0.24.3",
 "nydus-api",
 "nydus-blobfs",
 "nydus-rafs",
@@ -827,6 +839,27 @@ dependencies = [
 "subtle",
]

[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
 "cfg-if 1.0.0",
 "dirs-sys-next",
]

[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
 "libc",
 "redox_users",
 "winapi",
]

[[package]]
name = "dlv-list"
version = "0.3.0"
@@ -1212,6 +1245,15 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
 "libc",
]

[[package]]
name = "hermit-abi"
version = "0.2.6"
@@ -1481,6 +1523,7 @@ dependencies = [
 "num_cpus",
 "oci",
 "regex",
 "safe-path 0.1.0",
 "serde",
 "serde_json",
 "slog",
@@ -1618,6 +1661,7 @@ dependencies = [
 "slog-async",
 "slog-json",
 "slog-scope",
 "slog-term",
]

[[package]]
@@ -1845,7 +1889,16 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
dependencies = [
 "hermit-abi",
 "hermit-abi 0.2.6",
 "libc",
]

[[package]]
name = "num_threads"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
dependencies = [
 "libc",
]

@@ -2433,6 +2486,17 @@ dependencies = [
 "bitflags",
]

[[package]]
name = "redox_users"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [
 "getrandom 0.2.8",
 "redox_syscall",
 "thiserror",
]

[[package]]
name = "regex"
version = "1.7.1"
@@ -2844,6 +2908,19 @@ dependencies = [
 "slog-scope",
]

[[package]]
name = "slog-term"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c"
dependencies = [
 "atty",
 "slog",
 "term",
 "thread_local",
 "time 0.3.20",
]

[[package]]
name = "smallvec"
version = "1.10.0"
@@ -2962,6 +3039,17 @@ dependencies = [
 "windows-sys 0.42.0",
]

[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
 "dirs-next",
 "rustversion",
 "winapi",
]

[[package]]
name = "termcolor"
version = "1.2.0"
@@ -3042,6 +3130,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
dependencies = [
 "itoa",
 "libc",
 "num_threads",
 "serde",
 "time-core",
 "time-macros",


@@ -3,3 +3,4 @@ members = [
    "crates/shim",
    "crates/shim-ctl",
]

@@ -309,3 +309,12 @@ experimental=@DEFAULTEXPFEATURES@
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@

# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro or rw) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`.
# These will not be exposed to the container workloads, and are only provided for potential guest services.
# Three bind mount formats are supported:
# - "/path/to", default readonly mode.
# - "/path/to:ro", readonly mode.
# - "/path/to:rw", readwrite mode.
sandbox_bind_mounts=@DEFBINDMOUNTS@
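# Illustrative example only (not a shipped default; paths are hypothetical):
# sandbox_bind_mounts=["/opt/shared-data", "/opt/models:ro", "/var/cache/scratch:rw"]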

@@ -117,5 +117,6 @@ impl_agent!(
    get_ip_tables | crate::GetIPTablesRequest | crate::GetIPTablesResponse | None,
    set_ip_tables | crate::SetIPTablesRequest | crate::SetIPTablesResponse | None,
    get_volume_stats | crate::VolumeStatsRequest | crate::VolumeStatsResponse | None,
    resize_volume | crate::ResizeVolumeRequest | crate::Empty | None
    resize_volume | crate::ResizeVolumeRequest | crate::Empty | None,
    online_cpu_mem | crate::OnlineCPUMemRequest | crate::Empty | None
);

@@ -336,7 +336,7 @@ impl From<UpdateContainerRequest> for agent::UpdateContainerRequest {
    fn from(from: UpdateContainerRequest) -> Self {
        Self {
            container_id: from.container_id,
            resources: from_option(Some(from.resources)),
            resources: from_option(from.resources),
            ..Default::default()
        }
    }

@@ -54,6 +54,7 @@ pub trait Agent: AgentManager + HealthService + Send + Sync {
    // sandbox
    async fn create_sandbox(&self, req: CreateSandboxRequest) -> Result<Empty>;
    async fn destroy_sandbox(&self, req: Empty) -> Result<Empty>;
    async fn online_cpu_mem(&self, req: OnlineCPUMemRequest) -> Result<Empty>;

    // network
    async fn add_arp_neighbors(&self, req: AddArpNeighborRequest) -> Result<Empty>;

@@ -201,7 +201,7 @@ pub struct ListProcessesRequest {
#[derive(PartialEq, Clone, Default)]
pub struct UpdateContainerRequest {
    pub container_id: String,
    pub resources: oci::LinuxResources,
    pub resources: Option<oci::LinuxResources>,
    pub mounts: Vec<oci::Mount>,
}
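Since `resources` is now optional, a mounts-only update can simply pass `None`. A hedged sketch with hypothetical values:

let req = UpdateContainerRequest {
    container_id: "container-1".to_string(),
    resources: None,
    mounts: vec![],
};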
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ kata-types = { path = "../../../libs/kata-types" }
|
||||
logging = { path = "../../../libs/logging" }
|
||||
shim-interface = { path = "../../../libs/shim-interface" }
|
||||
|
||||
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs","dbs-upcall"] }
|
||||
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "dbs-upcall"] }
|
||||
|
||||
ch-config = { path = "ch-config", optional = true }
|
||||
|
||||
|
||||
@@ -494,6 +494,10 @@ impl CloudHypervisorInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> {
|
||||
Ok((old_vcpu, new_vcpu))
|
||||
}
|
||||
|
||||
pub(crate) async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
Ok(Vec::<u32>::new())
|
||||
}
|
||||
|
||||
@@ -114,6 +114,11 @@ impl Hypervisor for CloudHypervisor {
|
||||
inner.cleanup().await
|
||||
}
|
||||
|
||||
async fn resize_vcpu(&self, old_vcpu: u32, new_vcpu: u32) -> Result<(u32, u32)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resize_vcpu(old_vcpu, new_vcpu).await
|
||||
}
|
||||
|
||||
async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_pids().await
|
||||
|
||||
@@ -8,19 +8,21 @@ use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use kata_sys_util::rand::RandomBytes;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use super::{
|
||||
util::{get_host_path, get_virt_drive_name},
|
||||
Device, DeviceConfig, DeviceType,
|
||||
};
|
||||
use crate::{
|
||||
BlockConfig, BlockDevice, Hypervisor, KATA_BLK_DEV_TYPE, KATA_MMIO_BLK_DEV_TYPE,
|
||||
VIRTIO_BLOCK_MMIO, VIRTIO_BLOCK_PCI,
|
||||
};
|
||||
|
||||
use super::{
|
||||
util::{get_host_path, get_virt_drive_name},
|
||||
Device, DeviceConfig,
|
||||
};
|
||||
pub type ArcMutexDevice = Arc<Mutex<dyn Device>>;
|
||||
|
||||
const DEVICE_TYPE_BLOCK: &str = "b";
|
||||
|
||||
/// block_index and released_block_index are used to search an available block index
|
||||
/// in Sandbox.
|
||||
///
|
||||
@@ -75,35 +77,27 @@ impl DeviceManager {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn new_device(&mut self, device_config: &DeviceConfig) -> Result<String> {
|
||||
let device_id = if let Some(dev) = self.find_device(device_config).await {
|
||||
dev
|
||||
} else {
|
||||
self.create_device(device_config)
|
||||
.await
|
||||
.context("failed to create device")?
|
||||
};
|
||||
Ok(device_id)
|
||||
}
|
||||
|
||||
pub async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
|
||||
async fn try_add_device(&mut self, device_id: &str) -> Result<()> {
|
||||
// find the device
|
||||
let device = self
|
||||
.devices
|
||||
.get(device_id)
|
||||
.context("failed to find device")?;
|
||||
let mut device_guard = device.lock().await;
|
||||
|
||||
// attach device
|
||||
let mut device_guard = device.lock().await;
|
||||
let result = device_guard.attach(self.hypervisor.as_ref()).await;
|
||||
|
||||
// handle attach error
|
||||
if let Err(e) = result {
|
||||
if let DeviceConfig::BlockCfg(config) = device_guard.get_device_info().await {
|
||||
self.shared_info.release_device_index(config.index);
|
||||
if let DeviceType::Block(device) = device_guard.get_device_info().await {
|
||||
self.shared_info.release_device_index(device.config.index);
|
||||
};
|
||||
drop(device_guard);
|
||||
self.devices.remove(device_id);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
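The reworked try_add_device above takes the device lock once, and on attach failure it releases the reserved block index and removes the map entry before propagating the error. A minimal, self-contained sketch of that release-on-failure pattern (the Dev type, healthy flag, and free_indices pool are hypothetical stand-ins, not the crate's real API):

use std::collections::HashMap;

struct Dev { index: u64, healthy: bool }

struct Manager {
    devices: HashMap<String, Dev>,
    free_indices: Vec<u64>,
}

impl Manager {
    fn try_add_device(&mut self, id: &str) -> Result<(), String> {
        let dev = self.devices.get(id).ok_or("failed to find device")?;
        // attach; on failure, roll back: release the index and drop the entry
        if !dev.healthy {
            let idx = dev.index;
            self.free_indices.push(idx); // mirrors release_device_index
            self.devices.remove(id);
            return Err(format!("attach failed for {}", id));
        }
        Ok(())
    }
}

fn main() {
    let mut m = Manager { devices: HashMap::new(), free_indices: vec![] };
    m.devices.insert("blk0".into(), Dev { index: 0, healthy: false });
    assert!(m.try_add_device("blk0").is_err());
    assert_eq!(m.free_indices, vec![0]); // index reclaimed after the failed attach
}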
@@ -120,66 +114,97 @@ impl DeviceManager {
}
Err(e) => Err(e),
};

// if detach success, remove it from device manager
if result.is_ok() {
drop(device_guard);
// if detach success, remove it from device manager
self.devices.remove(device_id);
}

return result;
}

Err(anyhow!(
"device with specified ID hasn't been created. {}",
device_id
))
}

pub async fn get_device_info(&self, device_id: &str) -> Result<DeviceConfig> {
async fn get_device_info(&self, device_id: &str) -> Result<DeviceType> {
if let Some(dev) = self.devices.get(device_id) {
return Ok(dev.lock().await.get_device_info().await);
}

Err(anyhow!(
"device with specified ID hasn't been created. {}",
device_id
))
}

async fn find_device(&self, device_config: &DeviceConfig) -> Option<String> {
async fn find_device(&self, host_path: String) -> Option<String> {
for (device_id, dev) in &self.devices {
match dev.lock().await.get_device_info().await {
DeviceConfig::BlockCfg(config) => match device_config {
DeviceConfig::BlockCfg(ref config_new) => {
if config_new.path_on_host == config.path_on_host {
return Some(device_id.to_string());
}
DeviceType::Block(device) => {
if device.config.path_on_host == host_path {
return Some(device_id.to_string());
}
_ => {
continue;
}
},
}
_ => {
// TODO: support find other device type
continue;
}
}
}

None
}

async fn create_device(&mut self, device_config: &DeviceConfig) -> Result<String> {
fn get_dev_virt_path(&mut self, dev_type: &str) -> Result<Option<(u64, String)>> {
let virt_path = if dev_type == DEVICE_TYPE_BLOCK {
// generate virt path
let current_index = self.shared_info.declare_device_index()?;
let drive_name = get_virt_drive_name(current_index as i32)?;
let virt_path_name = format!("/dev/{}", drive_name);

Some((current_index, virt_path_name))
} else {
// only dev_type is block, otherwise, it's useless.
None
};

Ok(virt_path)
}

async fn new_device(&mut self, device_config: &DeviceConfig) -> Result<String> {
// device ID must be generated by manager instead of device itself
// in case of ID collision
let device_id = self.new_device_id()?;
let dev: ArcMutexDevice = match device_config {
DeviceConfig::BlockCfg(config) => self
.create_block_device(config, device_id.clone())
.await
.context("failed to create device")?,
DeviceConfig::BlockCfg(config) => {
// try to find the device, found and just return id.
if let Some(dev_id_matched) = self.find_device(config.path_on_host.clone()).await {
info!(
sl!(),
"device with host path:{:?} found. just return device id: {:?}",
config.path_on_host.clone(),
dev_id_matched
);

return Ok(dev_id_matched);
}

self.create_block_device(config, device_id.clone())
.await
.context("failed to create device")?
}
_ => {
return Err(anyhow!("invliad device type"));
}
};

// register device to devices
self.devices.insert(device_id.clone(), dev.clone());

Ok(device_id)
}

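get_dev_virt_path above pairs the declared block index with a guest /dev/vdX name via get_virt_drive_name. A hedged standalone version of the usual base-26 naming scheme (my own sketch of the conversion, not the crate's exact code):

// Convert a 0-based device index to a virtio drive name: 0 -> vda, 25 -> vdz, 26 -> vdaa ...
fn virt_drive_name(mut index: i32) -> Result<String, String> {
    if index < 0 {
        return Err("index must be non-negative".into());
    }
    let mut suffix = Vec::new();
    loop {
        let c = b'a' + (index % 26) as u8;
        suffix.insert(0, c as char); // build the name right-to-left
        index = index / 26 - 1;
        if index < 0 {
            break;
        }
    }
    Ok(format!("vd{}", suffix.into_iter().collect::<String>()))
}

fn main() {
    assert_eq!(virt_drive_name(0).unwrap(), "vda");
    assert_eq!(virt_drive_name(25).unwrap(), "vdz");
    assert_eq!(virt_drive_name(26).unwrap(), "vdaa");
    println!("/dev/{}", virt_drive_name(2).unwrap()); // /dev/vdc
}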
@@ -204,17 +229,23 @@ impl DeviceManager {
_ => "".to_string(),
};
block_config.driver_option = block_driver;
// generate virt path
let current_index = self.shared_info.declare_device_index()?;
block_config.index = current_index;
let drive_name = get_virt_drive_name(current_index as i32)?;
block_config.virt_path = format!("/dev/{}", drive_name);
// if the path on host is empty, we need to get device host path from the device major and minor number
// Otherwise, it might be rawfile based block device, the host path is already passed from the runtime, so we don't need to do anything here
if block_config.path_on_host.is_empty() {
block_config.path_on_host = get_host_path("b".to_owned(), config.major, config.minor)
.context("failed to get host path")?;

// generate block device index and virt path
// safe here, Block device always has virt_path.
if let Some(virt_path) = self.get_dev_virt_path(DEVICE_TYPE_BLOCK)? {
block_config.index = virt_path.0;
block_config.virt_path = virt_path.1;
}

// if the path on host is empty, we need to get device host path from the device major and minor number
// Otherwise, it might be rawfile based block device, the host path is already passed from the runtime,
// so we don't need to do anything here
if block_config.path_on_host.is_empty() {
block_config.path_on_host =
get_host_path(DEVICE_TYPE_BLOCK.to_owned(), config.major, config.minor)
.context("failed to get host path")?;
}

Ok(Arc::new(Mutex::new(BlockDevice::new(
device_id,
block_config,
@@ -237,3 +268,36 @@ impl DeviceManager {
Err(anyhow!("ID are exhausted"))
}
}

// Many scenarios have similar steps when adding devices. so to reduce duplicated code,
// we should create a common method abstracted and use it in various scenarios.
// do_handle_device:
// (1) new_device with DeviceConfig and return device_id;
// (2) try_add_device with device_id and do really add device;
// (3) return device info of device's info;
pub async fn do_handle_device(
d: &RwLock<DeviceManager>,
dev_info: &DeviceConfig,
) -> Result<DeviceType> {
let device_id = d
.write()
.await
.new_device(dev_info)
.await
.context("failed to create deviec")?;

d.write()
.await
.try_add_device(&device_id)
.await
.context("failed to add deivce")?;

let device_info = d
.read()
.await
.get_device_info(&device_id)
.await
.context("failed to get device info")?;

Ok(device_info)
}

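Note that do_handle_device takes the write lock separately for each mutating step and a read lock for the final query, instead of holding one guard across the await points. A self-contained sketch of that lock discipline with tokio's RwLock (toy Manager type; assumes a tokio dependency with the rt and macros features):

use tokio::sync::RwLock;

#[derive(Default)]
struct Manager { devices: Vec<String> }

impl Manager {
    fn new_device(&mut self, cfg: &str) -> usize {
        self.devices.push(cfg.to_string());
        self.devices.len() - 1
    }
    fn get_device_info(&self, id: usize) -> Option<String> {
        self.devices.get(id).cloned()
    }
}

async fn do_handle(m: &RwLock<Manager>, cfg: &str) -> Option<String> {
    // each step takes the lock only for as long as it needs it
    let id = m.write().await.new_device(cfg);
    m.read().await.get_device_info(id)
}

#[tokio::main]
async fn main() {
    let m = RwLock::new(Manager::default());
    assert_eq!(do_handle(&m, "block").await.as_deref(), Some("block"));
}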
@@ -7,7 +7,7 @@
use std::{fs, path::Path, process::Command};

use crate::device::Device;
use crate::device::DeviceConfig;
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use anyhow::anyhow;
@@ -166,7 +166,7 @@ impl Device for VfioConfig {
todo!()
}

async fn get_device_info(&self) -> DeviceConfig {
async fn get_device_info(&self) -> DeviceType {
todo!()
}


@@ -5,7 +5,7 @@
//

use crate::device::Device;
use crate::device::DeviceConfig;
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
use anyhow::Result;
use async_trait::async_trait;
@@ -47,7 +47,7 @@ impl Device for VhostUserConfig {
todo!()
}

async fn get_device_info(&self) -> DeviceConfig {
async fn get_device_info(&self) -> DeviceType {
todo!()
}


@@ -6,7 +6,7 @@

pub const VIRTIO_BLOCK_MMIO: &str = "virtio-blk-mmio";
use crate::device::Device;
use crate::device::{DeviceConfig, DeviceType};
use crate::device::DeviceType;
use crate::Hypervisor as hypervisor;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
@@ -98,8 +98,8 @@ impl Device for BlockDevice {
Ok(Some(self.config.index))
}

async fn get_device_info(&self) -> DeviceConfig {
DeviceConfig::BlockCfg(self.config.clone())
async fn get_device_info(&self) -> DeviceType {
DeviceType::Block(self.clone())
}

async fn increase_attach_count(&mut self) -> Result<bool> {

@@ -67,6 +67,9 @@ pub struct ShareFsDeviceConfig {

/// queue_num: queue number
pub queue_num: u64,

/// options: virtiofs device's config options.
pub options: Vec<String>,
}

#[derive(Debug, Clone)]

@@ -53,7 +53,7 @@ pub trait Device: Send + Sync {
// detach is to unplug device from VM
async fn detach(&mut self, h: &dyn hypervisor) -> Result<Option<u64>>;
// get_device_info returns device config
async fn get_device_info(&self) -> DeviceConfig;
async fn get_device_info(&self) -> DeviceType;
// increase_attach_count is used to increase the attach count for a device
// return values:
// * true: no need to do real attach when current attach count is zero, skip following actions.

@@ -13,7 +13,7 @@ use crate::{
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use dragonball::{
api::v1::{BlockDeviceConfigInfo, BootSourceConfig},
api::v1::{BlockDeviceConfigInfo, BootSourceConfig, VcpuResizeInfo},
vm::VmConfigInfo,
};
use kata_sys_util::mount;
@@ -327,6 +327,53 @@ impl DragonballInner {
}
}

// check if resizing info is valid
// the error in this function is not ok to be tolerated, the container boot will fail
fn precheck_resize_vcpus(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> {
// old_vcpus > 0, safe for conversion
let current_vcpus = old_vcpus;

// a non-zero positive is required
if new_vcpus == 0 {
return Err(anyhow!("resize vcpu error: 0 vcpu resizing is invalid"));
}

// cannot exceed maximum value
if new_vcpus > self.config.cpu_info.default_maxvcpus {
return Err(anyhow!("resize vcpu error: cannot greater than maxvcpus"));
}

Ok((current_vcpus, new_vcpus))
}

// do the check before resizing, returns Result<(old, new)>
pub(crate) async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> {
if old_vcpus == new_vcpus {
info!(
sl!(),
"resize_vcpu: no need to resize vcpus because old_vcpus is equal to new_vcpus"
);
return Ok((new_vcpus, new_vcpus));
}

let (old_vcpus, new_vcpus) = self.precheck_resize_vcpus(old_vcpus, new_vcpus)?;
info!(
sl!(),
"check_resize_vcpus passed, passing new_vcpus = {:?} to vmm", new_vcpus
);

let cpu_resize_info = VcpuResizeInfo {
vcpu_count: Some(new_vcpus as u8),
};
self.vmm_instance
.resize_vcpu(&cpu_resize_info)
.context(format!(
"failed to do_resize_vcpus on new_vcpus={:?}",
new_vcpus
))?;
Ok((old_vcpus, new_vcpus))
}

pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
self.config = config;
}

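precheck_resize_vcpus above only validates: it rejects a zero target and anything above default_maxvcpus, leaving the actual clamp to the caller. The same checks as a standalone sketch (free function, with maxvcpus passed explicitly since the config type isn't reproduced here):

fn precheck_resize_vcpus(old: u32, new: u32, maxvcpus: u32) -> Result<(u32, u32), String> {
    // a non-zero positive target is required
    if new == 0 {
        return Err("resize vcpu error: 0 vcpu resizing is invalid".into());
    }
    // cannot exceed the configured maximum
    if new > maxvcpus {
        return Err("resize vcpu error: cannot greater than maxvcpus".into());
    }
    Ok((old, new))
}

fn main() {
    assert!(precheck_resize_vcpus(1, 0, 8).is_err()); // zero target rejected
    assert!(precheck_resize_vcpus(1, 9, 8).is_err()); // above maxvcpus rejected
    assert_eq!(precheck_resize_vcpus(1, 4, 8), Ok((1, 4)));
}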
@@ -157,7 +157,11 @@ impl DragonballInner {
.context("insert vsock")
}

fn parse_inline_virtiofs_args(&self, fs_cfg: &mut FsDeviceConfigInfo) -> Result<()> {
fn parse_inline_virtiofs_args(
&self,
fs_cfg: &mut FsDeviceConfigInfo,
options: &mut Vec<String>,
) -> Result<()> {
let mut debug = false;
let mut opt_list = String::new();

@@ -169,8 +173,8 @@ impl DragonballInner {
sl!(),
"args: {:?}", &self.config.shared_fs.virtio_fs_extra_args
);
let args = &self.config.shared_fs.virtio_fs_extra_args;
let _ = go_flag::parse_args_with_warnings::<String, _, _>(args, None, |flags| {
let mut args = self.config.shared_fs.virtio_fs_extra_args.clone();
let _ = go_flag::parse_args_with_warnings::<String, _, _>(&args, None, |flags| {
flags.add_flag("d", &mut debug);
flags.add_flag("thread-pool-size", &mut fs_cfg.thread_pool_size);
flags.add_flag("drop-sys-resource", &mut fs_cfg.drop_sys_resource);
@@ -178,6 +182,9 @@ impl DragonballInner {
})
.with_context(|| format!("parse args: {:?}", args))?;

// more options parsed for inline virtio-fs' custom config
args.append(options);

if debug {
warn!(
sl!(),
@@ -202,6 +209,7 @@ impl DragonballInner {
"xattr" => fs_cfg.xattr = true,
"no_xattr" => fs_cfg.xattr = false,
"cache_symlinks" => {} // inline virtiofs always cache symlinks
"no_readdir" => fs_cfg.no_readdir = true,
"trace" => warn!(
sl!(),
"Inline virtiofs \"-o trace\" option not supported yet, ignored."
@@ -234,16 +242,25 @@ impl DragonballInner {
xattr: true,
..Default::default()
};
self.do_add_fs_device(&config.fs_type, &mut fs_cfg)

let mut options = config.options.clone();
self.do_add_fs_device(&config.fs_type, &mut fs_cfg, &mut options)
}

fn do_add_fs_device(&self, fs_type: &str, fs_cfg: &mut FsDeviceConfigInfo) -> Result<()> {
fn do_add_fs_device(
&self,
fs_type: &str,
fs_cfg: &mut FsDeviceConfigInfo,
options: &mut Vec<String>,
) -> Result<()> {
match fs_type {
VIRTIO_FS => {
fs_cfg.mode = String::from("vhostuser");
}
INLINE_VIRTIO_FS => {
self.parse_inline_virtiofs_args(fs_cfg)?;
// All parameters starting with --patch-fs do not need to be processed, these are the parameters required by patch fs
options.retain(|x| !x.starts_with("--patch-fs"));
self.parse_inline_virtiofs_args(fs_cfg, options)?;
}
_ => {
return Err(anyhow!(
@@ -311,8 +328,12 @@ mod tests {
"--drop-sys-resource".to_string(),
"-d".to_string(),
];

let mut options: Vec<String> = Vec::new();
dragonball.config.shared_fs.virtio_fs_cache = "auto".to_string();
dragonball.parse_inline_virtiofs_args(&mut fs_cfg).unwrap();
dragonball
.parse_inline_virtiofs_args(&mut fs_cfg, &mut options)
.unwrap();

assert!(!fs_cfg.no_open);
assert!(fs_cfg.xattr);

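The change threads a caller-supplied options vector into the parser: entries beginning with --patch-fs are dropped first, and the remainder are appended to the cloned virtio_fs_extra_args. A small sketch of that filter-and-merge step in isolation (names are mine, not the crate's):

fn merge_virtiofs_options(extra_args: &[&str], options: &mut Vec<String>) -> Vec<String> {
    let mut args: Vec<String> = extra_args.iter().map(|s| s.to_string()).collect();
    // parameters required by patch fs are consumed elsewhere; skip them here
    options.retain(|x| !x.starts_with("--patch-fs"));
    args.append(options);
    args
}

fn main() {
    let mut opts = vec!["--patch-fs-foo".to_string(), "no_open".to_string()];
    let merged = merge_virtiofs_options(&["--thread-pool-size=1"], &mut opts);
    assert_eq!(merged, vec!["--thread-pool-size=1", "no_open"]);
}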
@@ -77,6 +77,12 @@ impl Hypervisor for Dragonball {
inner.save_vm().await
}

// returns Result<(old_vcpus, new_vcpus)>
async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> {
let inner = self.inner.read().await;
inner.resize_vcpu(old_vcpus, new_vcpus).await
}

async fn add_device(&self, device: DeviceType) -> Result<()> {
let mut inner = self.inner.write().await;
inner.add_device(device).await

@@ -16,8 +16,8 @@ use crossbeam_channel::{unbounded, Receiver, Sender};
use dragonball::{
api::v1::{
BlockDeviceConfigInfo, BootSourceConfig, FsDeviceConfigInfo, FsMountConfigInfo,
InstanceInfo, InstanceState, VirtioNetDeviceConfigInfo, VmmAction, VmmActionError, VmmData,
VmmRequest, VmmResponse, VmmService, VsockDeviceConfigInfo,
InstanceInfo, InstanceState, VcpuResizeInfo, VirtioNetDeviceConfigInfo, VmmAction,
VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService, VsockDeviceConfigInfo,
},
vm::VmConfigInfo,
Vmm,
@@ -248,6 +248,12 @@ impl VmmInstance {
Ok(())
}

pub fn resize_vcpu(&self, cfg: &VcpuResizeInfo) -> Result<()> {
self.handle_request(Request::Sync(VmmAction::ResizeVcpu(cfg.clone())))
.with_context(|| format!("Failed to resize_vm(hotplug vcpu), cfg: {:?}", cfg))?;
Ok(())
}

pub fn pause(&self) -> Result<()> {
todo!()
}

@@ -78,6 +78,7 @@ pub trait Hypervisor: Send + Sync {
async fn pause_vm(&self) -> Result<()>;
async fn save_vm(&self) -> Result<()>;
async fn resume_vm(&self) -> Result<()>;
async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus)

// device manager
async fn add_device(&self, device: DeviceType) -> Result<()>;

@@ -104,6 +104,11 @@ impl QemuInner {
todo!()
}

pub(crate) async fn resize_vcpu(&self, _old_vcpus: u32, _new_vcpus: u32) -> Result<(u32, u32)> {
info!(sl!(), "QemuInner::resize_vcpu()");
todo!()
}

pub(crate) async fn get_pids(&self) -> Result<Vec<u32>> {
info!(sl!(), "QemuInner::get_pids()");
todo!()

@@ -118,6 +118,11 @@ impl Hypervisor for Qemu {
inner.cleanup().await
}

async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)> {
let inner = self.inner.read().await;
inner.resize_vcpu(old_vcpus, new_vcpus).await
}

async fn get_pids(&self) -> Result<Vec<u32>> {
let inner = self.inner.read().await;
inner.get_pids().await

@@ -41,4 +41,5 @@ logging = { path = "../../../libs/logging" }
oci = { path = "../../../libs/oci" }
actix-rt = "2.7.0"
persist = { path = "../persist"}

[features]

@@ -26,6 +26,8 @@ use oci::LinuxResources;
use persist::sandbox_persist::Persist;
use tokio::sync::RwLock;

use crate::ResourceUpdateOp;

const OS_ERROR_NO_SUCH_PROCESS: i32 = 3;

pub struct CgroupArgs {
@@ -149,29 +151,51 @@ impl CgroupsResource {
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
op: ResourceUpdateOp,
h: &dyn Hypervisor,
) -> Result<()> {
let resource = self.calc_resource(linux_resources);
let changed = self.update_resources(cid, resource).await;
let new_resources = self.calc_resource(linux_resources);
let old_resources = self.update_resources(cid, new_resources.clone(), op).await;

if !changed {
return Ok(());
}

self.do_update_cgroups(h).await
}

async fn update_resources(&self, cid: &str, new_resource: Resources) -> bool {
let mut resources = self.resources.write().await;
let old_resource = resources.insert(cid.to_owned(), new_resource.clone());

if let Some(old_resource) = old_resource {
if old_resource == new_resource {
return false;
if let Some(old_resource) = old_resources.clone() {
if old_resource == new_resources {
return Ok(());
}
}

true
match self.do_update_cgroups(h).await {
Err(e) => {
// if update failed, we should roll back the records in resources
let mut resources = self.resources.write().await;
match op {
ResourceUpdateOp::Add => {
resources.remove(cid);
}
ResourceUpdateOp::Update | ResourceUpdateOp::Del => {
if let Some(old_resource) = old_resources {
resources.insert(cid.to_owned(), old_resource);
}
}
}
Err(e)
}
Ok(()) => Ok(()),
}
}

async fn update_resources(
&self,
cid: &str,
new_resource: Resources,
op: ResourceUpdateOp,
) -> Option<Resources> {
let mut resources = self.resources.write().await;
match op {
ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
resources.insert(cid.to_owned(), new_resource.clone())
}
ResourceUpdateOp::Del => resources.remove(cid),
}
}

async fn do_update_cgroups(&self, h: &dyn Hypervisor) -> Result<()> {
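The cgroups hunk above records the new entry first and rolls the map back if do_update_cgroups fails: an Add removes the fresh entry, an Update or Del restores the previous one. The same insert-then-rollback pattern in a self-contained sketch (plain HashMap; Op mirrors ResourceUpdateOp):

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum Op { Add, Update, Del }

fn update_with_rollback(
    map: &mut HashMap<String, u32>,
    cid: &str,
    new: u32,
    op: Op,
    apply: impl Fn() -> Result<(), String>,
) -> Result<(), String> {
    let old = match op {
        Op::Add | Op::Update => map.insert(cid.to_owned(), new),
        Op::Del => map.remove(cid),
    };
    if let Err(e) = apply() {
        // roll back the record so the map matches the real cgroup state
        match op {
            Op::Add => { map.remove(cid); }
            Op::Update | Op::Del => {
                if let Some(old) = old { map.insert(cid.to_owned(), old); }
            }
        }
        return Err(e);
    }
    Ok(())
}

fn main() {
    let mut m = HashMap::from([("c1".to_string(), 1)]);
    let _ = update_with_rollback(&mut m, "c1", 2, Op::Update, || Err("cgroup write failed".into()));
    assert_eq!(m["c1"], 1); // old record restored after the failed update
}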
188 src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs Normal file
@@ -0,0 +1,188 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::{
cmp,
collections::{HashMap, HashSet},
convert::TryFrom,
sync::Arc,
};

use agent::{Agent, OnlineCPUMemRequest};
use anyhow::{Context, Ok, Result};
use hypervisor::Hypervisor;
use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
use oci::LinuxCpu;
use tokio::sync::RwLock;

use crate::ResourceUpdateOp;

#[derive(Default, Debug, Clone)]
pub struct CpuResource {
/// Current number of vCPUs
pub(crate) current_vcpu: Arc<RwLock<u32>>,

/// Default number of vCPUs
pub(crate) default_vcpu: u32,

/// CpuResource of each container
pub(crate) container_cpu_resources: Arc<RwLock<HashMap<String, LinuxContainerCpuResources>>>,
}

impl CpuResource {
pub fn new(config: Arc<TomlConfig>) -> Result<Self> {
let hypervisor_name = config.runtime.hypervisor_name.clone();
let hypervisor_config = config
.hypervisor
.get(&hypervisor_name)
.context(format!("failed to get hypervisor {}", hypervisor_name))?;
Ok(Self {
current_vcpu: Arc::new(RwLock::new(hypervisor_config.cpu_info.default_vcpus as u32)),
default_vcpu: hypervisor_config.cpu_info.default_vcpus as u32,
container_cpu_resources: Arc::new(RwLock::new(HashMap::new())),
})
}

pub(crate) async fn update_cpu_resources(
&self,
cid: &str,
linux_cpus: Option<&LinuxCpu>,
op: ResourceUpdateOp,
hypervisor: &dyn Hypervisor,
agent: &dyn Agent,
) -> Result<()> {
self.update_container_cpu_resources(cid, linux_cpus, op)
.await
.context("update container cpu resources")?;
let vcpu_required = self
.calc_cpu_resources()
.await
.context("calculate vcpus required")?;

if vcpu_required == self.current_vcpu().await {
return Ok(());
}

let curr_vcpus = self
.do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
.await?;
self.update_current_vcpu(curr_vcpus).await;
Ok(())
}

async fn current_vcpu(&self) -> u32 {
let current_vcpu = self.current_vcpu.read().await;
*current_vcpu
}

async fn update_current_vcpu(&self, new_vcpus: u32) {
let mut current_vcpu = self.current_vcpu.write().await;
*current_vcpu = new_vcpus;
}

// update container_cpu_resources field
async fn update_container_cpu_resources(
&self,
cid: &str,
linux_cpus: Option<&LinuxCpu>,
op: ResourceUpdateOp,
) -> Result<()> {
if let Some(cpu) = linux_cpus {
let container_resource = LinuxContainerCpuResources::try_from(cpu)?;
let mut resources = self.container_cpu_resources.write().await;
match op {
ResourceUpdateOp::Add => {
resources.insert(cid.to_owned(), container_resource);
}
ResourceUpdateOp::Update => {
let resource = resources.insert(cid.to_owned(), container_resource.clone());
if let Some(old_container_resource) = resource {
// the priority of cpu-quota is higher than cpuset when determine the number of vcpus.
// we should better ignore the resource update when update cpu only by cpuset if cpu-quota
// has been set previously.
if old_container_resource.quota() > 0 && container_resource.quota() < 0 {
resources.insert(cid.to_owned(), old_container_resource);
}
}
}
ResourceUpdateOp::Del => {
resources.remove(cid);
}
}
}

Ok(())
}

// calculates the total required vcpus by adding each container's requirements within the pod
async fn calc_cpu_resources(&self) -> Result<u32> {
let mut total_vcpu = 0;
let mut cpuset_vcpu: HashSet<u32> = HashSet::new();

let resources = self.container_cpu_resources.read().await;
for (_, cpu_resource) in resources.iter() {
let vcpu = cpu_resource.get_vcpus().unwrap_or(0) as u32;
cpuset_vcpu.extend(cpu_resource.cpuset().iter());
total_vcpu += vcpu;
}

// contrained only by cpuset
if total_vcpu == 0 && !cpuset_vcpu.is_empty() {
info!(sl!(), "(from cpuset)get vcpus # {:?}", cpuset_vcpu);
return Ok(cpuset_vcpu.len() as u32);
}

info!(
sl!(),
"(from cfs_quota&cfs_period)get vcpus count {}", total_vcpu
);
Ok(total_vcpu)
}

// do hotplug and hot-unplug the vcpu
async fn do_update_cpu_resources(
&self,
new_vcpus: u32,
op: ResourceUpdateOp,
hypervisor: &dyn Hypervisor,
agent: &dyn Agent,
) -> Result<u32> {
let old_vcpus = self.current_vcpu().await;

// when adding vcpus, ignore old_vcpus > new_vcpus
// when deleting vcpus, ignore old_vcpus < new_vcpus
if (op == ResourceUpdateOp::Add && old_vcpus > new_vcpus)
|| (op == ResourceUpdateOp::Del && old_vcpus < new_vcpus)
{
return Ok(old_vcpus);
}

// do not reduce computing power
// the number of vcpus would not be lower than the default size
let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);

let (old, new) = hypervisor
.resize_vcpu(old_vcpus, new_vcpus)
.await
.context("resize vcpus")?;

if old < new {
let add = new - old;
info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);

agent
.online_cpu_mem(OnlineCPUMemRequest {
wait: false,
nb_cpus: new,
cpu_only: true,
})
.await
.context("online vcpus")?;
}

Ok(new)
}
}
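calc_cpu_resources in the new file sums per-container vCPUs derived from CFS quota/period, falling back to the cpuset size only when no quota is set anywhere in the pod. A hedged sketch of the quota-to-vCPU arithmetic (ceiling division is my reading of get_vcpus; the real conversion lives in LinuxContainerCpuResources):

// vCPUs implied by CFS quota/period, rounded up: quota=220_000, period=100_000 -> 3
fn vcpus_from_quota(quota: i64, period: u64) -> u32 {
    if quota <= 0 || period == 0 {
        return 0; // no quota set: contributes nothing to the total
    }
    ((quota as u64 + period - 1) / period) as u32
}

fn main() {
    assert_eq!(vcpus_from_quota(220_000, 100_000), 3);
    assert_eq!(vcpus_from_quota(100_000, 100_000), 1);
    assert_eq!(vcpus_from_quota(-1, 100_000), 0);
}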
@@ -13,17 +13,16 @@ use kata_types::{
cpu::LinuxContainerCpuResources, k8s::container_type,
};

// static resource that StaticResourceManager needs, this is the spec for the
// initial resource that InitialSizeManager needs, this is the spec for the
// sandbox/container's workload
#[derive(Clone, Copy, Debug)]
struct StaticResource {
struct InitialSize {
vcpu: u32,
mem_mb: u32,
}

// generate static resource(vcpu and memory in MiB) from spec's information
// used for static resource management
impl TryFrom<&oci::Spec> for StaticResource {
// generate initial resource(vcpu and memory in MiB) from spec's information
impl TryFrom<&oci::Spec> for InitialSize {
type Error = anyhow::Error;
fn try_from(spec: &oci::Spec) -> Result<Self> {
let mut vcpu: u32 = 0;
@@ -65,31 +64,32 @@ impl TryFrom<&oci::Spec> for StaticResource {
}
info!(
sl!(),
"static resource mgmt result: vcpu={}, mem_mb={}", vcpu, mem_mb
"(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
);
Ok(Self { vcpu, mem_mb })
}
}

// StaticResourceManager is responsible for static resource management
// InitialSizeManager is responsible for initial vcpu/mem management
//
// static resource management sizing information is optionally provided, either by
// inital vcpu/mem management sizing information is optionally provided, either by
// upper layer runtime (containerd / crio) or by the container spec itself (when it
// is a standalone single container such as the one started with *docker run*)
//
// the sizing information uses three values, cpu quota, cpu period and memory limit,
// and with above values it calculates the # vcpus and memory for the workload and
// add them to default value of the config
// and with above values it calculates the # vcpus and memory for the workload
//
// if the workload # of vcpus and memory is invalid for vmms, we still use default
// value in toml_config
#[derive(Clone, Copy, Debug)]
pub struct StaticResourceManager {
resource: StaticResource,
pub struct InitialSizeManager {
resource: InitialSize,
}

impl StaticResourceManager {
impl InitialSizeManager {
pub fn new(spec: &oci::Spec) -> Result<Self> {
Ok(Self {
resource: StaticResource::try_from(spec)
.context("failed to construct static resource")?,
resource: InitialSize::try_from(spec).context("failed to construct static resource")?,
})
}

@@ -100,8 +100,13 @@ impl StaticResourceManager {
.hypervisor
.get_mut(hypervisor_name)
.context("failed to get hypervisor config")?;
hv.cpu_info.default_vcpus += self.resource.vcpu as i32;
hv.memory_info.default_memory += self.resource.mem_mb;

if self.resource.vcpu > 0 {
hv.cpu_info.default_vcpus = self.resource.vcpu as i32
}
if self.resource.mem_mb > 0 {
hv.memory_info.default_memory = self.resource.mem_mb;
}
Ok(())
}
}
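Note the semantic change in the last hunk: the old StaticResourceManager added the workload size to the configured defaults, while InitialSizeManager now replaces them, and only when the computed value is non-zero. A small sketch of the new override rule (HvConfig is a stand-in for the hypervisor config struct):

struct HvConfig { default_vcpus: i32, default_memory: u32 }

fn apply_initial_size(hv: &mut HvConfig, vcpu: u32, mem_mb: u32) {
    // zero means "no sizing info in the spec": keep the toml_config default
    if vcpu > 0 { hv.default_vcpus = vcpu as i32; }
    if mem_mb > 0 { hv.default_memory = mem_mb; }
}

fn main() {
    let mut hv = HvConfig { default_vcpus: 1, default_memory: 2048 };
    apply_initial_size(&mut hv, 3, 512);
    assert_eq!((hv.default_vcpus, hv.default_memory), (3, 512));
    apply_initial_size(&mut hv, 0, 0); // nothing in spec: defaults survive
    assert_eq!((hv.default_vcpus, hv.default_memory), (3, 512));
}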
@@ -151,7 +156,7 @@ mod tests {
struct TestData<'a> {
desc: &'a str,
input: InputData,
result: StaticResource,
result: InitialSize,
}

fn get_test_data() -> Vec<TestData<'static>> {
@@ -163,7 +168,7 @@ mod tests {
quota: None,
memory: None,
},
result: StaticResource { vcpu: 0, mem_mb: 0 },
result: InitialSize { vcpu: 0, mem_mb: 0 },
},
TestData {
desc: "normal resource limit",
@@ -173,7 +178,7 @@ mod tests {
quota: Some(220_000),
memory: Some(1024 * 1024 * 512),
},
result: StaticResource {
result: InitialSize {
vcpu: 3,
mem_mb: 512,
},
@@ -183,7 +188,7 @@ mod tests {
}

#[test]
fn test_static_resource_mgmt_sandbox() {
fn test_initial_size_sandbox() {
let tests = get_test_data();

// run tests
@@ -210,22 +215,22 @@ mod tests {
..Default::default()
};

let static_resource = StaticResource::try_from(&spec);
let initial_size = InitialSize::try_from(&spec);
assert!(
static_resource.is_ok(),
initial_size.is_ok(),
"test[{}]: {:?} should be ok",
i,
d.desc
);

let static_resource = static_resource.unwrap();
let initial_size = initial_size.unwrap();
assert_eq!(
static_resource.vcpu, d.result.vcpu,
initial_size.vcpu, d.result.vcpu,
"test[{}]: {:?} vcpu should be {}",
i, d.desc, d.result.vcpu,
);
assert_eq!(
static_resource.mem_mb, d.result.mem_mb,
initial_size.mem_mb, d.result.mem_mb,
"test[{}]: {:?} memory should be {}",
i, d.desc, d.result.mem_mb,
);
@@ -233,7 +238,7 @@ mod tests {
}

#[test]
fn test_static_resource_mgmt_container() {
fn test_initial_size_container() {
let tests = get_test_data();

// run tests
@@ -261,22 +266,22 @@ mod tests {
..Default::default()
};

let static_resource = StaticResource::try_from(&spec);
let initial_size = InitialSize::try_from(&spec);
assert!(
static_resource.is_ok(),
initial_size.is_ok(),
"test[{}]: {:?} should be ok",
i,
d.desc
);

let static_resource = static_resource.unwrap();
let initial_size = initial_size.unwrap();
assert_eq!(
static_resource.vcpu, d.result.vcpu,
initial_size.vcpu, d.result.vcpu,
"test[{}]: {:?} vcpu should be {}",
i, d.desc, d.result.vcpu,
);
assert_eq!(
static_resource.mem_mb, d.result.mem_mb,
initial_size.mem_mb, d.result.mem_mb,
"test[{}]: {:?} memory should be {}",
i, d.desc, d.result.mem_mb,
);
8 src/runtime-rs/crates/resource/src/cpu_mem/mod.rs Normal file
@@ -0,0 +1,8 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

pub mod cpu;
pub mod initial_size;

@@ -22,6 +22,7 @@ pub mod rootfs;
pub mod share_fs;
pub mod volume;
pub use manager::ResourceManager;
pub mod cpu_mem;

use kata_types::config::hypervisor::SharedFsInfo;

@@ -30,3 +31,10 @@ pub enum ResourceConfig {
Network(NetworkConfig),
ShareFs(SharedFsInfo),
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResourceUpdateOp {
Add,
Del,
Update,
}

@@ -4,9 +4,8 @@
// SPDX-License-Identifier: Apache-2.0
//

use crate::network::NetworkConfig;
use crate::resource_persist::ResourceState;
use crate::{manager_inner::ResourceManagerInner, rootfs::Rootfs, volume::Volume, ResourceConfig};
use std::sync::Arc;

use agent::types::Device;
use agent::{Agent, Storage};
use anyhow::Result;
@@ -17,9 +16,13 @@ use kata_types::config::TomlConfig;
use kata_types::mount::Mount;
use oci::{Linux, LinuxResources};
use persist::sandbox_persist::Persist;
use std::sync::Arc;
use tokio::sync::RwLock;

use crate::network::NetworkConfig;
use crate::resource_persist::ResourceState;
use crate::ResourceUpdateOp;
use crate::{manager_inner::ResourceManagerInner, rootfs::Rootfs, volume::Volume, ResourceConfig};

pub struct ManagerArgs {
pub sid: String,
pub agent: Arc<dyn Agent>,
@@ -110,13 +113,14 @@ impl ResourceManager {
inner.dump().await
}

pub async fn update_cgroups(
pub async fn update_linux_resource(
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
) -> Result<()> {
op: ResourceUpdateOp,
) -> Result<Option<LinuxResources>> {
let inner = self.inner.read().await;
inner.update_cgroups(cid, linux_resources).await
inner.update_linux_resource(cid, linux_resources, op).await
}

pub async fn cleanup(&self) -> Result<()> {
@@ -4,30 +4,34 @@
// SPDX-License-Identifier: Apache-2.0
//

use std::{sync::Arc, thread, vec};
use std::{sync::Arc, thread};

use crate::{network::NetworkConfig, resource_persist::ResourceState};
use agent::{types::Device, Agent, Storage};
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use hypervisor::{
device::{device_manager::DeviceManager, DeviceConfig},
device::{
device_manager::{do_handle_device, DeviceManager},
DeviceConfig, DeviceType,
},
BlockConfig, Hypervisor,
};
use kata_types::config::TomlConfig;
use kata_types::mount::Mount;
use oci::{Linux, LinuxResources};
use oci::{Linux, LinuxCpu, LinuxResources};
use persist::sandbox_persist::Persist;
use tokio::{runtime, sync::RwLock};

use crate::{
cgroups::{CgroupArgs, CgroupsResource},
cpu_mem::cpu::CpuResource,
manager::ManagerArgs,
network::{self, Network},
network::{self, Network, NetworkConfig},
resource_persist::ResourceState,
rootfs::{RootFsResource, Rootfs},
share_fs::{self, ShareFs},
share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, ShareFs},
volume::{Volume, VolumeResource},
ResourceConfig,
ResourceConfig, ResourceUpdateOp,
};

pub(crate) struct ResourceManagerInner {
@@ -42,6 +46,7 @@ pub(crate) struct ResourceManagerInner {
pub rootfs_resource: RootFsResource,
pub volume_resource: VolumeResource,
pub cgroups_resource: CgroupsResource,
pub cpu_resource: CpuResource,
}

impl ResourceManagerInner {
@@ -51,12 +56,12 @@ impl ResourceManagerInner {
hypervisor: Arc<dyn Hypervisor>,
toml_config: Arc<TomlConfig>,
) -> Result<Self> {
let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;

// create device manager
let dev_manager =
DeviceManager::new(hypervisor.clone()).context("failed to create device manager")?;

let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
let cpu_resource = CpuResource::new(toml_config.clone())?;
Ok(Self {
sid: sid.to_string(),
toml_config,
@@ -68,6 +73,7 @@ impl ResourceManagerInner {
rootfs_resource: RootFsResource::new(),
volume_resource: VolumeResource::new(),
cgroups_resource,
cpu_resource,
})
}

@@ -97,6 +103,12 @@ impl ResourceManagerInner {
.setup_device_before_start_vm(self.hypervisor.as_ref())
.await
.context("setup share fs device before start vm")?;

// setup sandbox bind mounts: setup = true
self.handle_sandbox_bindmounts(true)
.await
.context("failed setup sandbox bindmounts")?;

Some(share_fs)
} else {
None
@@ -249,6 +261,7 @@ impl ResourceManagerInner {
spec,
self.device_manager.as_ref(),
&self.sid,
self.agent.clone(),
)
.await
}
@@ -258,42 +271,23 @@ impl ResourceManagerInner {
for d in linux.devices.iter() {
match d.r#type.as_str() {
"b" => {
let device_info = DeviceConfig::BlockCfg(BlockConfig {
let dev_info = DeviceConfig::BlockCfg(BlockConfig {
major: d.major,
minor: d.minor,
..Default::default()
});
let device_id = self
.device_manager
.write()
.await
.new_device(&device_info)
.await
.context("failed to create deviec")?;

self.device_manager
.write()
let device_info = do_handle_device(&self.device_manager, &dev_info)
.await
.try_add_device(&device_id)
.await
.context("failed to add deivce")?;

// get complete device information
let dev_info = self
.device_manager
.read()
.await
.get_device_info(&device_id)
.await
.context("failed to get device info")?;
.context("do handle device")?;

// create agent device
if let DeviceConfig::BlockCfg(config) = dev_info {
if let DeviceType::Block(device) = device_info {
let agent_device = Device {
id: device_id.clone(),
id: device.device_id.clone(),
container_path: d.path.clone(),
field_type: config.driver_option,
vm_path: config.virt_path,
field_type: device.config.driver_option,
vm_path: device.config.virt_path,
..Default::default()
};
devices.push(agent_device);
@@ -308,14 +302,20 @@ impl ResourceManagerInner {
Ok(devices)
}

pub async fn update_cgroups(
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
) -> Result<()> {
self.cgroups_resource
.update_cgroups(cid, linux_resources, self.hypervisor.as_ref())
.await
async fn handle_sandbox_bindmounts(&self, setup: bool) -> Result<()> {
let bindmounts = self.toml_config.runtime.sandbox_bind_mounts.clone();
if bindmounts.is_empty() {
info!(sl!(), "sandbox bindmounts empty, just skip it.");
return Ok(());
}

let sb_bindmnt = SandboxBindMounts::new(self.sid.clone(), bindmounts)?;

if setup {
sb_bindmnt.setup_sandbox_bind_mounts()
} else {
sb_bindmnt.cleanup_sandbox_bind_mounts()
}
}

pub async fn cleanup(&self) -> Result<()> {
@@ -324,6 +324,12 @@ impl ResourceManagerInner {
.delete()
.await
.context("delete cgroup")?;

// cleanup sandbox bind mounts: setup = false
self.handle_sandbox_bindmounts(false)
.await
.context("failed to cleanup sandbox bindmounts")?;

// clean up share fs mount
if let Some(share_fs) = &self.share_fs {
share_fs
@@ -340,6 +346,57 @@ impl ResourceManagerInner {
self.rootfs_resource.dump().await;
self.volume_resource.dump().await;
}

pub async fn update_linux_resource(
&self,
cid: &str,
linux_resources: Option<&LinuxResources>,
op: ResourceUpdateOp,
) -> Result<Option<LinuxResources>> {
let linux_cpus = || -> Option<&LinuxCpu> { linux_resources.as_ref()?.cpu.as_ref() }();

// if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
if !self.toml_config.runtime.static_sandbox_resource_mgmt {
self.cpu_resource
.update_cpu_resources(
cid,
linux_cpus,
op,
self.hypervisor.as_ref(),
self.agent.as_ref(),
)
.await?;
}

// we should firstly update the vcpus and mems, and then update the host cgroups
self.cgroups_resource
.update_cgroups(cid, linux_resources, op, self.hypervisor.as_ref())
.await?;

// update the linux resources for agent
self.agent_linux_resources(linux_resources)
}

fn agent_linux_resources(
&self,
linux_resources: Option<&LinuxResources>,
) -> Result<Option<LinuxResources>> {
let mut resources = match linux_resources {
Some(linux_resources) => linux_resources.clone(),
None => {
return Ok(None);
}
};

// clear the cpuset
// for example, if there are only 5 vcpus now, and the cpuset in LinuxResources is 0-2,6, guest os will report
// error when creating the container. so we choose to clear the cpuset here.
if let Some(cpu) = &mut resources.cpu {
cpu.cpus = String::new();
}

Ok(Some(resources))
}
}

#[async_trait]
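agent_linux_resources above clears the cpuset before the resources are forwarded to the guest, because a host cpuset may name CPUs the freshly resized guest does not have. A sketch of that sanitizing step on a minimal stand-in for oci::LinuxResources:

#[derive(Clone, Default, Debug, PartialEq)]
struct Cpu { cpus: String, quota: i64 }

#[derive(Clone, Default, Debug, PartialEq)]
struct Resources { cpu: Option<Cpu> }

fn agent_linux_resources(r: Option<&Resources>) -> Option<Resources> {
    let mut resources = r?.clone();
    if let Some(cpu) = &mut resources.cpu {
        // e.g. cpuset "0-2,6" may be invalid in a 5-vCPU guest; drop it
        cpu.cpus = String::new();
    }
    Some(resources)
}

fn main() {
    let host = Resources { cpu: Some(Cpu { cpus: "0-2,6".into(), quota: 200_000 }) };
    let guest = agent_linux_resources(Some(&host)).unwrap();
    assert_eq!(guest.cpu.unwrap().cpus, ""); // cpuset cleared, quota left untouched
}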
@@ -386,6 +443,7 @@ impl Persist for ResourceManagerInner {
)
.await?,
toml_config: Arc::new(TomlConfig::default()),
cpu_resource: CpuResource::default(),
})
}
}

@@ -10,7 +10,10 @@ use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::{
device::{device_manager::DeviceManager, DeviceConfig},
device::{
device_manager::{do_handle_device, DeviceManager},
DeviceConfig, DeviceType,
},
BlockConfig,
};
use kata_types::mount::Mount;
@@ -46,18 +49,10 @@ impl BlockRootfs {
..Default::default()
};

let device_id = d
.write()
// create and insert block device into Kata VM
let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone()))
.await
.new_device(&DeviceConfig::BlockCfg(block_device_config.clone()))
.await
.context("failed to create deviec")?;

d.write()
.await
.try_add_device(device_id.as_str())
.await
.context("failed to add deivce")?;
.context("do handle device failed.")?;

let mut storage = Storage {
fs_type: rootfs.fs_type.clone(),
@@ -66,17 +61,11 @@ impl BlockRootfs {
..Default::default()
};

// get complete device information
let dev_info = d
.read()
.await
.get_device_info(device_id.as_str())
.await
.context("failed to get device info")?;

if let DeviceConfig::BlockCfg(config) = dev_info {
storage.driver = config.driver_option;
storage.source = config.virt_path;
let mut device_id: String = "".to_owned();
if let DeviceType::Block(device) = device_info {
storage.driver = device.config.driver_option;
storage.source = device.config.virt_path;
device_id = device.device_id;
}

Ok(Self {

@@ -18,6 +18,7 @@ pub use utils::{
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
pub use virtio_fs_share_mount::EPHEMERAL_PATH;
pub mod sandbox_bind_mounts;

use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};


@@ -0,0 +1,155 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
// Note:
// sandbox_bind_mounts supports kinds of mount patterns, for example:
// (1) "/path/to", with default readonly mode.
// (2) "/path/to:ro", same as (1).
// (3) "/path/to:rw", with readwrite mode.
//
// sandbox_bind_mounts: ["/path/to", "/path/to:rw", "/mnt/to:ro"]
//

use std::{
collections::HashMap,
fs,
path::{Path, PathBuf},
};

use anyhow::{anyhow, Context, Result};

use super::utils::{do_get_host_path, mkdir_with_permissions};
use kata_sys_util::{fs::get_base_name, mount};
use kata_types::mount::{SANDBOX_BIND_MOUNTS_DIR, SANDBOX_BIND_MOUNTS_RO, SANDBOX_BIND_MOUNTS_RW};

#[derive(Clone, Default, Debug)]
pub struct SandboxBindMounts {
sid: String,
host_mounts_path: PathBuf,
sandbox_bindmounts: Vec<String>,
}

impl SandboxBindMounts {
pub fn new(sid: String, sandbox_bindmounts: Vec<String>) -> Result<Self> {
// /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts
let bindmounts_path =
do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, sid.as_str(), "", true, false);
let host_mounts_path = PathBuf::from(bindmounts_path);

Ok(SandboxBindMounts {
sid,
host_mounts_path,
sandbox_bindmounts,
})
}

fn parse_sandbox_bind_mounts<'a>(&self, bindmnt_src: &'a str) -> Result<(&'a str, &'a str)> {
// get the bindmount's r/w mode
let bindmount_mode = if bindmnt_src.ends_with(SANDBOX_BIND_MOUNTS_RW) {
SANDBOX_BIND_MOUNTS_RW
} else {
SANDBOX_BIND_MOUNTS_RO
};

// get the true bindmount from the string
let bindmount = bindmnt_src.trim_end_matches(bindmount_mode);

Ok((bindmount_mode, bindmount))
}

pub fn setup_sandbox_bind_mounts(&self) -> Result<()> {
let mut mounted_list: Vec<PathBuf> = Vec::new();
let mut mounted_map: HashMap<String, bool> = HashMap::new();
for src in &self.sandbox_bindmounts {
let (bindmount_mode, bindmount) = self
.parse_sandbox_bind_mounts(src)
.context("parse sandbox bind mounts failed")?;

// get the basename of the canonicalized mount path mnt_name: dirX
let mnt_name = get_base_name(bindmount)?
.into_string()
.map_err(|e| anyhow!("failed to get base name {:?}", e))?;

// if repeated mounted, do umount it and return error
if mounted_map.insert(mnt_name.clone(), true).is_some() {
for p in &mounted_list {
nix::mount::umount(p)
.context("mounted_map insert one repeated mounted, do umount it")?;
}

return Err(anyhow!(
"sandbox-bindmounts: path {} is already specified.",
bindmount
));
}

// mount_dest: /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts/dirX
let mount_dest = self.host_mounts_path.clone().join(mnt_name.as_str());
mkdir_with_permissions(self.host_mounts_path.clone().to_path_buf(), 0o750).context(
format!(
"create host mounts path {:?}",
self.host_mounts_path.clone()
),
)?;

info!(
sl!(),
"sandbox-bindmounts mount_src: {:?} => mount_dest: {:?}", bindmount, &mount_dest
);

// mount -o bind,ro host_shared mount_dest
// host_shared: ${bindmount}
mount::bind_mount_unchecked(Path::new(bindmount), &mount_dest, true).map_err(|e| {
for p in &mounted_list {
nix::mount::umount(p).unwrap_or_else(|x| {
format!("do umount failed: {:?}", x);
});
}
e
})?;

// default sandbox bind mounts mode is ro.
if bindmount_mode == SANDBOX_BIND_MOUNTS_RO {
info!(sl!(), "sandbox readonly bind mount.");
// dest_ro: /run/kata-containers/shared/sandboxes/<sid>/ro/passthrough/sandbox-mounts
let mount_dest_ro =
do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, &self.sid, "", true, true);
let sandbox_bindmounts_ro = [mount_dest_ro, mnt_name.clone()].join("/");

mount::bind_remount(sandbox_bindmounts_ro, true)
.context("remount ro directory with ro permission")?;
}

mounted_list.push(mount_dest);
}

Ok(())
}

pub fn cleanup_sandbox_bind_mounts(&self) -> Result<()> {
for src in &self.sandbox_bindmounts {
let parsed_mnts = self
.parse_sandbox_bind_mounts(src)
.context("parse sandbox bind mounts")?;

let mnt_name = get_base_name(parsed_mnts.1)?
.into_string()
.map_err(|e| anyhow!("failed to convert to string{:?}", e))?;

// /run/kata-containers/shared/sandboxes/<sid>/passthrough/rw/sandbox-mounts/dir
let mnt_dest = self.host_mounts_path.join(mnt_name.as_str());
mount::umount_timeout(mnt_dest, 0).context("umount bindmount failed")?;
}

if fs::metadata(self.host_mounts_path.clone())?.is_dir() {
fs::remove_dir_all(self.host_mounts_path.clone()).context(format!(
"remove sandbox bindmount point {:?}.",
self.host_mounts_path.clone()
))?;
}

Ok(())
}
}
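parse_sandbox_bind_mounts above only distinguishes an explicit :rw suffix; everything else, including :ro, falls into the read-only branch, and trim_end_matches strips the suffix to recover the path. A standalone sketch with the suffixes written out (the real constants come from kata_types::mount):

const RW: &str = ":rw";
const RO: &str = ":ro";

fn parse_bind_mount(src: &str) -> (&'static str, String) {
    let mode = if src.ends_with(RW) { RW } else { RO };
    // strip whichever suffix is present; a bare path is left untouched
    let path = src.trim_end_matches(RW).trim_end_matches(RO).to_string();
    (mode, path)
}

fn main() {
    assert_eq!(parse_bind_mount("/path/to"), (RO, "/path/to".to_string()));
    assert_eq!(parse_bind_mount("/path/to:ro"), (RO, "/path/to".to_string()));
    assert_eq!(parse_bind_mount("/path/to:rw"), (RW, "/path/to".to_string()));
}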
@@ -56,6 +56,7 @@ pub(crate) async fn prepare_virtiofs(
|
||||
fs_type: fs_type.to_string(),
|
||||
queue_size: 0,
|
||||
queue_num: 0,
|
||||
options: vec![],
|
||||
},
|
||||
};
|
||||
h.add_device(DeviceType::ShareFs(share_fs_device))
|
||||
|
||||
@@ -4,13 +4,27 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{
|
||||
os::unix::fs::PermissionsExt,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use kata_sys_util::mount;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub(crate) fn mkdir_with_permissions(path_target: PathBuf, mode: u32) -> Result<()> {
|
||||
let new_path = &path_target;
|
||||
std::fs::create_dir_all(new_path)
|
||||
.context(format!("unable to create new path: {:?}", new_path))?;
|
||||
|
||||
// mode format: 0o750, ...
|
||||
std::fs::set_permissions(new_path, std::fs::Permissions::from_mode(mode))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
|
||||
if !path.exists() {
|
||||
std::fs::create_dir_all(path).context(format!("failed to create directory {:?}", path))?;
|
||||
|
||||
@@ -12,7 +12,6 @@ use kata_types::k8s::is_watchable_mount;
|
||||
use kata_types::mount;
|
||||
use nix::sys::stat::stat;
|
||||
use std::fs;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::Path;
|
||||
|
||||
const WATCHABLE_PATH_NAME: &str = "watchable";
|
||||
@@ -21,7 +20,10 @@ pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
||||
|
||||
use super::{
|
||||
get_host_rw_shared_path,
|
||||
utils::{self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path},
|
||||
utils::{
|
||||
self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path,
|
||||
mkdir_with_permissions,
|
||||
},
|
||||
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
|
||||
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
|
||||
};
|
||||
@@ -79,13 +81,11 @@ impl ShareFsMount for VirtiofsShareMount {
|
||||
.join(PASSTHROUGH_FS_DIR)
|
||||
.join(WATCHABLE_PATH_NAME);
|
||||
|
||||
fs::create_dir_all(&watchable_host_path).context(format!(
|
||||
"unable to create watchable path: {:?}",
|
||||
&watchable_host_path,
|
||||
mkdir_with_permissions(watchable_host_path.clone(), 0o750).context(format!(
|
||||
"unable to create watchable path {:?}",
|
||||
watchable_host_path
|
||||
))?;
|
||||
|
||||
fs::set_permissions(watchable_host_path, fs::Permissions::from_mode(0o750))?;
|
||||
|
||||
// path: /run/kata-containers/shared/containers/passthrough/watchable/config-map-name
|
||||
let file_name = Path::new(&guest_path)
|
||||
.file_name()
|
||||
|
||||
@@ -4,29 +4,32 @@
// SPDX-License-Identifier: Apache-2.0
//

use anyhow::Result;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use std::{collections::HashMap, fs, path::Path};
use nix::sys::{stat, stat::SFlag};
use tokio::sync::RwLock;

use crate::share_fs::{do_get_guest_path, do_get_host_path};

use super::{share_fs_volume::generate_mount_path, Volume};
use agent::Storage;
use anyhow::{anyhow, Context};
use super::Volume;
use crate::volume::utils::{
    generate_shared_path, volume_mount_info, DEFAULT_VOLUME_FS_TYPE, KATA_DIRECT_VOLUME_TYPE,
    KATA_MOUNT_BIND_TYPE,
};
use hypervisor::{
    device::{device_manager::DeviceManager, DeviceConfig},
    device::{
        device_manager::{do_handle_device, DeviceManager},
        DeviceConfig, DeviceType,
    },
    BlockConfig,
};
use nix::sys::stat::{self, SFlag};
use tokio::sync::RwLock;
#[derive(Debug)]

#[derive(Clone)]
pub(crate) struct BlockVolume {
    storage: Option<agent::Storage>,
    mount: oci::Mount,
    device_id: String,
}

/// BlockVolume: block device volume
/// BlockVolume for bind-mount block volume and direct block volume
impl BlockVolume {
    pub(crate) async fn new(
        d: &RwLock<DeviceManager>,
@@ -35,54 +38,71 @@ impl BlockVolume {
        cid: &str,
        sid: &str,
    ) -> Result<Self> {
        let fstat = stat::stat(m.source.as_str()).context(format!("stat {}", m.source))?;
        info!(sl!(), "device stat: {:?}", fstat);
        let mut options = HashMap::new();
        if read_only {
            options.insert("read_only".to_string(), "true".to_string());
        }
        let mnt_src: &str = &m.source;
        // default block device fs type: ext4.
        let mut blk_dev_fstype = DEFAULT_VOLUME_FS_TYPE.to_string();

        let block_device_config = &mut BlockConfig {
            major: stat::major(fstat.st_rdev) as i64,
            minor: stat::minor(fstat.st_rdev) as i64,
            ..Default::default()
        let block_device_config = match m.r#type.as_str() {
            KATA_MOUNT_BIND_TYPE => {
                let fstat = stat::stat(mnt_src).context(format!("stat {}", m.source))?;

                BlockConfig {
                    major: stat::major(fstat.st_rdev) as i64,
                    minor: stat::minor(fstat.st_rdev) as i64,
                    ..Default::default()
                }
            }
            KATA_DIRECT_VOLUME_TYPE => {
                // get volume mountinfo from mountinfo.json
                let v = volume_mount_info(mnt_src)
                    .context("deserialize information from mountinfo.json")?;
                // check volume type
                if v.volume_type != KATA_DIRECT_VOLUME_TYPE {
                    return Err(anyhow!("volume type {:?} is invalid", v.volume_type));
                }

                let fstat = stat::stat(v.device.as_str())
                    .with_context(|| format!("stat volume device file: {}", v.device.clone()))?;
                if SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFREG
                    && SFlag::from_bits_truncate(fstat.st_mode) != SFlag::S_IFBLK
                {
                    return Err(anyhow!(
                        "invalid volume device {:?} for volume type {:?}",
                        v.device,
                        v.volume_type
                    ));
                }

                blk_dev_fstype = v.fs_type.clone();

                BlockConfig {
                    path_on_host: v.device,
                    ..Default::default()
                }
            }
            _ => {
                return Err(anyhow!(
                    "unsupported direct block volume r#type: {:?}",
                    m.r#type.as_str()
                ))
            }
        };

        let device_id = d
            .write()
        // create and insert block device into Kata VM
        let device_info = do_handle_device(d, &DeviceConfig::BlockCfg(block_device_config.clone()))
            .await
            .new_device(&DeviceConfig::BlockCfg(block_device_config.clone()))
            .await
            .context("failed to create device")?;
            .context("do handle device failed.")?;

        d.write()
        // generate host-guest shared path
        let guest_path = generate_shared_path(m.destination.clone(), read_only, cid, sid)
            .await
            .try_add_device(device_id.as_str())
            .await
            .context("failed to add device")?;

        let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
        let file_name = generate_mount_path(cid, file_name);
        let guest_path = do_get_guest_path(&file_name, cid, true, false);
        let host_path = do_get_host_path(&file_name, sid, cid, true, read_only);
        fs::create_dir_all(&host_path)
            .map_err(|e| anyhow!("failed to create rootfs dir {}: {:?}", host_path, e))?;

        // get complete device information
        let dev_info = d
            .read()
            .await
            .get_device_info(&device_id)
            .await
            .context("failed to get device info")?;
            .context("generate host-guest shared path failed")?;

        // storage
        let mut storage = Storage::default();

        if let DeviceConfig::BlockCfg(config) = dev_info {
            storage.driver = config.driver_option;
            storage.source = config.virt_path;
        }
        let mut storage = agent::Storage {
            mount_point: guest_path.clone(),
            ..Default::default()
        };

        storage.options = if read_only {
            vec!["ro".to_string()]
@@ -90,21 +110,32 @@ impl BlockVolume {
            Vec::new()
        };

        storage.mount_point = guest_path.clone();

        // If the volume had specified the filesystem type, use it. Otherwise, set it
        // to ext4 since right now that is the only one we support.
        if m.r#type != "bind" {
            storage.fs_type = m.r#type.clone();
        } else {
            storage.fs_type = "ext4".to_string();
        // As the true Block Device wrapped in DeviceType, we need to
        // get it out from the wrapper, and the device_id will be for
        // BlockVolume.
        // safe here, device_info is correct and only unwrap it.
        let mut device_id = String::new();
        if let DeviceType::Block(device) = device_info {
            // blk, mmioblk
            storage.driver = device.config.driver_option;
            // /dev/vdX
            storage.source = device.config.virt_path;
            device_id = device.device_id;
        }

        // In some cases, dest is a device such as /dev/xxx
        if m.destination.clone().starts_with("/dev") {
            storage.fs_type = "bind".to_string();
            storage.options.append(&mut m.options.clone());
        } else {
            // usually, the dest is a directory.
            storage.fs_type = blk_dev_fstype;
        }

        // mount
        let mount = oci::Mount {
            destination: m.destination.clone(),
            r#type: m.r#type.clone(),
            source: guest_path.clone(),
            r#type: storage.fs_type.clone(),
            source: guest_path,
            options: m.options.clone(),
        };

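For orientation, a hedged sketch of the input the directvol arm above consumes. The field names follow the accesses on v (volume_type, device, fs_type); the values are invented:

    // Hypothetical mount info parsed from mountinfo.json for a direct volume:
    //   volume_type: "directvol"  -- must equal KATA_DIRECT_VOLUME_TYPE or the arm bails out
    //   device:      "/dev/vdb"   -- must stat as S_IFBLK or S_IFREG (raw device or backing image)
    //   fs_type:     "ext4"       -- propagated into storage.fs_type for directory destinations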
@@ -128,6 +159,7 @@ impl Volume for BlockVolume {
        } else {
            vec![]
        };

        Ok(s)
    }

@@ -144,13 +176,22 @@ impl Volume for BlockVolume {
    }
}

pub(crate) fn is_block_volume(m: &oci::Mount) -> bool {
    if m.r#type != "bind" {
        return false;
pub(crate) fn is_block_volume(m: &oci::Mount) -> Result<bool> {
    let vol_types = vec![KATA_MOUNT_BIND_TYPE, KATA_DIRECT_VOLUME_TYPE];
    if !vol_types.contains(&m.r#type.as_str()) {
        return Ok(false);
    }
    if let Ok(fstat) = stat::stat(m.source.as_str()).context(format!("stat {}", m.source)) {
        info!(sl!(), "device stat: {:?}", fstat);
        return SFlag::from_bits_truncate(fstat.st_mode) == SFlag::S_IFBLK;

    let fstat =
        stat::stat(m.source.as_str()).context(format!("stat mount source {} failed.", m.source))?;
    let s_flag = SFlag::from_bits_truncate(fstat.st_mode);

    match m.r#type.as_str() {
        // case: mount bind and block device
        KATA_MOUNT_BIND_TYPE if s_flag == SFlag::S_IFBLK => Ok(true),
        // case: directvol and directory
        KATA_DIRECT_VOLUME_TYPE if s_flag == SFlag::S_IFDIR => Ok(true),
        // else: unsupported, or TODO for other volume types.
        _ => Ok(false),
    }
    false
}

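The classification above hinges on the file-type bits of st_mode. A small self-contained sketch of the same check, assuming only the nix and anyhow crates:

    use anyhow::Result;
    use nix::sys::stat::{self, SFlag};

    // Return just the file-type bits (S_IFBLK, S_IFDIR, ...) of a path's mode.
    fn file_type_flag(path: &str) -> Result<SFlag> {
        let fstat = stat::stat(path)?;
        // from_bits_truncate drops the permission bits, which SFlag does not define.
        Ok(SFlag::from_bits_truncate(fstat.st_mode))
    }

    // file_type_flag("/dev/vda")? == SFlag::S_IFBLK -> a "bind" mount qualifies
    // file_type_flag("/mnt/dir")? == SFlag::S_IFDIR -> a "directvol" mount qualifies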
@@ -9,16 +9,18 @@ mod default_volume;
pub mod hugepage;
mod share_fs_volume;
mod shm_volume;
use async_trait::async_trait;
pub mod utils;

use std::{sync::Arc, vec::Vec};

use anyhow::{Context, Result};
use hypervisor::device::device_manager::DeviceManager;
use std::{sync::Arc, vec::Vec};
use async_trait::async_trait;
use tokio::sync::RwLock;

use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume};

use self::hugepage::{get_huge_page_limits_map, get_huge_page_option};
use crate::{share_fs::ShareFs, volume::block_volume::is_block_volume};
use agent::Agent;
use hypervisor::device::device_manager::DeviceManager;

const BIND: &str = "bind";

@@ -52,6 +54,7 @@ impl VolumeResource {
        spec: &oci::Spec,
        d: &RwLock<DeviceManager>,
        sid: &str,
        agent: Arc<dyn Agent>,
    ) -> Result<Vec<Arc<dyn Volume>>> {
        let mut volumes: Vec<Arc<dyn Volume>> = vec![];
        let oci_mounts = &spec.mounts;
@@ -65,7 +68,7 @@ impl VolumeResource {
                    shm_volume::ShmVolume::new(m, shm_size)
                        .with_context(|| format!("new shm volume {:?}", m))?,
                )
            } else if is_block_volume(m) {
            } else if is_block_volume(m).context("block volume type")? {
                // handle block volume
                Arc::new(
                    block_volume::BlockVolume::new(d, m, read_only, cid, sid)
@@ -85,7 +88,7 @@ impl VolumeResource {
                )
            } else if share_fs_volume::is_share_fs_volume(m) {
                Arc::new(
                    share_fs_volume::ShareFsVolume::new(share_fs, m, cid, read_only)
                    share_fs_volume::ShareFsVolume::new(share_fs, m, cid, read_only, agent.clone())
                        .await
                        .with_context(|| format!("new share fs volume {:?}", m))?,
                )

@@ -5,11 +5,15 @@
//

use std::{
    fs::File,
    io::Read,
    os::unix::fs::MetadataExt,
    path::{Path, PathBuf},
    str::FromStr,
    sync::Arc,
};

use agent::Agent;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
@@ -19,6 +23,9 @@ use super::Volume;
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
use kata_types::mount;

use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use crate::share_fs::PASSTHROUGH_FS_DIR;

const SYS_MOUNT_PREFIX: [&str; 2] = ["/proc", "/sys"];

// copy file to container's rootfs if filesystem sharing is not supported, otherwise
@@ -39,6 +46,7 @@ impl ShareFsVolume {
        m: &oci::Mount,
        cid: &str,
        readonly: bool,
        agent: Arc<dyn Agent>,
    ) -> Result<Self> {
        // The file_name is in the format of "sandbox-{uuid}-{file_name}"
        let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
@@ -61,11 +69,67 @@ impl ShareFsVolume {
            Ok(src) => src,
        };

        // If the mount source is a file, we can copy it to the sandbox
        if src.is_file() {
            // TODO: copy file
            debug!(sl!(), "FIXME: copy file {}", &m.source);
        } else {
            // This is where we set the value for the guest path
            let dest = [
                DEFAULT_KATA_GUEST_SANDBOX_DIR,
                PASSTHROUGH_FS_DIR,
                file_name.clone().as_str(),
            ]
            .join("/");

            debug!(
                sl!(),
                "copy local file {:?} to guest {:?}",
                &m.source,
                dest.clone()
            );

            // Read file metadata
            let file_metadata = std::fs::metadata(src.clone())
                .with_context(|| format!("Failed to read metadata from file: {:?}", src))?;

            // Open file
            let mut file = File::open(&src)
                .with_context(|| format!("Failed to open file: {:?}", src))?;

            // Read file contents into the buffer
            let mut buffer = Vec::new();
            file.read_to_end(&mut buffer)
                .with_context(|| format!("Failed to read file: {:?}", src))?;

            // Create gRPC request
            let r = agent::CopyFileRequest {
                path: dest.clone(),
                file_size: file_metadata.len() as i64,
                uid: file_metadata.uid() as i32,
                gid: file_metadata.gid() as i32,
                file_mode: file_metadata.mode(),
                data: buffer,
                ..Default::default()
            };

            debug!(sl!(), "copy_file: {:?} to sandbox {:?}", &src, dest.clone());

            // Issue gRPC request to agent
            agent.copy_file(r).await.with_context(|| {
                format!(
                    "copy file request failed: src: {:?}, dest: {:?}",
                    file_name, dest
                )
            })?;

            // append oci::Mount structure to volume mounts
            volume.mounts.push(oci::Mount {
                destination: m.destination.clone(),
                r#type: "bind".to_string(),
                source: dest.clone(),
                options: m.options.clone(),
            })
        } else {
            // If not, we can ignore it. Let's issue a warning so that the user knows.
            warn!(
                sl!(),
                "Ignoring non-regular file as FS sharing not supported. mount: {:?}", m
            );

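For a concrete sense of the guest destination assembled above: with the documented file_name format "sandbox-{uuid}-{file_name}", and assuming DEFAULT_KATA_GUEST_SANDBOX_DIR and PASSTHROUGH_FS_DIR expand to the path segments seen elsewhere in this diff ("/run/kata-containers/sandbox" and "passthrough"), the join produces:

    let dest = ["/run/kata-containers/sandbox", "passthrough", "sandbox-<uuid>-cm"].join("/");
    assert_eq!(dest, "/run/kata-containers/sandbox/passthrough/sandbox-<uuid>-cm");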
65 src/runtime-rs/crates/resource/src/volume/utils.rs Normal file
65 src/runtime-rs/crates/resource/src/volume/utils.rs Normal file
@@ -0,0 +1,65 @@
// Copyright (c) 2022-2023 Alibaba Cloud
// Copyright (c) 2022-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::{fs, path::Path};

use anyhow::{anyhow, Context, Result};

use crate::{
    share_fs::{do_get_guest_path, do_get_host_path},
    volume::share_fs_volume::generate_mount_path,
};
use kata_sys_util::eother;
use kata_types::mount::{get_volume_mount_info, DirectVolumeMountInfo};

pub const DEFAULT_VOLUME_FS_TYPE: &str = "ext4";
pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
pub const KATA_DIRECT_VOLUME_TYPE: &str = "directvol";
pub const KATA_VFIO_VOLUME_TYPE: &str = "vfiovol";
pub const KATA_SPDK_VOLUME_TYPE: &str = "spdkvol";

// volume_mount_info loads volume mount information from mountinfo.json
pub fn volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
    get_volume_mount_info(volume_path)
}

pub fn get_file_name<P: AsRef<Path>>(src: P) -> Result<String> {
    let file_name = src
        .as_ref()
        .file_name()
        .map(|v| v.to_os_string())
        .ok_or_else(|| {
            eother!(
                "failed to get file name of path {}",
                src.as_ref().to_string_lossy()
            )
        })?
        .into_string()
        .map_err(|e| anyhow!("failed to convert to string {:?}", e))?;

    Ok(file_name)
}

pub(crate) async fn generate_shared_path(
    dest: String,
    read_only: bool,
    cid: &str,
    sid: &str,
) -> Result<String> {
    let file_name = get_file_name(&dest).context("failed to get file name.")?;
    let mount_name = generate_mount_path(cid, file_name.as_str());
    let guest_path = do_get_guest_path(&mount_name, cid, true, false);
    let host_path = do_get_host_path(&mount_name, sid, cid, true, read_only);

    if dest.starts_with("/dev") {
        fs::File::create(&host_path).context(format!("failed to create file {:?}", &host_path))?;
    } else {
        std::fs::create_dir_all(&host_path)
            .map_err(|e| anyhow!("failed to create dir {}: {:?}", host_path, e))?;
    }

    Ok(guest_path)
}
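A hedged usage sketch of generate_shared_path (the IDs are invented; the host-side path is created as a side effect, as a file for /dev destinations and a directory otherwise):

    // e.g. inside BlockVolume::new, for a mount destined at /data:
    let guest_path = generate_shared_path("/data".to_string(), false, "cid-1234", "sid-5678")
        .await
        .context("generate host-guest shared path failed")?;
    // guest_path is what the agent mounts inside the guest; the matching
    // host path under the sandbox's shared directory now exists on the host.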
@@ -3,6 +3,7 @@
//
// SPDX-License-Identifier: Apache-2.0
//

use std::sync::Arc;

use anyhow::Result;

@@ -26,12 +26,10 @@ pub trait Sandbox: Send + Sync {
    async fn cleanup(&self) -> Result<()>;
    async fn shutdown(&self) -> Result<()>;

    // agent function
    async fn agent_sock(&self) -> Result<String>;

    // utils
    async fn set_iptables(&self, is_ipv6: bool, data: Vec<u8>) -> Result<Vec<u8>>;
    async fn get_iptables(&self, is_ipv6: bool) -> Result<Vec<u8>>;
    async fn direct_volume_stats(&self, volume_path: &str) -> Result<String>;
    async fn direct_volume_resize(&self, resize_req: agent::ResizeVolumeRequest) -> Result<()>;
    async fn agent_sock(&self) -> Result<String>;
}

@@ -13,4 +13,3 @@ pub mod manager;
pub use manager::RuntimeHandlerManager;
pub use shim_interface;
mod shim_mgmt;
mod static_resource;
Some files were not shown because too many files have changed in this diff