diff --git a/.github/workflows/add-issues-to-project.yaml b/.github/workflows/add-issues-to-project.yaml index 117e62600..6ba266261 100644 --- a/.github/workflows/add-issues-to-project.yaml +++ b/.github/workflows/add-issues-to-project.yaml @@ -39,7 +39,7 @@ jobs: popd &>/dev/null - name: Checkout code to allow hub to communicate with the project - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Add issue to issue backlog env: diff --git a/.github/workflows/add-pr-sizing-label.yaml b/.github/workflows/add-pr-sizing-label.yaml index 313c9f285..2fd0abc64 100644 --- a/.github/workflows/add-pr-sizing-label.yaml +++ b/.github/workflows/add-pr-sizing-label.yaml @@ -21,7 +21,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v1 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install PR sizing label script run: | diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml new file mode 100644 index 000000000..6ac0a5240 --- /dev/null +++ b/.github/workflows/basic-ci-amd64.yaml @@ -0,0 +1,200 @@ +name: CI | Basic amd64 tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-cri-containerd: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts + + - name: Run cri-containerd tests + run: bash tests/integration/cri-containerd/gha-run.sh run + + run-containerd-stability: + strategy: + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/stability/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: 
kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/stability/gha-run.sh install-kata kata-artifacts + + - name: Run containerd-stability tests + run: bash tests/stability/gha-run.sh run + + run-nydus: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu', 'dragonball'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nydus/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts + + - name: Run nydus tests + run: bash tests/integration/nydus/gha-run.sh run + + run-runk: + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: lts + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/runk/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts + + - name: Run tracing tests + run: bash tests/integration/runk/gha-run.sh run + + run-vfio: + strategy: + fail-fast: false + matrix: + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2304 + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/vfio/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Run vfio tests + timeout-minutes: 15 + run: bash tests/functional/vfio/gha-run.sh run diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 869d49bc6..48406ae27 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -16,6 +16,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: @@ -23,9 +27,12 @@ jobs: strategy: matrix: asset: + - agent + - agent-ctl 
- cloud-hypervisor - cloud-hypervisor-glibc - firecracker + - kata-ctl - kernel - kernel-sev - kernel-dragonball-experimental @@ -33,6 +40,7 @@ jobs: - kernel-nvidia-gpu - kernel-nvidia-gpu-snp - kernel-nvidia-gpu-tdx-experimental + - log-parser-rs - nydus - ovmf - ovmf-sev @@ -44,12 +52,16 @@ jobs: - rootfs-initrd - rootfs-initrd-mariner - rootfs-initrd-sev + - runk - shim-v2 - tdvf + - trace-forwarder - virtiofsd stage: - ${{ inputs.stage }} exclude: + - asset: agent + stage: release - asset: cloud-hypervisor-glibc stage: release steps: @@ -66,6 +78,12 @@ jobs: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -76,6 +94,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 @@ -92,6 +114,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index cafc6e020..81095e000 100644 --- a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -16,10 +16,14 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: - runs-on: arm64 + runs-on: arm64-builder strategy: matrix: asset: @@ -52,6 +56,13 @@ jobs: with: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -62,6 +73,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 @@ -72,7 +87,7 @@ jobs: if-no-files-found: error create-kata-tarball: - runs-on: arm64 + runs-on: arm64-builder needs: build-asset steps: - name: Adjust a permission for repo @@ -82,6 +97,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml 
b/.github/workflows/build-kata-static-tarball-s390x.yaml index 0fe7e9200..90be3aa77 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -16,6 +16,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: @@ -48,6 +52,13 @@ jobs: with: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -59,6 +70,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 @@ -79,6 +94,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml index 5c7676710..75f5f2667 100644 --- a/.github/workflows/ci-nightly.yaml +++ b/.github/workflows/ci-nightly.yaml @@ -15,4 +15,5 @@ jobs: commit-hash: ${{ github.sha }} pr-number: "nightly" tag: ${{ github.sha }}-nightly + target-branch: ${{ github.ref_name }} secrets: inherit diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index e1f3eaf96..59a297b78 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -28,4 +28,5 @@ jobs: commit-hash: ${{ github.event.pull_request.head.sha }} pr-number: ${{ github.event.pull_request.number }} tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }} + target-branch: ${{ github.event.pull_request.base.ref }} secrets: inherit diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 52a86b08d..71fc7ec56 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,6 +11,10 @@ on: tag: required: true type: string + target-branch: + required: false + type: string + default: "" jobs: build-kata-static-tarball-amd64: @@ -18,6 +22,7 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} publish-kata-deploy-payload-amd64: needs: build-kata-static-tarball-amd64 @@ -28,8 +33,94 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} secrets: inherit + build-and-publish-tee-confidential-unencrypted-image: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: 
Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to Kata Containers ghcr.io + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Docker build and push + uses: docker/build-push-action@v4 + with: + tags: ghcr.io/kata-containers/test-images:unencrypted-${{ inputs.pr-number }} + push: true + context: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ + platforms: linux/amd64, linux/s390x + file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile + + run-docker-tests-on-garm: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-docker-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-nerdctl-tests-on-garm: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-nerdctl-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + + run-kata-deploy-tests-on-aks: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-aks.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-kata-deploy-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + + run-kata-monitor-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-kata-monitor-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-k8s-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-k8s-tests-on-aks.yaml @@ -39,34 +130,43 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} secrets: inherit - run-k8s-tests-on-sev: + run-k8s-tests-on-garm: needs: publish-kata-deploy-payload-amd64 - uses: ./.github/workflows/run-k8s-tests-on-sev.yaml + uses: ./.github/workflows/run-k8s-tests-on-garm.yaml with: registry: ghcr.io repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit - run-k8s-tests-on-snp: + run-k8s-tests-with-crio-on-garm: needs: publish-kata-deploy-payload-amd64 - uses: ./.github/workflows/run-k8s-tests-on-snp.yaml + uses: ./.github/workflows/run-k8s-tests-with-crio-on-garm.yaml with: registry: ghcr.io repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit - run-k8s-tests-on-tdx: - needs: publish-kata-deploy-payload-amd64 - uses: ./.github/workflows/run-k8s-tests-on-tdx.yaml + 
run-kata-coco-tests: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-kata-coco-tests.yaml with: registry: ghcr.io repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} run-metrics-tests: needs: build-kata-static-tarball-amd64 @@ -74,24 +174,12 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} - run-cri-containerd-tests: + run-basic-amd64-tests: needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-cri-containerd-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - - run-nydus-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-nydus-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - - run-vfio-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-vfio-tests.yaml + uses: ./.github/workflows/basic-ci-amd64.yaml with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/darwin-tests.yaml b/.github/workflows/darwin-tests.yaml index 02bbb0e72..8b3f9041a 100644 --- a/.github/workflows/darwin-tests.yaml +++ b/.github/workflows/darwin-tests.yaml @@ -21,6 +21,6 @@ jobs: with: go-version: 1.19.3 - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build utils run: ./ci/darwin-test.sh diff --git a/.github/workflows/docs-url-alive-check.yaml b/.github/workflows/docs-url-alive-check.yaml index bd1f5ab2c..543215f77 100644 --- a/.github/workflows/docs-url-alive-check.yaml +++ b/.github/workflows/docs-url-alive-check.yaml @@ -22,7 +22,7 @@ jobs: echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 path: ./src/github.com/${{ github.repository }} diff --git a/.github/workflows/move-issues-to-in-progress.yaml b/.github/workflows/move-issues-to-in-progress.yaml index 0e15abaea..23819e18c 100644 --- a/.github/workflows/move-issues-to-in-progress.yaml +++ b/.github/workflows/move-issues-to-in-progress.yaml @@ -38,7 +38,17 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Move issue to "In progress" if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index 46766c54b..bcc2aa7a0 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -4,6 +4,7 @@ on: branches: - main - stable-* + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || 
github.ref }} @@ -15,6 +16,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: ${{ github.ref_name }} secrets: inherit build-assets-arm64: @@ -22,6 +24,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: ${{ github.ref_name }} secrets: inherit build-assets-s390x: @@ -29,6 +32,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-amd64: @@ -39,6 +43,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-amd64 + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-arm64: @@ -49,6 +54,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-arm64 + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-s390x: @@ -59,6 +65,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-s390x + target-branch: ${{ github.ref_name }} secrets: inherit publish-manifest: diff --git a/.github/workflows/publish-kata-deploy-payload-amd64.yaml b/.github/workflows/publish-kata-deploy-payload-amd64.yaml index b5ba900d8..253b93fbc 100644 --- a/.github/workflows/publish-kata-deploy-payload-amd64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-amd64.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: @@ -25,6 +29,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 diff --git a/.github/workflows/publish-kata-deploy-payload-arm64.yaml b/.github/workflows/publish-kata-deploy-payload-arm64.yaml index 6c35ed8a3..f198814fe 100644 --- a/.github/workflows/publish-kata-deploy-payload-arm64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-arm64.yaml @@ -17,10 +17,14 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: - runs-on: arm64 + runs-on: arm64-builder steps: - name: Adjust a permission for repo run: | @@ -29,6 +33,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 diff --git a/.github/workflows/publish-kata-deploy-payload-s390x.yaml b/.github/workflows/publish-kata-deploy-payload-s390x.yaml index ee7fa3fd7..6d1d44f7b 100644 --- a/.github/workflows/publish-kata-deploy-payload-s390x.yaml +++ b/.github/workflows/publish-kata-deploy-payload-s390x.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: @@ -29,6 +33,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 diff --git 
a/.github/workflows/release-arm64.yaml b/.github/workflows/release-arm64.yaml index cd7db8fdf..136177580 100644 --- a/.github/workflows/release-arm64.yaml +++ b/.github/workflows/release-arm64.yaml @@ -14,7 +14,7 @@ jobs: kata-deploy: needs: build-kata-static-tarball-arm64 - runs-on: arm64 + runs-on: arm64-builder steps: - name: Login to Kata Containers docker.io uses: docker/login-action@v2 diff --git a/.github/workflows/require-pr-porting-labels.yaml b/.github/workflows/require-pr-porting-labels.yaml index b16e5c371..4f799c4ba 100644 --- a/.github/workflows/require-pr-porting-labels.yaml +++ b/.github/workflows/require-pr-porting-labels.yaml @@ -36,7 +36,17 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install porting checker script run: | diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml deleted file mode 100644 index 4b439733b..000000000 --- a/.github/workflows/run-cri-containerd-tests.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: CI | Run cri-containerd tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - -jobs: - run-cri-containerd: - strategy: - fail-fast: true - matrix: - containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204 - env: - CONTAINERD_VERSION: ${{ matrix.containerd_version }} - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Install dependencies - run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts - - - name: Run cri-containerd tests - run: bash tests/integration/cri-containerd/gha-run.sh run diff --git a/.github/workflows/run-docker-tests-on-garm.yaml b/.github/workflows/run-docker-tests-on-garm.yaml new file mode 100644 index 000000000..ea90759fa --- /dev/null +++ b/.github/workflows/run-docker-tests-on-garm.yaml @@ -0,0 +1,56 @@ +name: CI | Run docker integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-docker-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. 
+ fail-fast: false + matrix: + vmm: + - clh + - qemu + runs-on: garm-ubuntu-2304-smaller + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/docker/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts + + - name: Run docker smoke test + timeout-minutes: 5 + run: bash tests/integration/docker/gha-run.sh run diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index 130be1829..23439e3f1 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -29,6 +33,9 @@ jobs: - clh - dragonball - qemu + instance-type: + - small + - normal include: - host_os: cbl-mariner vmm: clh @@ -40,11 +47,20 @@ jobs: GH_PR_NUMBER: ${{ inputs.pr-number }} KATA_HOST_OS: ${{ matrix.host_os }} KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} steps: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Download Azure CLI run: bash tests/integration/kubernetes/gha-run.sh install-azure-cli diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml new file mode 100644 index 000000000..1fd4b00ee --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -0,0 +1,88 @@ +name: CI | Run kubernetes tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh #cloud-hypervisor + - fc #firecracker + - qemu + snapshotter: + - devmapper + k8s: + - k3s + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + runs-on: ${{ matrix.instance }} + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + SNAPSHOTTER: ${{ matrix.snapshotter }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ 
inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Configure the ${{ matrix.snapshotter }} snapshotter + run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml deleted file mode 100644 index a48425e1f..000000000 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: CI | Run kubernetes tests on SEV -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - commit-hash: - required: false - type: string - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-sev - runs-on: sev - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBECONFIG: /home/kata/.kube/config - USING_NFD: "false" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-sev - - - name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-sev diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml deleted file mode 100644 index 7196a9a1b..000000000 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: CI | Run kubernetes tests on SEV-SNP -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - commit-hash: - required: false - type: string - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-snp - runs-on: sev-snp - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBECONFIG: /home/kata/.kube/config - USING_NFD: "false" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp - - - name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml deleted file mode 100644 index a3899177c..000000000 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: CI | Run kubernetes tests on TDX -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - commit-hash: - 
required: false - type: string - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-tdx - runs-on: tdx - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - USING_NFD: "true" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx - - - name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx diff --git a/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml new file mode 100644 index 000000000..14000dc63 --- /dev/null +++ b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml @@ -0,0 +1,86 @@ +name: CI | Run kubernetes tests, using CRI-O, on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + k8s: + - k0s + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + - k8s: k0s + k8s-extra-params: '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' + runs-on: ${{ matrix.instance }} + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + KUBERNETES_EXTRA_PARAMS: ${{ matrix.k8s-extra-params }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Configure CRI-O + run: bash tests/integration/kubernetes/gha-run.sh setup-crio + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml new file mode 100644 index 000000000..2021d10ac --- /dev/null +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -0,0 +1,176 @@ +name: CI | Run kata coco tests +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + 
target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests-on-tdx: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests + + run-k8s-tests-on-tdx: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx + + run-k8s-tests-on-sev: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-sev + runs-on: sev + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-sev + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-sev + + run-k8s-tests-sev-snp: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-snp + runs-on: sev-snp + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh 
deploy-kata-snp + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp diff --git a/.github/workflows/run-kata-deploy-tests-on-aks.yaml b/.github/workflows/run-kata-deploy-tests-on-aks.yaml new file mode 100644 index 000000000..74fcb0ea5 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml @@ -0,0 +1,89 @@ +name: CI | Run kata-deploy tests on AKS +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + host_os: + - ubuntu + vmm: + - clh + - dragonball + - qemu + include: + - host_os: cbl-mariner + vmm: clh + runs-on: ubuntu-latest + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + GH_PR_NUMBER: ${{ inputs.pr-number }} + KATA_HOST_OS: ${{ matrix.host_os }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" + USING_NFD: "false" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Download Azure CLI + run: bash tests/functional/kata-deploy/gha-run.sh install-azure-cli + + - name: Log into the Azure account + run: bash tests/functional/kata-deploy/gha-run.sh login-azure + env: + AZ_APPID: ${{ secrets.AZ_APPID }} + AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} + AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} + + - name: Create AKS cluster + timeout-minutes: 10 + run: bash tests/functional/kata-deploy/gha-run.sh create-cluster + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Install `kubectl` + run: bash tests/functional/kata-deploy/gha-run.sh install-kubectl + + - name: Download credentials for the Kubernetes CLI to use them + run: bash tests/functional/kata-deploy/gha-run.sh get-cluster-credentials + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests + + - name: Delete AKS cluster + if: always() + run: bash tests/functional/kata-deploy/gha-run.sh delete-cluster diff --git a/.github/workflows/run-kata-deploy-tests-on-garm.yaml b/.github/workflows/run-kata-deploy-tests-on-garm.yaml new file mode 100644 index 000000000..7514a27b6 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-garm.yaml @@ -0,0 +1,65 @@ +name: CI | Run kata-deploy tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh + - qemu + k8s: + - k0s + - k3s + - rke2 + runs-on: garm-ubuntu-2004-smaller + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + 
KUBERNETES: ${{ matrix.k8s }} + USING_NFD: "false" + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/functional/kata-deploy/gha-run.sh deploy-k8s + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests diff --git a/.github/workflows/run-kata-monitor-tests.yaml b/.github/workflows/run-kata-monitor-tests.yaml new file mode 100644 index 000000000..98e2a2276 --- /dev/null +++ b/.github/workflows/run-kata-monitor-tests.yaml @@ -0,0 +1,59 @@ +name: CI | Run kata-monitor tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-monitor: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + container_engine: + - crio + - containerd + include: + - container_engine: containerd + containerd_version: lts + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINER_ENGINE: ${{ matrix.container_engine }} + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/kata-monitor/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/functional/kata-monitor/gha-run.sh install-kata kata-artifacts + + - name: Run kata-monitor tests + run: bash tests/functional/kata-monitor/gha-run.sh run diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index 596a3dea4..c0e43c360 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -8,22 +8,28 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: - run-metrics: - strategy: - fail-fast: true - matrix: - vmm: ['clh', 'qemu'] - max-parallel: 1 + setup-kata: + name: Kata Setup runs-on: metrics env: GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} steps: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 @@ -34,6 +40,24 @@ jobs: - name: Install kata run: bash tests/metrics/gha-run.sh install-kata kata-artifacts + run-metrics: + needs: setup-kata + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. 
+ fail-fast: false + matrix: + vmm: ['clh', 'qemu'] + max-parallel: 1 + runs-on: metrics + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - name: enabling the hypervisor + run: bash tests/metrics/gha-run.sh enabling-hypervisor + - name: run launch times test run: bash tests/metrics/gha-run.sh run-test-launchtimes @@ -52,6 +76,12 @@ jobs: - name: run fio test run: bash tests/metrics/gha-run.sh run-test-fio + - name: run iperf test + run: bash tests/metrics/gha-run.sh run-test-iperf + + - name: run latency test + run: bash tests/metrics/gha-run.sh run-test-latency + - name: make metrics tarball ${{ matrix.vmm }} run: bash tests/metrics/gha-run.sh make-tarball-results diff --git a/.github/workflows/run-nerdctl-tests-on-garm.yaml b/.github/workflows/run-nerdctl-tests-on-garm.yaml new file mode 100644 index 000000000..eb4e04bc2 --- /dev/null +++ b/.github/workflows/run-nerdctl-tests-on-garm.yaml @@ -0,0 +1,57 @@ +name: CI | Run nerdctl integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-nerdctl-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. + fail-fast: false + matrix: + vmm: + - clh + - dragonball + - qemu + runs-on: garm-ubuntu-2304-smaller + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nerdctl/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nerdctl/gha-run.sh install-kata kata-artifacts + + - name: Run nerdctl smoke test + timeout-minutes: 5 + run: bash tests/integration/nerdctl/gha-run.sh run diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml deleted file mode 100644 index 647582c08..000000000 --- a/.github/workflows/run-nydus-tests.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: CI | Run nydus tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - -jobs: - run-nydus: - strategy: - fail-fast: true - matrix: - containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu', 'dragonball'] - runs-on: garm-ubuntu-2204 - env: - CONTAINERD_VERSION: ${{ matrix.containerd_version }} - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Install dependencies - run: bash tests/integration/nydus/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts - - - name: Run nydus tests - run: bash tests/integration/nydus/gha-run.sh run diff --git a/.github/workflows/run-runk-tests.yaml 
b/.github/workflows/run-runk-tests.yaml new file mode 100644 index 000000000..36c111218 --- /dev/null +++ b/.github/workflows/run-runk-tests.yaml @@ -0,0 +1,46 @@ +name: CI | Run runk tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-runk: + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: lts + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/runk/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts + + - name: Run tracing tests + run: bash tests/integration/runk/gha-run.sh run diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml deleted file mode 100644 index ba34d2088..000000000 --- a/.github/workflows/run-vfio-tests.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: CI | Run vfio tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - -jobs: - run-vfio: - strategy: - fail-fast: false - matrix: - vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204 - env: - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - - name: Install dependencies - run: bash tests/functional/vfio/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Run vfio tests - run: bash tests/functional/vfio/gha-run.sh run diff --git a/.github/workflows/static-checks-dragonball.yaml b/.github/workflows/static-checks-dragonball.yaml deleted file mode 100644 index d47689e3a..000000000 --- a/.github/workflows/static-checks-dragonball.yaml +++ /dev/null @@ -1,37 +0,0 @@ -on: - pull_request: - types: - - opened - - edited - - reopened - - synchronize - paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -name: Static checks dragonball -jobs: - test-dragonball: - runs-on: dragonball - env: - RUST_BACKTRACE: "1" - steps: - - uses: actions/checkout@v3 - - name: Set env - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV - - name: Install Rust - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - ./ci/install_rust.sh - echo PATH="$HOME/.cargo/bin:$PATH" >> $GITHUB_ENV - - name: Run Unit Test - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd src/dragonball - cargo version - rustc --version - sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 
23f11aab3..c55adf4c7 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -12,74 +12,183 @@ concurrency: name: Static checks jobs: - static-checks: + check-kernel-config-version: + runs-on: ubuntu-latest + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Ensure the kernel config version has been updated + run: | + kernel_dir="tools/packaging/kernel/" + kernel_version_file="${kernel_dir}kata_config_version" + modified_files=$(git diff --name-only origin/$GITHUB_BASE_REF..HEAD) + if git diff --name-only origin/$GITHUB_BASE_REF..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then + echo "Kernel directory has changed, checking if $kernel_version_file has been updated" + if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then + echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) + else + echo "Readme file changed, no need for kernel config version update." + fi + echo "Check passed" + fi + + build-checks: runs-on: ubuntu-20.04 strategy: + fail-fast: false matrix: - cmd: + component: + - agent + - dragonball + - runtime + - runtime-rs + - agent-ctl + - kata-ctl + - log-parser-rs + - runk + - trace-forwarder + command: - "make vendor" - - "make static-checks" - "make check" - "make test" - "sudo -E PATH=\"$PATH\" make test" + include: + - component: agent + component-path: src/agent + - component: dragonball + component-path: src/dragonball + - component: runtime + component-path: src/runtime + - component: runtime-rs + component-path: src/runtime-rs + - component: agent-ctl + component-path: src/tools/agent-ctl + - component: kata-ctl + component-path: src/tools/kata-ctl + - component: log-parser-rs + component-path: src/tools/log-parser-rs + - component: runk + component-path: src/tools/runk + - component: trace-forwarder + component-path: src/tools/trace-forwarder + - install-libseccomp: no + - component: agent + install-libseccomp: yes + - component: runk + install-libseccomp: yes + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install yq + run: | + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + if: ${{ matrix.component == 'runtime' }} + run: | + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install rust + if: ${{ matrix.component != 'runtime' }} + run: | + ./tests/install_rust.sh + echo "${HOME}/.cargo/bin" >> $GITHUB_PATH + - name: Install musl-tools + if: ${{ matrix.component != 'runtime' }} + run: sudo apt-get -y install musl-tools + - name: Install libseccomp + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.install-libseccomp == 'yes' }} + run: | + libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) + gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) + ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" + echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" + echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV + echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV + - name: Setup XDG_RUNTIME_DIR for the `runtime` tests + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.component == 'runtime' }} + run: | + XDG_RUNTIME_DIR=$(mktemp -d /tmp/kata-tests-$USER.XXX | tee >(xargs chmod 0700)) + echo 
"XDG_RUNTIME_DIR=${XDG_RUNTIME_DIR}" >> $GITHUB_ENV + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} + run: | + cd ${{ matrix.component-path }} + ${{ matrix.command }} + env: + RUST_BACKTRACE: "1" + + build-checks-depending-on-kvm: + runs-on: garm-ubuntu-2004-smaller + strategy: + fail-fast: false + matrix: + component: + - runtime-rs + include: + - component: runtime-rs + command: "sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test" + - component: runtime-rs + component-path: src/dragonball + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install system deps + run: | + sudo apt-get install -y build-essential musl-tools + - name: Install yq + run: | + sudo -E ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install rust + run: | + export PATH="$PATH:/usr/local/bin" + ./tests/install_rust.sh + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} + run: | + export PATH="$PATH:${HOME}/.cargo/bin" + cd ${{ matrix.component-path }} + ${{ matrix.command }} + env: + RUST_BACKTRACE: "1" + + static-checks: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + cmd: + - "make static-checks" env: - RUST_BACKTRACE: "1" - target_branch: ${{ github.base_ref }} GOPATH: ${{ github.workspace }} steps: - - name: Free disk space - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - name: Checkout code - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: ./src/github.com/${{ github.repository }} - - name: Install Go - uses: actions/setup-go@v3 - with: - go-version: 1.19.3 - - name: Check kernel config version - run: | - cd "${{ github.workspace }}/src/github.com/${{ github.repository }}" - kernel_dir="tools/packaging/kernel/" - kernel_version_file="${kernel_dir}kata_config_version" - modified_files=$(git diff --name-only origin/main..HEAD) - if git diff --name-only origin/main..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then - echo "Kernel directory has changed, checking if $kernel_version_file has been updated" - if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then - echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) - else - echo "Readme file changed, no need for kernel config version update." 
- fi - echo "Check passed" - fi - - name: Set PATH - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - - name: Setup - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh - - name: Installing rust - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh - PATH=$PATH:"$HOME/.cargo/bin" - rustup target add x86_64-unknown-linux-musl - rustup component add rustfmt clippy - - name: Setup seccomp - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) - gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" - echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" - echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV - echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV - - name: Run check - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + path: ./src/github.com/${{ github.repository }} + - name: Install yq + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install system dependencies + run: | + sudo apt-get -y install moreutils hunspell pandoc + - name: Run check + run: | + export PATH=${PATH}:${GOPATH}/bin + cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} diff --git a/ci/install_libseccomp.sh b/ci/install_libseccomp.sh index 683d0f65b..5d53be733 100755 --- a/ci/install_libseccomp.sh +++ b/ci/install_libseccomp.sh @@ -7,12 +7,10 @@ set -o errexit -cidir=$(dirname "$0") -source "${cidir}/lib.sh" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +script_name="$(basename "${BASH_SOURCE[0]}")" -clone_tests_repo - -source "${tests_repo_dir}/.ci/lib.sh" +source "${script_dir}/../tests/common.bash" # The following variables if set on the environment will change the behavior # of gperf and libseccomp configure scripts, that may lead this script to @@ -25,11 +23,11 @@ workdir="$(mktemp -d --tmpdir build-libseccomp.XXXXX)" # Variables for libseccomp libseccomp_version="${LIBSECCOMP_VERSION:-""}" if [ -z "${libseccomp_version}" ]; then - libseccomp_version=$(get_version "externals.libseccomp.version") + libseccomp_version=$(get_from_kata_deps "externals.libseccomp.version") fi libseccomp_url="${LIBSECCOMP_URL:-""}" if [ -z "${libseccomp_url}" ]; then - libseccomp_url=$(get_version "externals.libseccomp.url") + libseccomp_url=$(get_from_kata_deps "externals.libseccomp.url") fi libseccomp_tarball="libseccomp-${libseccomp_version}.tar.gz" libseccomp_tarball_url="${libseccomp_url}/releases/download/v${libseccomp_version}/${libseccomp_tarball}" @@ -38,11 +36,11 @@ cflags="-O2" # Variables for gperf gperf_version="${GPERF_VERSION:-""}" if [ -z 
"${gperf_version}" ]; then - gperf_version=$(get_version "externals.gperf.version") + gperf_version=$(get_from_kata_deps "externals.gperf.version") fi gperf_url="${GPERF_URL:-""}" if [ -z "${gperf_url}" ]; then - gperf_url=$(get_version "externals.gperf.url") + gperf_url=$(get_from_kata_deps "externals.gperf.url") fi gperf_tarball="gperf-${gperf_version}.tar.gz" gperf_tarball_url="${gperf_url}/${gperf_tarball}" diff --git a/docs/design/kata-vra.md b/docs/design/kata-vra.md index ba53c3371..bc62e762c 100644 --- a/docs/design/kata-vra.md +++ b/docs/design/kata-vra.md @@ -43,7 +43,7 @@ and perform DMA transactions _anywhere_. The second feature is ACS (Access Control Services), which controls which devices are allowed to communicate with one another and thus avoids improper -routing of packets irrespectively of whether IOMMU is enabled or not. +routing of packets `irrespectively` of whether IOMMU is enabled or not. When IOMMU is enabled, ACS is normally configured to force all PCI Express DMA to go through the root complex so IOMMU can translate it, impacting performance @@ -126,7 +126,7 @@ efficient P2P communication. ## PCI Express Virtual P2P Approval Capability Most of the time, the PCI Express topology is flattened and obfuscated to ensure -easy migration of the VM image between different physical hardware topologies. +easy migration of the VM image between different physical hardware `topologies`. In Kata, we can configure the hypervisor to use PCI Express root ports to hotplug the VFIO  devices one is passing through. A user can select how many PCI Express root ports to allocate depending on how many devices are passed through. @@ -220,7 +220,7 @@ containers that he wants to run with Kata. The goal is to make such things as transparent as possible, so we also introduced [CDI](https://github.com/container-orchestrated-devices/container-device-interface) (Container Device Interface) to Kata. CDI is a[ -specification](https://github.com/container-orchestrated-devices/container-device-interface/blob/master/SPEC.md) +specification](https://github.com/container-orchestrated-devices/container-device-interface/blob/main/SPEC.md) for container runtimes to support third-party devices. As written before, we can provide a clique ID for the devices that belong @@ -300,7 +300,7 @@ pcie_switch_port = 8 ``` Each device that is passed through is attached to a PCI Express downstream port -as illustrated below. We can even replicate the host’s two DPUs topologies with +as illustrated below. We can even replicate the host’s two DPUs `topologies` with added metadata through the CDI. Most of the time, a container only needs one pair of GPU and NIC for GPUDirect RDMA. This is more of a showcase of what we can do with the power of Kata and CDI. One could even think of adding groups of @@ -328,7 +328,7 @@ $ lspci -tv ``` The configuration of using either the root port or switch port can be applied on -a per Container or Pod basis, meaning we can switch PCI Express topologies on +a per Container or Pod basis, meaning we can switch PCI Express `topologies` on each run of an application. 
## Hypervisor Resource Limits diff --git a/src/agent/Makefile b/src/agent/Makefile index ba065b4d0..94864aa9b 100644 --- a/src/agent/Makefile +++ b/src/agent/Makefile @@ -54,7 +54,7 @@ endif TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET) ##VAR DESTDIR= is a directory prepended to each installed target file -DESTDIR := +DESTDIR ?= ##VAR BINDIR= is a directory for installing executable programs BINDIR := /usr/bin @@ -140,7 +140,7 @@ vendor: #TARGET test: run cargo tests -test: +test: $(GENERATED_FILES) @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture ##TARGET check: run test diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 1981a37d9..6fb3f20ed 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -49,7 +49,7 @@ else ##TARGET default: build code default: runtime show-header ##TARGET test: run cargo tests -test: +test: static-checks-build @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture install: install-runtime install-configs endif diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index fd906ff0a..4531a4e65 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -188,12 +188,22 @@ block_device_driver = "virtio-blk" # Disable the 'seccomp' feature from Cloud Hypervisor, default false # disable_seccomp = true +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: iommu=pt +#enable_iommu = true + # This option changes the default hypervisor and kernel parameters # to enable debug output where available. # # Default false #enable_debug = true +# Enable hot-plugging of VFIO devices to a root-port. +# The default setting is "no-port" +#hot_plug_vfio = "root-port" + # Path to OCI hook binaries in the *guest rootfs*. # This does not affect host-side hooks which must instead be added to # the OCI spec passed to the runtime. 
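The two new Cloud Hypervisor options introduced above (`enable_iommu` and `hot_plug_vfio`) work together with the `checkPCIeConfig` validation added in the next hunk, whose error messages state that cold-plug is not supported on CLH and that only `root-port` hot-plug is accepted. As a minimal sketch — not part of this patch; the `[hypervisor.clh]` section name and all other settings are assumed to keep their defaults — enabling VFIO passthrough on CLH might look roughly like this in `configuration-clh.toml`:

```toml
[hypervisor.clh]
# Adds a vIOMMU device to the VM and, per the comment introduced above,
# appends "iommu=pt" to the guest kernel command line.
enable_iommu = true

# CLH only supports hot-plugging VFIO devices into a PCIe root port;
# cold-plug must stay disabled ("no-port") or checkPCIeConfig rejects it.
hot_plug_vfio = "root-port"
```

This mirrors what `tests/functional/vfio/run.sh` later in this patch does for CLH, where `setup_configuration_file` rewrites `hot_plug_vfio` to `"root-port"` before running the VFIO tests.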
diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f577df526..a4e17b239 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1680,8 +1680,8 @@ func checkConfig(config oci.RuntimeConfig) error { // Only allow one of the following settings for cold-plug: // no-port, root-port, switch-port func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error { - if hypervisorType != virtcontainers.QemuHypervisor { - kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU hypervisor, ignoring hot(cold)_vfio_port setting") + if hypervisorType != virtcontainers.QemuHypervisor && hypervisorType != virtcontainers.ClhHypervisor { + kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU/CLH hypervisor, ignoring hot(cold)_vfio_port setting") return nil } @@ -1696,6 +1696,14 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT if machineType != "q35" && machineType != "virt" { return nil } + if hypervisorType == virtcontainers.ClhHypervisor { + if coldPlug != config.NoPort { + return fmt.Errorf("cold-plug not supported on CLH") + } + if hotPlug != config.RootPort { + return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort) + } + } var port config.PCIePort if coldPlug != config.NoPort { @@ -1704,10 +1712,6 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT if hotPlug != config.NoPort { port = hotPlug } - if port == config.NoPort { - return fmt.Errorf("invalid vfio_port=%s setting, use on of %s, %s, %s", - port, config.BridgePort, config.RootPort, config.SwitchPort) - } if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort { return nil } diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 260800378..903e68d95 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -18,8 +18,10 @@ import ( "syscall" "testing" + config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock" @@ -419,3 +421,32 @@ func TestCreateContainer(t *testing.T) { assert.NoError(err) } } + +func TestVfioChecksClh(t *testing.T) { + assert := assert.New(t) + + // Check valid CLH vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.ClhHypervisor) + } + assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + assert.Error(f(config.RootPort, config.NoPort)) + assert.Error(f(config.NoPort, config.SwitchPort)) +} + +func TestVfioCheckQemu(t *testing.T) { + assert := assert.New(t) + + // Check valid Qemu vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.QemuHypervisor) + } + + 
assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.RootPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + assert.Error(f(config.SwitchPort, config.RootPort)) +} diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 10a1fc7ab..f031811e9 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -490,6 +490,13 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } clh.vmconfig.Payload.SetKernel(kernelPath) + clh.vmconfig.Platform = chclient.NewPlatformConfig() + platform := clh.vmconfig.Platform + platform.SetNumPciSegments(2) + if clh.config.IOMMU { + platform.SetIommuSegments([]int32{0}) + } + if clh.config.ConfidentialGuest { if err := clh.enableProtection(); err != nil { return err @@ -528,6 +535,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // start the guest kernel with 'quiet' in non-debug mode params = append(params, Param{"quiet", ""}) } + if clh.config.IOMMU { + params = append(params, Param{"iommu", "pt"}) + } // Followed by extra kernel parameters defined in the configuration file params = append(params, clh.config.KernelParams...) @@ -536,6 +546,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // set random device generator to hypervisor clh.vmconfig.Rng = chclient.NewRngConfig(clh.config.EntropySource) + clh.vmconfig.Rng.SetIommu(clh.config.IOMMU) // set the initial root/boot disk of hypervisor imagePath, err := clh.config.ImageAssetPath() @@ -561,6 +572,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } else { pmem := chclient.NewPmemConfig(imagePath) *pmem.DiscardWrites = true + pmem.SetIommu(clh.config.IOMMU) if clh.vmconfig.Pmem != nil { *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) @@ -598,6 +610,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF) } + clh.vmconfig.Console.SetIommu(clh.config.IOMMU) cpu_topology := chclient.NewCpuTopology() cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1) @@ -840,6 +853,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro queueSize := int32(1024) clhDisk.NumQueues = &queues clhDisk.QueueSize = &queueSize + clhDisk.SetIommu(clh.config.IOMMU) diskRateLimiterConfig := clh.getDiskRateLimiterConfig() if diskRateLimiterConfig != nil { @@ -865,6 +879,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { // Create the clh device config via the constructor to ensure default values are properly assigned clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) + clhDevice.SetIommu(clh.config.IOMMU) pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice) if err != nil { return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) @@ -1538,6 +1553,7 @@ func (clh *cloudHypervisor) addVSock(cid int64, path string) { }).Info("Adding HybridVSock") clh.vmconfig.Vsock = chclient.NewVsockConfig(cid, path) + clh.vmconfig.Vsock.SetIommu(clh.config.IOMMU) } func (clh *cloudHypervisor) getRateLimiterConfig(bwSize, bwOneTimeBurst, opsSize, opsOneTimeBurst int64) *chclient.RateLimiterConfig { @@ -1607,6 +1623,7 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { if netRateLimiterConfig != nil { net.SetRateLimiterConfig(*netRateLimiterConfig) } + 
net.SetIommu(clh.config.IOMMU) if clh.netDevices != nil { *clh.netDevices = append(*clh.netDevices, *net) @@ -1639,6 +1656,7 @@ func (clh *cloudHypervisor) addVolume(volume types.Volume) error { } fs := chclient.NewFsConfig(volume.MountTag, vfsdSockPath, numQueues, queueSize) + fs.SetPciSegment(1) clh.vmconfig.Fs = &[]chclient.FsConfig{*fs} clh.Logger().Debug("Adding share volume to hypervisor: ", volume.MountTag) diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index d617ab4e1..c141adfd9 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -68,6 +68,7 @@ func newClhConfig() (HypervisorConfig, error) { NetRateLimiterBwOneTimeBurst: int64(0), NetRateLimiterOpsMaxRate: int64(0), NetRateLimiterOpsOneTimeBurst: int64(0), + HotPlugVFIO: config.NoPort, }, nil } diff --git a/src/tools/kata-ctl/Cargo.lock b/src/tools/kata-ctl/Cargo.lock index eda34fb17..2ddbcdbb9 100644 --- a/src/tools/kata-ctl/Cargo.lock +++ b/src/tools/kata-ctl/Cargo.lock @@ -1946,6 +1946,7 @@ dependencies = [ "anyhow", "hyper", "hyperlocal", + "kata-types", "tokio", ] diff --git a/src/tools/kata-ctl/Makefile b/src/tools/kata-ctl/Makefile index 23ae7ca1e..546f0783a 100644 --- a/src/tools/kata-ctl/Makefile +++ b/src/tools/kata-ctl/Makefile @@ -52,13 +52,13 @@ clean: vendor: cargo vendor -test: +test: $(GENERATED_CODE) @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo test --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) -- --nocapture install: @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo install --locked --target $(TRIPLE) --path . --root $(INSTALL_PATH) -check: standard_rust_check +check: $(GENERATED_CODE) standard_rust_check .PHONY: \ build \ diff --git a/src/tools/kata-ctl/src/check.rs b/src/tools/kata-ctl/src/check.rs index 3bf899ad2..f23418680 100644 --- a/src/tools/kata-ctl/src/check.rs +++ b/src/tools/kata-ctl/src/check.rs @@ -539,10 +539,10 @@ mod tests { }, // Success scenarios TestData { - module_name: "kvm", + module_name: "loop", param_name: "", kernel_module: &KernelModule { - name: "kvm", + name: "loop", params: &[KernelParam { name: "nonexistantparam", value: KernelParamType::Simple("Y"), @@ -552,16 +552,16 @@ mod tests { result: Ok(()), }, TestData { - module_name: "kvm", - param_name: "kvmclock_periodic_sync", + module_name: "loop", + param_name: "hw_queue_depth", kernel_module: &KernelModule { - name: "kvm", + name: "loop", params: &[KernelParam { - name: "kvmclock_periodic_sync", - value: KernelParamType::Simple("Y"), + name: "hw_queue_depth", + value: KernelParamType::Simple("128"), }], }, - param_value: "Y", + param_value: "128", result: Ok(()), }, ]; diff --git a/tests/common.bash b/tests/common.bash index d71b3f43c..a11144574 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -158,7 +158,7 @@ function clean_env_ctr() info "Wait until the containers gets removed" for task_id in "${running_tasks[@]}"; do - sudo ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1 + sudo timeout -s SIGKILL 30s ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1 || true sleep 0.5 done @@ -186,31 +186,19 @@ function clean_env_ctr() if (( count_tasks > 0 )); then die "Can't remove running containers." 
fi - - kill_kata_components } # Kills running shim and hypervisor components function kill_kata_components() { - local kata_bin_dir="/opt/kata/bin" - local shim_path="${kata_bin_dir}/containerd-shim-kata-v2" - local hypervisor_path="${kata_bin_dir}/qemu-system-x86_64" - local pid_shim_count="$(pgrep -fc ${shim_path} || exit 0)" + local TIMEOUT="30s" + local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "cloud-hypervisor" ) - [ ${pid_shim_count} -gt "0" ] && sudo kill -SIGKILL "$(pgrep -f ${shim_path})" > /dev/null 2>&1 - - if [ "${KATA_HYPERVISOR}" = 'clh' ]; then - hypervisor_path="${kata_bin_dir}/cloud-hypervisor" - elif [ "${KATA_HYPERVISOR}" != 'qemu' ]; then - echo "Failed to stop the hypervisor: '${KATA_HYPERVISOR}' as it is not recognized" - return - fi - - local pid_hypervisor_count="$(pgrep -fc ${hypervisor_path} || exit 0)" - - if [ ${pid_hypervisor_count} -gt "0" ]; then - sudo kill -SIGKILL "$(pgrep -f ${hypervisor_path})" > /dev/null 2>&1 - fi + sudo systemctl stop containerd + # iterate over the list of kata components and stop them + for PID_NAME in "${PID_NAMES[@]}"; do + [[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall "${PID_NAME}" > /dev/null 2>&1 || true + done + sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd } # Restarts a systemd service while ensuring the start-limit-burst is set to 0. @@ -269,25 +257,71 @@ function restart_containerd_service() { return 0 } +function restart_crio_service() { + sudo systemctl restart crio +} + # Configures containerd function overwrite_containerd_config() { containerd_config="/etc/containerd/config.toml" sudo rm -f "${containerd_config}" sudo tee "${containerd_config}" << EOF version = 2 -[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - SystemdCgroup = true [plugins] [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] - default_runtime_name = "kata" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + base_runtime_spec = "" + cni_conf_dir = "" + cni_max_conf_num = 0 + container_annotations = [] + pod_annotations = [] + privileged_without_host_devices = false + runtime_engine = "" + runtime_path = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "" + CriuImagePath = "" + CriuPath = "" + CriuWorkPath = "" + IoGid = 0 + IoUid = 0 + NoNewKeyring = false + NoPivotRoot = false + Root = "" + ShimCgroup = "" + SystemdCgroup = false [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata] runtime_type = "io.containerd.kata.v2" EOF } +# Configures CRI-O +function overwrite_crio_config() { + crio_conf_d="/etc/crio/crio.conf.d" + sudo mkdir -p ${crio_conf_d} + + kata_config="${crio_conf_d}/99-kata-containers" + sudo tee "${kata_config}" << EOF +[crio.runtime.runtimes.kata] +runtime_path = "/usr/local/bin/containerd-shim-kata-v2" +runtime_type = "vm" +runtime_root = "/run/vc" +runtime_config_path = "/opt/kata/share/defaults/kata-containers/configuration.toml" +privileged_without_host_devices = true +EOF + + debug_config="${crio_conf_d}/100-debug" + sudo tee "${debug_config}" << EOF +[crio] +log_level = "debug" +EOF +} + function install_kata() { local kata_tarball="kata-static.tar.xz" declare -r katadir="/opt/kata" @@ -306,18 +340,35 @@ function install_kata() { sudo ln -sf "${b}" "${local_bin_dir}/$(basename $b)" done - if [[ ${KATA_HYPERVISOR} == "dragonball" ]]; then - sudo ln 
-sf "${katadir}/runtime-rs/bin/containerd-shim-kata-v2" "${local_bin_dir}/containerd-shim-kata-${KATA_HYPERVISOR}-v2" + if [ "${CONTAINER_ENGINE:=containerd}" = "containerd" ]; then + check_containerd_config_for_kata + restart_containerd_service else - sudo ln -sf "${katadir}/bin/containerd-shim-kata-v2" "${local_bin_dir}/containerd-shim-kata-${KATA_HYPERVISOR}-v2" + overwrite_crio_config + restart_crio_service fi - sudo ln -sf ${katadir}/share/defaults/kata-containers/configuration-${KATA_HYPERVISOR}.toml ${katadir}/share/defaults/kata-containers/configuration.toml - - check_containerd_config_for_kata - restart_containerd_service } +# creates a new kata configuration.toml hard link that +# points to the hypervisor passed by KATA_HYPERVISOR env var. +function enabling_hypervisor() { + declare -r KATA_DIR="/opt/kata" + declare -r CONFIG_DIR="${KATA_DIR}/share/defaults/kata-containers" + declare -r SRC_HYPERVISOR_CONFIG="${CONFIG_DIR}/configuration-${KATA_HYPERVISOR}.toml" + declare -r DEST_KATA_CONFIG="${CONFIG_DIR}/configuration.toml" + declare -r CONTAINERD_SHIM_KATA="/usr/local/bin/containerd-shim-kata-${KATA_HYPERVISOR}-v2" + + if [[ ${KATA_HYPERVISOR} == "dragonball" ]]; then + sudo ln -sf "${KATA_DIR}/runtime-rs/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}" + else + sudo ln -sf "${KATA_DIR}/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}" + fi + + sudo ln -sf "${SRC_HYPERVISOR_CONFIG}" "${DEST_KATA_CONFIG}" +} + + function check_containerd_config_for_kata() { # check containerd config declare -r line1="default_runtime_name = \"kata\"" @@ -339,6 +390,7 @@ function ensure_yq() { export GOPATH export PATH="${GOPATH}/bin:${PATH}" INSTALL_IN_GOPATH=true "${repo_root_dir}/ci/install_yq.sh" + hash -d yq 2> /dev/null || true # yq is preinstalled on GHA Ubuntu 22.04 runners so we clear Bash's PATH cache. 
} # dependency: What we want to get the version from the versions.yaml file @@ -383,6 +435,19 @@ function download_github_project_tarball() { wget https://github.com/${project}/releases/download/${version}/${tarball_name} } +# version: The version to be intalled +function install_cni_plugins() { + version="${1}" + + project="containernetworking/plugins" + tarball_name="cni-plugins-linux-$(${repo_root_dir}/tests/kata-arch.sh -g)-${version}.tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo mkdir -p /opt/cni/bin + sudo tar -xvf "${tarball_name}" -C /opt/cni/bin + rm -f "${tarball_name}" +} + # base_version: The version to be intalled in the ${major}.${minor} format function install_cri_containerd() { base_version="${1}" @@ -413,3 +478,110 @@ function install_cri_tools() { sudo tar -xvf "${tarball_name}" -C /usr/local/bin rm -f "${tarball_name}" } + +function install_nydus() { + version="${1}" + + project="dragonflyoss/image-service" + tarball_name="nydus-static-${version}-linux-$(${repo_root_dir}/tests/kata-arch.sh -g).tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo tar xfz "${tarball_name}" -C /usr/local/bin --strip-components=1 + rm -f "${tarball_name}" +} + +function install_nydus_snapshotter() { + version="${1}" + + project="containerd/nydus-snapshotter" + tarball_name="nydus-snapshotter-${version}-$(${repo_root_dir}/tests/kata-arch.sh).tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo tar xfz "${tarball_name}" -C /usr/local/bin --strip-components=1 + rm -f "${tarball_name}" +} + +function _get_os_for_crio() { + source /etc/os-release + + if [ "${NAME}" != "Ubuntu" ]; then + echo "Only Ubuntu is supported for now" + exit 2 + fi + + echo "x${NAME}_${VERSION_ID}" +} + +# version: the CRI-O version to be installe +function install_crio() { + local version=${1} + + os=$(_get_os_for_crio) + + echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + echo "deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${version}/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:${version}.list + curl -L https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${version}/${os}/Release.key | sudo apt-key add - + curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - + sudo apt update + sudo apt install -y cri-o cri-o-runc + + # We need to set the default capabilities to ensure our tests will pass + # See: https://github.com/kata-containers/kata-containers/issues/8034 + sudo mkdir -p /etc/crio/crio.conf.d/ + cat <&2 echo "Invalid argument"; exit 2 ;; + esac +} + +main "$@" diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats new file mode 100644 index 000000000..d4f957d05 --- /dev/null +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -0,0 +1,116 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" + +setup() { + repo_root_dir="${BATS_TEST_DIRNAME}/../../../" + ensure_yq + + # We expect 2 runtime classes because: + # * `kata` is the default runtimeclass created, basically an alias for `kata-${KATA_HYPERVISOR}`. 
+ # * `kata-${KATA_HYPERVISOR}` is the other one + # * As part of the tests we're only deploying the specific runtimeclass that will be used, instead of all of them. + expected_runtime_classes=2 + + # We expect both runtime classes to have the same handler: kata-${KATA_HYPERVISOR} + expected_handlers_re=( \ + "kata\s+kata-${KATA_HYPERVISOR}" \ + "kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \ + ) + + # Set the latest image, the one generated as part of the PR, to be used as part of the tests + sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + + # Enable debug for Kata Containers + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[1].value' --tag '!!str' "true" + # Create the runtime class only for the shim that's being tested + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[2].value' "${KATA_HYPERVISOR}" + # Set the tested hypervisor as the default `kata` shim + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[3].value' "${KATA_HYPERVISOR}" + # Let the `kata-deploy` script take care of the runtime class creation / removal + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[4].value' --tag '!!str' "true" + # Let the `kata-deploy` create the default `kata` runtime class + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[5].value' --tag '!!str' "true" + + if [ "${KATA_HOST_OS}" = "cbl-mariner" ]; then + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[+].name' "HOST_OS" + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[-1].value' "${KATA_HOST_OS}" + fi + + echo "::group::Final kata-deploy.yaml that is used in the test" + cat "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image" + echo "::endgroup::" + + kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" + if [ "${KUBERNETES}" = "k0s" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k0s" + elif [ "${KUBERNETES}" = "k3s" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s" + elif [ "${KUBERNETES}" = "rke2" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/rke2" + else + kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + fi + kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + + # Give some time for the pod to finish what's doing and have the + # runtimeclasses properly created + sleep 30s +} + +@test "Test runtimeclasses are being properly created" { + # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy + 
current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) + [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] + + for handler_re in ${expected_handlers_re[@]} + do + kubectl get runtimeclass | grep -E "${handler_re}" + done +} + +teardown() { + kubectl get runtimeclasses -o name | grep -v "kata-mshv-vm-isolation" + + if [ "${KUBERNETES}" = "k0s" ]; then + deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k0s\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k0s\"" + elif [ "${KUBERNETES}" = "k3s" ]; then + deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k3s\"" + elif [ "${KUBERNETES}" = "rke2" ]; then + deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/rke2\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/rke2\"" + else + deploy_spec="-f \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml\"" + cleanup_spec="-f \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml\"" + fi + + kubectl delete ${deploy_spec} + kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod + + # Let the `kata-deploy` script take care of the runtime class creation / removal + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[4].value' --tag '!!str' "true" + # Create the runtime class only for the shim that's being tested + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[2].value' "${KATA_HYPERVISOR}" + # Set the tested hypervisor as the default `kata` shim + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[3].value' "${KATA_HYPERVISOR}" + # Let the `kata-deploy` create the default `kata` runtime class + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[5].value' --tag '!!str' "true" + + sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" + cat "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image" + + kubectl apply ${cleanup_spec} + sleep 30s + + kubectl delete ${cleanup_spec} + kubectl delete -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" +} diff --git a/tests/functional/kata-deploy/run-kata-deploy-tests.sh b/tests/functional/kata-deploy/run-kata-deploy-tests.sh new file mode 100644 index 000000000..859bebf2e --- /dev/null +++ b/tests/functional/kata-deploy/run-kata-deploy-tests.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -e + +kata_deploy_dir=$(dirname "$(readlink -f "$0")") +source "${kata_deploy_dir}/../../common.bash" + +if [ -n "${KATA_DEPLOY_TEST_UNION:-}" ]; then + 
KATA_DEPLOY_TEST_UNION=($KATA_DEPLOY_TEST_UNION) +else + KATA_DEPLOY_TEST_UNION=( \ + "kata-deploy.bats" \ + ) +fi + +info "Run tests" +for KATA_DEPLOY_TEST_ENTRY in ${KATA_DEPLOY_TEST_UNION[@]} +do + bats "${KATA_DEPLOY_TEST_ENTRY}" +done diff --git a/tests/functional/kata-monitor/gha-run.sh b/tests/functional/kata-monitor/gha-run.sh new file mode 100755 index 000000000..93ac3f87d --- /dev/null +++ b/tests/functional/kata-monitor/gha-run.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +kata_monitor_dir="$(dirname "$(readlink -f "$0")")" +source "${kata_monitor_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the cri-containerd tests" + + # Dependency list of projects that we can rely on the system packages + # - build-essential + # - Theoretically we only need `make`, but doesn't hurt to install + # the whole build-essential group + # - jq + declare -a system_deps=( + build-essential + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + # Dependency list of projects that we can install them + # directly from their releases on GitHub: + # - cri-tools + # - containerd + # - cri-container-cni release tarball already includes CNI plugins + cri_tools_version=$(get_from_kata_deps "externals.critools.latest") + declare -a github_deps + github_deps[0]="cri_tools:${cri_tools_version}" + case "${CONTAINER_ENGINE}" in + containerd) + github_deps[1]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + ;; + crio) + github_deps[1]="cni_plugins:$(get_from_kata_deps "externals.cni-plugins.version")" + ;; + esac + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done + + if [ "${CONTAINER_ENGINE}" = "crio" ]; then + install_crio ${cri_tools_version#v} + fi +} + +function run() { + info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + bash -c ${kata_monitor_dir}/kata-monitor-tests.sh +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/functional/kata-monitor/kata-monitor-tests.sh b/tests/functional/kata-monitor/kata-monitor-tests.sh new file mode 100755 index 000000000..fe95cb867 --- /dev/null +++ b/tests/functional/kata-monitor/kata-monitor-tests.sh @@ -0,0 +1,294 @@ +#!/bin/bash +# +# Copyright (c) 2022 Red Hat +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test file will test kata-monitor for basic functionality (retrieve kata sandboxes) +# It will assume an environment where: +# - a CRI container manager (container engine) will be up and running +# - crictl is installed and configured +# - the kata-monitor binary is available on the host +# + +set -o errexit +set -o nounset +set -o pipefail + +source "/etc/os-release" || source "/usr/lib/os-release" + +[ -n "${BASH_VERSION:-}" ] && set -o errtrace +[ -n "${DEBUG:-}" ] && set -o xtrace + +readonly MONITOR_HTTP_ENDPOINT="127.0.0.1:8090" +# we should collect few hundred metrics, let's put a reasonable minimum +readonly MONITOR_MIN_METRICS_NUM=200 +CONTAINER_ENGINE=${CONTAINER_ENGINE:-"containerd"} +CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"} 
+KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}" +KATA_MONITOR_PID="" +TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX) +METRICS_FILE="${TMPATH}/metrics.txt" +MONITOR_LOG_FILE="${TMPATH}/kata-monitor.log" +CACHE_UPD_TIMEOUT_SEC=${CACHE_UPD_TIMEOUT_SEC:-20} +POD_ID="" +CID="" +RUNC_POD_ID="" +RUNC_CID="" +CURRENT_TASK="" + +FALSE=1 +TRUE=0 + +trap error_with_msg ERR + +title() { + local step="$1" + echo -e "\n* STEP: $step" +} + +echo_ok() { + local msg="$1" + + echo "OK: $msg" +} + +# quiet crictrl +qcrictl() { + sudo crictl "$@" > /dev/null +} + +# this is just an hash of current date (+ nanoseconds) +gen_unique_id() { + date +%T:%N | md5sum | cut -d ' ' -f 1 +} + +error_with_msg() { + local msg=${1:-"cannot $CURRENT_TASK"} + + trap - ERR + echo -e "\nERROR: $msg" + if [ -f "$MONITOR_LOG_FILE" ]; then + echo -e "\nkata-monitor logs:\n----------------" + cat "$MONITOR_LOG_FILE" + fi + echo -e "\nkata-monitor testing: FAILED!" + cleanup + exit 1 +} + +cleanup() { + stop_workload + stop_workload "$RUNC_CID" "$RUNC_POD_ID" + + [ -n "$KATA_MONITOR_PID" ] \ + && [ -d "/proc/$KATA_MONITOR_PID" ] \ + && kill -9 "$KATA_MONITOR_PID" + + rm -rf "$TMPATH" +} + +create_sandbox_json() { + local uid_name_suffix="$(gen_unique_id)" + local sbfile="$TMPATH/sandbox-$uid_name_suffix.json" + + cat <$sbfile +{ + "metadata": { + "name": "nginx-$uid_name_suffix", + "namespace": "default", + "uid": "nginx-container-uid", + "attempt": 1 + }, + "logDirectory": "/tmp", + "linux": { + } +} +EOF + echo "$sbfile" +} + +create_container_json() { + local uid_name_suffix="$(gen_unique_id)" + local cntfile="$TMPATH/container-$uid_name_suffix.json" + + cat <$cntfile +{ + "metadata": { + "name": "busybox", + "namespace": "default", + "uid": "busybox-container-uid" + }, + "image":{ + "image": "busybox" + }, + "command": [ + "top" + ], + "log_path":"busybox.log", + "linux": { + } +} +EOF + echo "$cntfile" +} + +start_workload() { + local runtime=${1:-} + local args="" + local sbfile="" + local cntfile="" + + [ -n "$runtime" ] && args="-r $runtime" + + sbfile="$(create_sandbox_json)" + cntfile="$(create_container_json)" + + POD_ID=$(sudo crictl runp $args $sbfile) + CID=$(sudo crictl create $POD_ID $cntfile $sbfile) + qcrictl start $CID +} + +stop_workload() { + local cid="${1:-$CID}" + local pod_id="${2:-$POD_ID}" + local check + + [ -z "$pod_id" ] && return + check=$(sudo crictl pods -q -id $pod_id) + [ -z "$check" ] && return + + qcrictl stop $cid + qcrictl rm $cid + + qcrictl stopp $pod_id + qcrictl rmp $pod_id +} + +is_sandbox_there() { + local podid=${1} + local sbs s + + sbs=$(sudo curl -s ${MONITOR_HTTP_ENDPOINT}/sandboxes) + if [ -n "$sbs" ]; then + for s in $sbs; do + if [ "$s" = "$podid" ]; then + return $TRUE + break + fi + done + fi + return $FALSE +} + +is_sandbox_there_iterate() { + local podid=${1} + + for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do + is_sandbox_there "$podid" && return $TRUE + echo -n "." + sleep 1 + continue + done + + return $FALSE +} + +is_sandbox_missing_iterate() { + local podid=${1} + + for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do + is_sandbox_there "$podid" || return $TRUE + echo -n "." 
+ sleep 1 + continue + done + + return $FALSE +} + +main() { + local args="" + + ########################### + title "pre-checks" + + CURRENT_TASK="connect to the container engine" + qcrictl pods + echo_ok "$CURRENT_TASK" + + ########################### + title "pull the image to be used" + sudo crictl pull busybox + + ########################### + title "create workloads" + + CURRENT_TASK="start workload (runc)" + start_workload + RUNC_POD_ID="$POD_ID" + RUNC_CID="$CID" + echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID" + + CURRENT_TASK="start workload ($CRICTL_RUNTIME)" + start_workload "$CRICTL_RUNTIME" + echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID" + + ########################### + title "start kata-monitor" + + [ ! -x "$KATA_MONITOR_BIN" ] && error_with_msg "kata-monitor binary not found" + + [ "$CONTAINER_ENGINE" = "crio" ] && args="--runtime-endpoint /run/crio/crio.sock" + + CURRENT_TASK="start kata-monitor" + sudo $KATA_MONITOR_BIN $args --log-level trace > "$MONITOR_LOG_FILE" 2>&1 & + KATA_MONITOR_PID="$!" + echo_ok "$CURRENT_TASK ($KATA_MONITOR_PID)" + + ########################### + title "kata-monitor cache update checks" + + CURRENT_TASK="retrieve $POD_ID in kata-monitor cache" + is_sandbox_there_iterate "$POD_ID" || error_with_msg + echo_ok "$CURRENT_TASK" + + CURRENT_TASK="look for runc pod $RUNC_POD_ID in kata-monitor cache" + is_sandbox_there_iterate "$RUNC_POD_ID" && error_with_msg "cache: got runc pod $RUNC_POD_ID" + echo_ok "runc pod $RUNC_POD_ID skipped from kata-monitor cache" + + ########################### + title "kata-monitor metrics retrieval" + + CURRENT_TASK="retrieve metrics from kata-monitor" + curl -s ${MONITOR_HTTP_ENDPOINT}/metrics > "$METRICS_FILE" + echo_ok "$CURRENT_TASK" + + CURRENT_TASK="retrieve metrics for pod $POD_ID" + METRICS_COUNT=$(grep -c "$POD_ID" "$METRICS_FILE") + [ ${METRICS_COUNT} -lt ${MONITOR_MIN_METRICS_NUM} ] \ + && error_with_msg "got too few metrics (#${METRICS_COUNT})" + echo_ok "$CURRENT_TASK - found #${METRICS_COUNT} metrics" + + ########################### + title "remove kata workload" + + CURRENT_TASK="stop workload ($CRICTL_RUNTIME)" + stop_workload + echo_ok "$CURRENT_TASK" + + ########################### + title "kata-monitor cache update checks (removal)" + + CURRENT_TASK="verify removal of $POD_ID from kata-monitor cache" + is_sandbox_missing_iterate "$POD_ID" || error_with_msg "pod $POD_ID was not removed" + echo_ok "$CURRENT_TASK" + + ########################### + CURRENT_TASK="cleanup" + cleanup + + echo -e "\nkata-monitor testing: PASSED!\n" +} + +main "@" diff --git a/tests/functional/vfio/gha-run.sh b/tests/functional/vfio/gha-run.sh index f4cb608de..97c72f80b 100755 --- a/tests/functional/vfio/gha-run.sh +++ b/tests/functional/vfio/gha-run.sh @@ -15,10 +15,33 @@ source "${vfio_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the vfio tests" + ( + source /etc/os-release || source /usr/lib/os-release + case "${ID}" in + ubuntu) + # cloud image dependencies + deps=(xorriso curl qemu-utils openssh-client) + + sudo apt-get update + sudo apt-get install -y ${deps[@]} qemu-system-x86 + ;; + fedora) + # cloud image dependencies + deps=(xorriso curl qemu-img openssh) + + sudo dnf install -y ${deps[@]} qemu-system-x86-core + ;; + + "*") + die "Unsupported distro: ${ID}" + ;; + esac + ) } function run() { info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" + "${vfio_dir}"/vfio_fedora_vm_wrapper.sh } function main() { diff --git 
a/tests/functional/vfio/guest-kernel.json.in b/tests/functional/vfio/guest-kernel.json.in new file mode 100644 index 000000000..31c0af9f0 --- /dev/null +++ b/tests/functional/vfio/guest-kernel.json.in @@ -0,0 +1,176 @@ +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +{ + "ociVersion": "1.0.0-rc2-dev", + "platform": { + "os": "linux", + "arch": "amd64" + }, + "annotations": { + "io.katacontainers.config.hypervisor.enable_iommu": "false", + "io.katacontainers.config.runtime.vfio_mode": "guest-kernel" + }, + "process": { + "terminal": false, + "consoleSize": { + "height": 0, + "width": 0 + }, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ "/bin/tail", "-f", "/dev/null" ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "rlimits": [{ + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + }], + "noNewPrivileges": true + }, + "root": { + "path": "@ROOTFS@", + "readonly": false + }, + "hostname": "vfio-test", + "mounts": [{ + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": {}, + "linux": { + "devices": [{ + "path": "@VFIO_PATH@", + "type": "c", + "major": @VFIO_MAJOR@, + "minor": @VFIO_MINOR@, + "fileMode": 384, + "uid": 0, + "gid": 0 + }], + "cgroupsPath": "kata/vfiotest", + "resources": { + "devices": [ + {"allow":false,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"}, + {"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"} + ] + }, + "namespaces": [{ + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh new file mode 100755 index 000000000..4f36709a8 --- /dev/null +++ b/tests/functional/vfio/run.sh @@ -0,0 +1,350 @@ +#!/bin/bash +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -x 
+set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +script_path=$(dirname "$0") +source "${script_path}/../../common.bash" + +addr= +tmp_data_dir="$(mktemp -d)" +rootfs_tar="${tmp_data_dir}/rootfs.tar" +trap cleanup EXIT + +# kata-runtime options +SANDBOX_CGROUP_ONLY="" +HYPERVISOR= +MACHINE_TYPE= +IMAGE_TYPE= + +cleanup() { + clean_env_ctr + sudo rm -rf "${tmp_data_dir}" + + [ -n "${host_pci}" ] && sudo driverctl unset-override "${host_pci}" +} + +host_pci_addr() { + lspci -D | grep "Ethernet controller" | grep "Virtio.*network device" | tail -1 | cut -d' ' -f1 +} + +get_vfio_path() { + local addr="$1" + echo "/dev/vfio/$(basename $(realpath /sys/bus/pci/drivers/vfio-pci/${host_pci}/iommu_group))" +} + +pull_rootfs() { + # pull and export busybox image in tar file + local image="quay.io/prometheus/busybox:latest" + sudo -E ctr i pull ${image} + sudo -E ctr i export "${rootfs_tar}" "${image}" + sudo chown ${USER}:${USER} "${rootfs_tar}" + sync +} + +create_bundle() { + local bundle_dir="$1" + mkdir -p "${bundle_dir}" + + # extract busybox rootfs + local rootfs_dir="${bundle_dir}/rootfs" + mkdir -p "${rootfs_dir}" + local layers_dir="$(mktemp -d)" + tar -C "${layers_dir}" -pxf "${rootfs_tar}" + for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers | length");i++)); do + tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]") + done + sync + + # Copy config.json + cp -a "${script_path}/config.json" "${bundle_dir}/config.json" +} + +run_container() { + local container_id="$1" + local bundle_dir="$2" + + sudo -E ctr run -d --runtime io.containerd.kata.v2 --config "${bundle_dir}/config.json" "${container_id}" +} + + +get_ctr_cmd_output() { + local container_id="$1" + shift + timeout 30s sudo -E ctr t exec --exec-id 2 "${container_id}" "${@}" +} + +check_guest_kernel() { + local container_id="$1" + # For vfio_mode=guest-kernel, the device should be bound to + # the guest kernel's native driver. To check this has worked, + # we look for an ethernet device named 'eth*' + get_ctr_cmd_output "${container_id}" ip a | grep "eth" || die "Missing VFIO network interface" +} + +check_vfio() { + local cid="$1" + # For vfio_mode=vfio, the device should be bound to the guest + # vfio-pci driver. + + # Check the control device is visible + get_ctr_cmd_output "${cid}" ls /dev/vfio/vfio || die "Couldn't find VFIO control device in container" + + # The device should *not* cause an ethernet interface to appear + ! 
get_ctr_cmd_output "${cid}" ip a | grep "eth" || die "Unexpected network interface" + + # There should be exactly one VFIO group device (there might + # be multiple IOMMU groups in the VM, but only one device + # should be bound to the VFIO driver, so there should still + # only be one VFIO device + group="$(get_ctr_cmd_output "${cid}" ls /dev/vfio | grep -v vfio)" + if [ $(echo "${group}" | wc -w) != "1" ] ; then + die "Expected exactly one VFIO group got: ${group}" + fi + + # There should be two devices in the IOMMU group: the ethernet + # device we care about, plus the PCIe to PCI bridge device + devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)" + num_devices=$(echo "${devs}" | wc -w) + if [ "${HYPERVISOR}" = "qemu" ] && [ "${num_devices}" != "2" ] ; then + die "Expected exactly two devices got: ${devs}" + fi + if [ "${HYPERVISOR}" = "clh" ] && [ "${num_devices}" != "1" ] ; then + die "Expected exactly one device got: ${devs}" + fi + + # The bridge device will always sort first, because it is on + # bus zero, whereas the NIC will be on a non-zero bus + guest_pci=$(echo "${devs}" | tail -1) + + # This is a roundabout way of getting the environment + # variable, but to use the more obvious "echo $PCIDEVICE_..." + # we would have to escape the '$' enough to not be expanded + # before it's injected into the container, but not so much + # that it *is* expanded by the shell within the container. + # Doing that with another shell function in between is very + # fragile, so do it this way instead. + guest_env="$(get_ctr_cmd_output "${cid}" env | grep ^PCIDEVICE_VIRTIO_NET | sed s/^[^=]*=//)" + if [ "${guest_env}" != "${guest_pci}" ]; then + die "PCIDEVICE variable was \"${guest_env}\" instead of \"${guest_pci}\"" + fi +} + +get_dmesg() { + local container_id="$1" + get_ctr_cmd_output "${container_id}" dmesg +} + +# Show help about this script +help(){ +cat << EOF +Usage: $0 [-h] [options] + Description: + This script runs a kata container and passthrough a vfio device + Options: + -h, Help + -i , Specify initrd or image + -m , Specify kata-runtime machine type for qemu hypervisor + -p , Specify kata-runtime hypervisor + -s , Set sandbox_cgroup_only in the configuration file +EOF +} + +setup_configuration_file() { + local qemu_config_file="configuration-qemu.toml" + local clh_config_file="configuration-clh.toml" + local image_file="/opt/kata/share/kata-containers/kata-containers.img" + local initrd_file="/opt/kata/share/kata-containers/kata-containers-initrd.img" + local kata_config_file="" + + for file in $(kata-runtime --kata-show-default-config-paths); do + if [ ! -f "${file}" ]; then + continue + fi + + kata_config_file="${file}" + config_dir=$(dirname ${file}) + config_filename="" + + if [ "$HYPERVISOR" = "qemu" ]; then + config_filename="${qemu_config_file}" + elif [ "$HYPERVISOR" = "clh" ]; then + config_filename="${clh_config_file}" + fi + + config_file="${config_dir}/${config_filename}" + if [ -f "${config_file}" ]; then + rm -f "${kata_config_file}" + cp -a $(realpath "${config_file}") "${kata_config_file}" + break + fi + done + + # machine type applies to configuration.toml and configuration-qemu.toml + if [ -n "$MACHINE_TYPE" ]; then + if [ "$HYPERVISOR" = "qemu" ]; then + sed -i 's|^machine_type.*|machine_type = "'${MACHINE_TYPE}'"|g' "${kata_config_file}" + else + warn "Variable machine_type only applies to qemu. 
It will be ignored" + fi + fi + + # Make sure we have set hot_plug_vfio to a reasonable value + if [ "$HYPERVISOR" = "qemu" ]; then + sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "bridge-port"|' "${kata_config_file}" + elif [ "$HYPERVISOR" = "clh" ]; then + sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "root-port"|' "${kata_config_file}" + fi + + if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then + sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='${SANDBOX_CGROUP_ONLY}'|g' "${kata_config_file}" + fi + + # Change to initrd or image depending on user input. + # Non-default configs must be changed to specify either initrd or image, image is default. + if [ "$IMAGE_TYPE" = "initrd" ]; then + if $(grep -q "^image.*" ${kata_config_file}); then + if $(grep -q "^initrd.*" ${kata_config_file}); then + sed -i '/^image.*/d' "${kata_config_file}" + else + sed -i 's|^image.*|initrd = "'${initrd_file}'"|g' "${kata_config_file}" + fi + fi + else + if $(grep -q "^initrd.*" ${kata_config_file}); then + if $(grep -q "^image.*" ${kata_config_file}); then + sed -i '/^initrd.*/d' "${kata_config_file}" + else + sed -i 's|^initrd.*|image = "'${image_file}'"|g' "${kata_config_file}" + fi + fi + fi + + # enable debug + sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \ + -e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \ + -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 mitigations=off agent.log=debug"/g' \ + "${kata_config_file}" + + # enable VFIO relevant hypervisor annotations + sed -i -e 's/^\(enable_annotations\).*=.*$/\1 = ["enable_iommu"]/' \ + "${kata_config_file}" +} + +run_test_container() { + local container_id="$1" + local bundle_dir="$2" + local config_json_in="$3" + local host_pci="$4" + + # generate final config.json + sed -e '/^#.*/d' \ + -e 's|@VFIO_PATH@|'"${vfio_device}"'|g' \ + -e 's|@VFIO_MAJOR@|'"${vfio_major}"'|g' \ + -e 's|@VFIO_MINOR@|'"${vfio_minor}"'|g' \ + -e 's|@VFIO_CTL_MAJOR@|'"${vfio_ctl_major}"'|g' \ + -e 's|@VFIO_CTL_MINOR@|'"${vfio_ctl_minor}"'|g' \ + -e 's|@ROOTFS@|'"${bundle_dir}/rootfs"'|g' \ + -e 's|@HOST_PCI@|'"${host_pci}"'|g' \ + "${config_json_in}" > "${script_path}/config.json" + + create_bundle "${bundle_dir}" + + # run container + run_container "${container_id}" "${bundle_dir}" + + # output VM dmesg + get_dmesg "${container_id}" +} + +main() { + local OPTIND + while getopts "hi:m:p:s:" opt;do + case ${opt} in + h) + help + exit 0; + ;; + i) + IMAGE_TYPE="${OPTARG}" + ;; + m) + MACHINE_TYPE="${OPTARG}" + ;; + p) + HYPERVISOR="${OPTARG}" + ;; + s) + SANDBOX_CGROUP_ONLY="${OPTARG}" + ;; + ?) 
+ # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) + + # + # Get the device ready on the host + # + setup_configuration_file + + restart_containerd_service + sudo modprobe vfio + sudo modprobe vfio-pci + + host_pci=$(host_pci_addr) + [ -n "${host_pci}" ] || die "virtio ethernet controller PCI address not found" + + grep -q "intel_iommu=on" /proc/cmdline || \ + die "intel_iommu=on not found in kernel cmdline" + + sudo driverctl set-override "${host_pci}" vfio-pci + + vfio_device="$(get_vfio_path "${host_pci}")" + [ -n "${vfio_device}" ] || die "vfio device not found" + vfio_major="$(printf '%d' $(stat -c '0x%t' ${vfio_device}))" + vfio_minor="$(printf '%d' $(stat -c '0x%T' ${vfio_device}))" + + [ -e "/dev/vfio/vfio" ] || die "vfio control device not found" + vfio_ctl_major="$(printf '%d' $(stat -c '0x%t' /dev/vfio/vfio))" + vfio_ctl_minor="$(printf '%d' $(stat -c '0x%T' /dev/vfio/vfio))" + + # Get the rootfs we'll use for all tests + pull_rootfs + + # + # Run the tests + # + + # test for guest-kernel mode + guest_kernel_cid="vfio-guest-kernel-${RANDOM}" + run_test_container "${guest_kernel_cid}" \ + "${tmp_data_dir}/vfio-guest-kernel" \ + "${script_path}/guest-kernel.json.in" \ + "${host_pci}" + check_guest_kernel "${guest_kernel_cid}" + + # Remove the container so we can re-use the device for the next test + clean_env_ctr + + # test for vfio mode + vfio_cid="vfio-vfio-${RANDOM}" + run_test_container "${vfio_cid}" \ + "${tmp_data_dir}/vfio-vfio" \ + "${script_path}/vfio.json.in" \ + "${host_pci}" + check_vfio "${vfio_cid}" +} + +main "$@" diff --git a/tests/functional/vfio/vfio.json.in b/tests/functional/vfio/vfio.json.in new file mode 100644 index 000000000..19667c01e --- /dev/null +++ b/tests/functional/vfio/vfio.json.in @@ -0,0 +1,187 @@ +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +{ + "ociVersion": "1.0.0-rc2-dev", + "platform": { + "os": "linux", + "arch": "amd64" + }, + "annotations": { + "io.katacontainers.config.hypervisor.enable_iommu": "true", + "io.katacontainers.config.runtime.vfio_mode": "vfio" + }, + "process": { + "terminal": false, + "consoleSize": { + "height": 0, + "width": 0 + }, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ "/bin/tail", "-f", "/dev/null" ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + "PCIDEVICE_VIRTIO_NET=@HOST_PCI@" + ], + "cwd": "/", + "rlimits": [{ + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + }], + "noNewPrivileges": true + }, + "root": { + "path": "@ROOTFS@", + "readonly": false + }, + "hostname": "vfio-test", + "mounts": [{ + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination":
"/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": {}, + "linux": { + "devices": [{ + "path": "/dev/vfio/vfio", + "type": "c", + "major": @VFIO_CTL_MAJOR@, + "minor": @VFIO_CTL_MINOR@, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "@VFIO_PATH@", + "type": "c", + "major": @VFIO_MAJOR@, + "minor": @VFIO_MINOR@, + "fileMode": 384, + "uid": 0, + "gid": 0 + }], + "cgroupsPath": "kata/vfiotest", + "resources": { + "devices": [ + {"allow":false,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"}, + {"allow": true,"access": "rwm","major": @VFIO_CTL_MAJOR@,"minor": @VFIO_CTL_MINOR@,"type": "c"}, + {"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"} + ] + }, + "namespaces": [{ + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh new file mode 100755 index 000000000..bddd03445 --- /dev/null +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -0,0 +1,329 @@ +#!/bin/bash +# +# Copyright (c) 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +# Run the .ci/jenkins_job_build.sh script in a VM +# that supports VFIO, then run VFIO functional tests + +set -o xtrace +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +cidir=$(readlink -f $(dirname "$0")) + +source /etc/os-release || source /usr/lib/os-release +# +source "${cidir}/../../common.bash" +export WORKSPACE="${WORKSPACE:-${HOME}}" +export GIT_URL="https://github.com/kata-containers/kata-containers.git" +export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" +# + +http_proxy=${http_proxy:-} +https_proxy=${https_proxy:-} +vm_ip="127.0.15.1" +vm_port="10022" +# Don't save data in /tmp, we need it after rebooting the system +data_dir="${HOME}/functional-vfio-test" +ssh_key_file="${data_dir}/key" +arch=$(uname -m) +artifacts_dir="${WORKSPACE}/artifacts" + +kill_vms() { + sudo killall -9 qemu-system-${arch} +} + +cleanup() { + mkdir -p ${artifacts_dir} + sudo chown -R ${USER} ${artifacts_dir} + scp_vm ${artifacts_dir}/* ${artifacts_dir} || true + kill_vms + + echo "::group::L2 journal" + cat "${artifacts_dir}/journal.log" + echo "::endgroup::" + + echo "::group::L1 dmesg" + sudo dmesg + echo "::endgroup::" +} + +create_ssh_key() { + rm -f "${ssh_key_file}" + ssh-keygen -f "${ssh_key_file}" -t rsa -N "" +} + +create_meta_data() { + file="$1" + cat < "${file}" +{ + "uuid": "d1b4aafa-5d75-4f9c-87eb-2ceabe110c39", + "hostname": "test" +} +EOF +} + +create_user_data() { + file="$1" + ssh_pub_key_file="$2" + + ssh_pub_key="$(cat "${ssh_pub_key_file}")" + dnf_proxy="" + service_proxy="" + docker_user_proxy="{}" + environment=$(env | egrep 
"ghprb|WORKSPACE|KATA|GIT|JENKINS|_PROXY|_proxy" | \ + sed -e "s/'/'\"'\"'/g" \ + -e "s/\(^[[:alnum:]_]\+\)=/\1='/" \ + -e "s/$/'/" \ + -e 's/^/ export /') + + if [ -n "${http_proxy}" ] && [ -n "${https_proxy}" ]; then + dnf_proxy="proxy=${http_proxy}" + service_proxy='[Service] + Environment="HTTP_PROXY='${http_proxy}'" "HTTPS_PROXY='${https_proxy}'" "NO_PROXY='${no_proxy}'"' + docker_user_proxy='{"proxies": { "default": { + "httpProxy": "'${http_proxy}'", + "httpsProxy": "'${https_proxy}'", + "noProxy": "'${no_proxy}'" + } } }' + fi + + cat < "${file}" +#cloud-config +package_upgrade: false +runcmd: +- chown -R ${USER}:${USER} /home/${USER} +- touch /.done +users: +- gecos: User + gid: "1000" + lock-passwd: true + name: ${USER} + shell: /bin/bash + ssh-authorized-keys: + - ${ssh_pub_key} + sudo: ALL=(ALL) NOPASSWD:ALL + uid: "1000" +write_files: +- content: | + [main] + fastestmirror=True + gpgcheck=1 + max_parallel_downloads=10 + installonly_limit=2 + clean_requirements_on_remove=True + keepcache=True + ip_resolve=4 + path: /etc/dnf/dnf.conf +- content: | +${environment} + path: /etc/environment +- content: | + ${service_proxy} + path: /etc/systemd/system/docker.service.d/http-proxy.conf +- content: | + ${service_proxy} + path: /etc/systemd/system/containerd.service.d/http-proxy.conf +- content: | + ${docker_user_proxy} + path: ${HOME}/.docker/config.json +- content: | + ${docker_user_proxy} + path: /root/.docker/config.json +- content: | + set -x + set -o errexit + set -o nounset + set -o pipefail + set -o errtrace + . /etc/environment + . /etc/os-release + + [ "\$ID" = "fedora" ] || (echo >&2 "$0 only supports Fedora"; exit 1) + + echo "${dnf_proxy}" | sudo tee -a /etc/dnf/dnf.conf + + for i in \$(seq 1 50); do + [ -f /.done ] && break + echo "waiting for cloud-init to finish" + sleep 5; + done + + export DEBUG=true + export GOPATH=\${WORKSPACE}/go + export PATH=\${GOPATH}/bin:/usr/local/go/bin:/usr/sbin:\${PATH} + export GOROOT="/usr/local/go" + + # Make sure the packages were installed + # Sometimes cloud-init is unable to install them + sudo dnf install -y git wget pciutils driverctl + + git config --global user.email "foo@bar" + git config --global user.name "Foo Bar" + + sudo mkdir -p /workspace + sudo mount -t 9p -o access=any,trans=virtio,version=9p2000.L workspace /workspace + mkdir -p ${artifacts_dir} + trap "cd /workspace; sudo journalctl -b0 > ${artifacts_dir}/journal.log || true; sudo chown -R \${USER} ${artifacts_dir}" EXIT + + pushd /workspace + source tests/common.bash + ensure_yq + cri_containerd=\$(get_from_kata_deps "externals.containerd.lts") + cri_tools=\$(get_from_kata_deps "externals.critools.latest") + install_cri_containerd \${cri_containerd} + install_cri_tools \${cri_tools} + + kata_tarball_dir="kata-artifacts" + install_kata + + sudo /workspace/tests/functional/vfio/run.sh -s false -p \${KATA_HYPERVISOR} -m q35 -i image + sudo /workspace/tests/functional/vfio/run.sh -s true -p \${KATA_HYPERVISOR} -m q35 -i image + + path: /home/${USER}/run.sh + permissions: '0755' +EOF +} + +create_config_iso() { + iso_file="$1" + ssh_pub_key_file="${ssh_key_file}.pub" + iso_data_dir="${data_dir}/d" + meta_data_file="${iso_data_dir}/openstack/latest/meta_data.json" + user_data_file="${iso_data_dir}/openstack/latest/user_data" + + mkdir -p $(dirname "${user_data_file}") + + create_meta_data "${meta_data_file}" + create_user_data "${user_data_file}" "${ssh_pub_key_file}" + + [ -f "${iso_file}" ] && rm -f "${iso_file}" + + xorriso -as mkisofs -R -V config-2 -o "${iso_file}" 
"${iso_data_dir}" +} + +pull_fedora_cloud_image() { + fedora_img="$1" + fedora_version=38 + # Add a version to the image cache, otherwise the tests are going to + # use always the same image without rebuilding it, regardless the version + # set in fedora_version + fedora_img_cache="${fedora_img}.cache.${fedora_version}" + fedora_img_url="https://download.fedoraproject.org/pub/fedora/linux/releases/${fedora_version}/Cloud/${arch}/images/Fedora-Cloud-Base-${fedora_version}-1.6.${arch}.raw.xz" + + if [ ! -f "${fedora_img_cache}" ]; then + curl -sL ${fedora_img_url} -o "${fedora_img_cache}.xz" + xz -f -d "${fedora_img_cache}.xz" + fi + + cp -a "${fedora_img_cache}" "${fedora_img}" + + # setup cloud image + sudo losetup -D + loop=$(sudo losetup --show -Pf "${fedora_img}") + sudo mount "${loop}p2" /mnt + + # add intel_iommu=on to the guest kernel command line + kernelopts="intel_iommu=on iommu=pt selinux=0 mitigations=off idle=poll kvm.tdp_mmu=0" + entries=$(sudo ls /mnt/loader/entries/) + for entry in ${entries}; do + sudo sed -i '/^options / s/$/ '"${kernelopts}"' /g' /mnt/loader/entries/"${entry}" + done + sudo sed -i 's|kernelopts="|kernelopts="'"${kernelopts}"'|g' /mnt/grub2/grub.cfg + sudo sed -i 's|kernelopts=|kernelopts='"${kernelopts}"'|g' /mnt/grub2/grubenv + + # cleanup + sudo umount -R /mnt/ + sudo losetup -d "${loop}" + + qemu-img resize -f raw "${fedora_img}" +20G +} + +reload_kvm() { + # TDP_MMU is buggy on Hyper-V until v6.3/v6.4 + sudo rmmod kvm-intel kvm-amd kvm || true + sudo modprobe kvm tdp_mmu=0 + sudo modprobe kvm-intel || true + sudo modprobe kvm-amd || true +} + +run_vm() { + image="$1" + config_iso="$2" + disable_modern="off" + hostname="$(hostname)" + memory="8192M" + cpus=2 + machine_type="q35" + + reload_kvm + + sudo /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \ + -cpu host,host-phys-bits \ + -machine ${machine_type},accel=kvm,kernel_irqchip=split \ + -device intel-iommu,intremap=on,caching-mode=on,device-iotlb=on \ + -drive file=${image},if=virtio,aio=threads,format=raw \ + -drive file=${config_iso_file},if=virtio,media=cdrom \ + -daemonize -enable-kvm -device virtio-rng-pci -display none -vga none \ + -netdev user,hostfwd=tcp:${vm_ip}:${vm_port}-:22,hostname="${hostname}",id=net0 \ + -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \ + -netdev user,id=net1 \ + -device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \ + -fsdev local,path=${repo_root_dir},security_model=passthrough,id=fs0 \ + -device virtio-9p-pci,fsdev=fs0,mount_tag=workspace + +} + +ssh_vm() { + cmd=$@ + ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -p "${vm_port}" "${USER}@${vm_ip}" "${cmd}" +} + +scp_vm() { + guest_src=$1 + host_dest=$2 + scp -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -P "${vm_port}" ${USER}@${vm_ip}:${guest_src} ${host_dest} +} + +wait_for_vm() { + for i in $(seq 1 30); do + if ssh_vm true; then + return 0 + fi + info "waiting for VM to start" + sleep 5 + done + return 1 +} + +main() { + trap cleanup EXIT + + config_iso_file="${data_dir}/config.iso" + fedora_img="${data_dir}/image.img" + + mkdir -p "${data_dir}" + + create_ssh_key + + create_config_iso "${config_iso_file}" + + for i in $(seq 1 5); do + pull_fedora_cloud_image "${fedora_img}" + run_vm "${fedora_img}" "${config_iso_file}" + if wait_for_vm; then + 
break + fi + info "Couldn't connect to the VM. Stopping VM and starting a new one." + kill_vms + done + + ssh_vm "/home/${USER}/run.sh" +} + +main $@ diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh new file mode 100644 index 000000000..4f7de35e1 --- /dev/null +++ b/tests/gha-run-k8s-common.sh @@ -0,0 +1,266 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${tests_dir}/common.bash" + +K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" + +function _print_instance_type() { + case ${K8S_TEST_HOST_TYPE} in + small) + echo "Standard_D2s_v5" + ;; + normal) + echo "Standard_D4s_v5" + ;; + *) + echo "Unknown instance type '${K8S_TEST_HOST_TYPE}'" >&2 + exit 1 + esac +} + +function _print_cluster_name() { + test_type="${1:-k8s}" + + short_sha="$(git rev-parse --short=12 HEAD)" + echo "${test_type}-${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64-${K8S_TEST_HOST_TYPE:0:1}" +} + +function _print_rg_name() { + test_type="${1:-k8s}" + + echo "${AZ_RG:-"kataCI-$(_print_cluster_name ${test_type})"}" +} + +function install_azure_cli() { + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + # The aks-preview extension is required while the Mariner Kata host is in preview. + az extension add --name aks-preview +} + +function login_azure() { + az login \ + --service-principal \ + -u "${AZ_APPID}" \ + -p "${AZ_PASSWORD}" \ + --tenant "${AZ_TENANT_ID}" +} + +function create_cluster() { + test_type="${1:-k8s}" + + # First ensure it didn't fail to get cleaned up from a previous run. + delete_cluster "${test_type}" || true + + local rg="$(_print_rg_name ${test_type})" + + az group create \ + -l eastus2 \ + -n "${rg}" + + az aks create \ + -g "${rg}" \ + --node-resource-group "node-${rg}" \ + -n "$(_print_cluster_name ${test_type})" \ + -s "$(_print_instance_type)" \ + --node-count 1 \ + --generate-ssh-keys \ + $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") +} + +function install_bats() { + # Installing bats from the lunar repo. + # This installs newer version of the bats which supports setup_file and teardown_file functions. + # These functions are helpful when adding new tests that require one time setup. 
+ + sudo apt install -y software-properties-common + sudo add-apt-repository 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' + sudo apt install -y bats + sudo add-apt-repository --remove 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' +} + +function install_kubectl() { + sudo az aks install-cli +} + +function get_cluster_credentials() { + test_type="${1:-k8s}" + + az aks get-credentials \ + -g "$(_print_rg_name ${test_type})" \ + -n "$(_print_cluster_name ${test_type})" +} + +function delete_cluster() { + test_type="${1:-k8s}" + + az group delete \ + -g "$(_print_rg_name ${test_type})" \ + --yes +} + +function delete_cluster_kcli() { + CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}" + kcli delete -y kube "$CLUSTER_NAME" +} + +function get_nodes_and_pods_info() { + kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true + kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true +} + +function deploy_k0s() { + curl -sSLf https://get.k0s.sh | sudo sh + + sudo k0s install controller --single ${KUBERNETES_EXTRA_PARAMS:-} + + sudo k0s start + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Download the kubectl binary into /usr/bin so we can avoid depending + # on `k0s kubectl` command + ARCH=$(uname -m) + if [ "${ARCH}" = "x86_64" ]; then + ARCH=amd64 + fi + kubectl_version=$(sudo k0s kubectl version 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl + sudo chmod +x /usr/bin/kubectl + + mkdir -p ~/.kube + sudo cp /var/lib/k0s/pki/admin.conf ~/.kube/config + sudo chown ${USER}:${USER} ~/.kube/config +} + +function deploy_k3s() { + curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl + # + # We need to do this to avoid hitting issues like: + # ```sh + # error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied + # ``` + # Which happens basically because k3s links `/usr/local/bin/kubectl` + # to `/usr/local/bin/k3s`, and that does extra stuff that vanilla + # `kubectl` doesn't do. + ARCH=$(uname -m) + if [ "${ARCH}" = "x86_64" ]; then + ARCH=amd64 + fi + kubectl_version=$(/usr/local/bin/k3s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/\+k3s1//') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl + sudo chmod +x /usr/bin/kubectl + sudo rm -rf /usr/local/bin/kubectl + + mkdir -p ~/.kube + cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function create_cluster_kcli() { + CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}" + + delete_cluster_kcli || true + + kcli create kube "${KUBE_TYPE:-generic}" \ + -P domain="kata.com" \ + -P pool="${LIBVIRT_POOL:-default}" \ + -P ctlplanes="${CLUSTER_CONTROL_NODES:-1}" \ + -P workers="${CLUSTER_WORKERS:-1}" \ + -P network="${LIBVIRT_NETWORK:-default}" \ + -P image="${CLUSTER_IMAGE:-ubuntu2004}" \ + -P sdn=flannel \ + -P nfs=false \ + -P disk_size="${CLUSTER_DISK_SIZE:-20}" \ + "${CLUSTER_NAME}" + + export KUBECONFIG="$HOME/.kcli/clusters/$CLUSTER_NAME/auth/kubeconfig" + + local cmd="kubectl get nodes | grep '.*worker.*\'" + echo "Wait at least one worker be Ready" + if ! 
waitForProcess "330" "30" "$cmd"; then + echo "ERROR: worker nodes not ready." + kubectl get nodes + return 1 + fi + + # Ensure that system pods are running or completed. + cmd="[ \$(kubectl get pods -A --no-headers | grep -v 'Running\|Completed' | wc -l) -eq 0 ]" + echo "Wait system pods be running or completed" + if ! waitForProcess "90" "30" "$cmd"; then + echo "ERROR: not all pods are Running or Completed." + kubectl get pods -A + kubectl get pods -A + return 1 + fi +} + +function deploy_rke2() { + curl -sfL https://get.rke2.io | sudo sh - + + sudo systemctl enable --now rke2-server.service + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Link the kubectl binary into /usr/bin + sudo ln -sf /var/lib/rancher/rke2/bin/kubectl /usr/local/bin/kubectl + + mkdir -p ~/.kube + sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config + sudo chown ${USER}:${USER} ~/.kube/config +} + +function _get_k0s_kubernetes_version_for_crio() { + # k0s version will look like: + # v1.27.5+k0s.0 + # + # The CRI-O repo for such version of Kubernetes expects something like: + # 1.27 + k0s_version=$(curl -sSLf "https://docs.k0sproject.io/stable.txt") + + # Remove everything after the second '.' + crio_version=${k0s_version%\.*+*} + # Remove the 'v' + crio_version=${crio_version#v} + + echo ${crio_version} +} + +function setup_crio() { + # Get the CRI-O version to be installed depending on the version of the + # "k8s distro" that we are using + case ${KUBERNETES} in + k0s) crio_version=$(_get_k0s_kubernetes_version_for_crio) ;; + *) >&2 echo "${KUBERNETES} flavour is not supported with CRI-O"; exit 2 ;; + + esac + + install_crio ${crio_version} +} + +function deploy_k8s() { + echo "::group::Deploying ${KUBERNETES}" + + case ${KUBERNETES} in + k0s) deploy_k0s ;; + k3s) deploy_k3s ;; + rke2) deploy_rke2 ;; + *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + echo "::endgroup::" +} diff --git a/tests/git-helper.sh b/tests/git-helper.sh new file mode 100755 index 000000000..81ec53cfa --- /dev/null +++ b/tests/git-helper.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +function add_kata_bot_info() { + echo "Adding user name and email to the local git repo" + + git config user.email "katacontainersbot@gmail.com" + git config user.name "Kata Containers Bot" +} + +function rebase_atop_of_the_latest_target_branch() { + if [ -n "${TARGET_BRANCH}" ]; then + echo "Rebasing atop of the latest ${TARGET_BRANCH}" + git rebase origin/${TARGET_BRANCH} + fi +} + +function main() { + action="${1:-}" + + add_kata_bot_info + + case "${action}" in + rebase-atop-of-the-latest-target-branch) rebase_atop_of_the_latest_target_branch;; + *) >&2 echo "Invalid argument"; exit 2 ;; + esac +} + +main "$@" diff --git a/tests/install_go.sh b/tests/install_go.sh index 3827bb7b3..dadaf6ca0 100755 --- a/tests/install_go.sh +++ b/tests/install_go.sh @@ -87,7 +87,7 @@ if command -v go; then fi fi -goarch=$("${repo_root_dir}/tests/kata-arch.sh" --golang) +goarch=$(arch_to_golang) info "Download go version ${go_version}" kernel_name=$(uname -s) diff --git a/tests/install_rust.sh b/tests/install_rust.sh new file mode 100755 index 000000000..abb93cac6 --- /dev/null +++ b/tests/install_rust.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# +# Copyright (c) 2019 Ant Financial +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd)" +script_name="$(basename "${BASH_SOURCE[0]}")" + +source "${script_dir}/common.bash" + +rustarch=$(arch_to_rust) + +version="${1:-""}" +if [ -z "${version}" ]; then + version=$(get_from_kata_deps "languages.rust.meta.newest-version") +fi + +echo "Install rust ${version}" + +if ! command -v rustup > /dev/null; then + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain ${version} +fi + +export PATH="${PATH}:${HOME}/.cargo/bin" + +## Still try to install the target version of toolchain, +## in case that the rustup has been installed but +## with a different version toolchain. +## Even though the target version toolchain has been installed, +## this command will not take too long to run. +rustup toolchain install ${version} +rustup default ${version} +if [ "${rustarch}" == "powerpc64le" ] || [ "${rustarch}" == "s390x" ] ; then + rustup target add ${rustarch}-unknown-linux-gnu +else + rustup target add ${rustarch}-unknown-linux-musl + $([ "$(whoami)" != "root" ] && echo sudo) ln -sf /usr/bin/g++ /bin/musl-g++ +fi +rustup component add rustfmt +rustup component add clippy diff --git a/tests/integration/cri-containerd/gha-run.sh b/tests/integration/cri-containerd/gha-run.sh index c36c96605..542964423 100755 --- a/tests/integration/cri-containerd/gha-run.sh +++ b/tests/integration/cri-containerd/gha-run.sh @@ -59,7 +59,8 @@ function install_dependencies() { function run() { info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" - return 0 + enabling_hypervisor + bash -c ${cri_containerd_dir}/integration-tests.sh } function main() { diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 0e4df5578..33f212856 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -21,9 +21,9 @@ export PATH="$PATH:/usr/local/sbin" export PATH="$PATH:/usr/local/go/bin" # Runtime to be used for testing -RUNTIME=${RUNTIME:-containerd-shim-kata-v2} -FACTORY_TEST=${FACTORY_TEST:-""} KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" +RUNTIME=${RUNTIME:-containerd-shim-kata-${KATA_HYPERVISOR}-v2} +FACTORY_TEST=${FACTORY_TEST:-""} USE_DEVMAPPER="${USE_DEVMAPPER:-false}" ARCH=$(uname -m) @@ -130,11 +130,17 @@ trap cleanup EXIT function err_report() { local log_file="${REPORT_DIR}/containerd.log" if [ -f "$log_file" ]; then - echo "ERROR: containerd log :" + echo "::group::ERROR: containerd log :" echo "-------------------------------------" cat "${log_file}" echo "-------------------------------------" + echo "::endgroup::" fi + echo "::group::ERROR: Kata Containers logs : " + echo "-------------------------------------" + sudo journalctl -xe -t kata + echo "-------------------------------------" + echo "::endgroup::" } @@ -197,10 +203,12 @@ EOF } function testContainerStop() { + info "show pod $podid" + sudo crictl --timeout=20s pods --id $podid info "stop pod $podid" - sudo crictl stopp $podid + sudo crictl --timeout=20s stopp $podid info "remove pod $podid" - sudo crictl rmp $podid + sudo crictl --timeout=20s rmp $podid sudo cp "$default_containerd_config_backup" "$default_containerd_config" restart_containerd_service diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh new file mode 100755 index 000000000..cab3401ea --- /dev/null +++ b/tests/integration/docker/gha-run.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: 
Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +docker_dir="$(dirname "$(readlink -f "$0")")" +source "${docker_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the docker smoke test" + + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get -y install ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the repository to Apt sources: + echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +} + +function run() { + info "Running docker smoke test tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + + info "Running docker with runc" + sudo docker run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com + + info "Running docker with Kata Containers (${KATA_HYPERVISOR})" + sudo docker run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh new file mode 100644 index 000000000..0e049cadd --- /dev/null +++ b/tests/integration/kubernetes/confidential_common.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright 2022-2023 Advanced Micro Devices, Inc. 
+# Copyright 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +source "${BATS_TEST_DIRNAME}/tests_common.sh" + +function setup_unencrypted_confidential_pod() { + get_pod_config_dir + + export SSH_KEY_FILE="${pod_config_dir}/confidential/unencrypted/ssh/unencrypted" + + if [ -n "${PR_NUMBER}" ]; then + # Use correct address in pod yaml + sed -i "s/-nightly/-${PR_NUMBER}/" "${pod_config_dir}/pod-confidential-unencrypted.yaml" + fi + + # Set permissions on private key file + sudo chmod 600 "${SSH_KEY_FILE}" +} + +# This function relies on `KATA_HYPERVISOR` being an environment variable +# and returns the remote command to be executed to that specific hypervisor +# in order to identify whether the workload is running on a TEE environment +function get_remote_command_per_hypervisor() { + declare -A REMOTE_COMMAND_PER_HYPERVISOR + REMOTE_COMMAND_PER_HYPERVISOR[qemu-sev]="dmesg | grep \"Memory Encryption Features active:.*\(SEV$\|SEV \)\"" + REMOTE_COMMAND_PER_HYPERVISOR[qemu-snp]="dmesg | grep \"Memory Encryption Features active:.*SEV-SNP\"" + REMOTE_COMMAND_PER_HYPERVISOR[qemu-tdx]="cpuid | grep TDX_GUEST" + + echo "${REMOTE_COMMAND_PER_HYPERVISOR[${KATA_HYPERVISOR}]}" +} diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index d756ccfdc..c72e35be7 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -9,63 +9,100 @@ set -o nounset set -o pipefail kubernetes_dir="$(dirname "$(readlink -f "$0")")" -source "${kubernetes_dir}/../../common.bash" +source "${kubernetes_dir}/../../gha-run-k8s-common.sh" +# shellcheck disable=2154 tools_dir="${repo_root_dir}/tools" -AZ_RG="${AZ_RG:-kataCI}" +DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} +DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} +DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} +KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m} +KATA_HYPERVISOR=${KATA_HYPERVISOR:-qemu} +KUBERNETES="${KUBERNETES:-}" -function _print_cluster_name() { - short_sha="$(git rev-parse --short=12 HEAD)" - echo "${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" +function configure_devmapper() { + sudo mkdir -p /var/lib/containerd/devmapper + sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img + sudo truncate --size 10G /var/lib/containerd/devmapper/meta-disk.img + + cat<&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + # We're not using this with baremetal machines, so we're fine on cutting + # corners here and just append this to the configuration file. + cat<&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + sleep 60s + sudo cat ${containerd_config_file} } -function install_azure_cli() { - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - # The aks-preview extension is required while the Mariner Kata host is in preview. - az extension add --name aks-preview -} +function configure_snapshotter() { + echo "::group::Configuring ${SNAPSHOTTER}" -function login_azure() { - az login \ - --service-principal \ - -u "${AZ_APPID}" \ - -p "${AZ_PASSWORD}" \ - --tenant "${AZ_TENANT_ID}" -} + case ${SNAPSHOTTER} in + devmapper) configure_devmapper ;; + *) >&2 echo "${SNAPSHOTTER} flavour is not supported"; exit 2 ;; + esac -function create_cluster() { - # First, ensure that the cluster didn't fail to get cleaned up from a previous run. 
- delete_cluster || true - - az aks create \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ - -s "Standard_D4s_v5" \ - --node-count 1 \ - --generate-ssh-keys \ - $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") -} - -function install_bats() { - sudo apt-get update - sudo apt-get -y install bats -} - -function install_kubectl() { - sudo az aks install-cli -} - -function get_cluster_credentials() { - az aks get-credentials \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" + echo "::endgroup::" } function deploy_kata() { platform="${1}" ensure_yq - # Emsure we're in the default namespace + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + + # Ensure we're in the default namespace kubectl config set-context --current --namespace=default sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" @@ -82,16 +119,16 @@ function deploy_kata() { echo "::group::Final kata-deploy.yaml that is used in the test" cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" - cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" | grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image" echo "::endgroup::" kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then kubectl apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s" else kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" fi - kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + kubectl -n kube-system wait --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}" --for=condition=Ready -l name=kata-deploy pod # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, # which may cause issues like not having the node properly labeled or the artefacts @@ -103,7 +140,7 @@ function deploy_kata() { fi echo "::group::kata-deploy logs" - kubectl -n kube-system logs -l name=kata-deploy + kubectl -n kube-system logs --tail=100 -l name=kata-deploy echo "::endgroup::" echo "::group::Runtime classes" @@ -112,11 +149,16 @@ function deploy_kata() { } function run_tests() { + platform="${1:-}" + + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + # Delete any spurious tests namespace that was left behind kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true # Create a new namespace for the tests and switch to it - kubectl apply -f ${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml + kubectl apply -f "${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml" kubectl config set-context --current --namespace=kata-containers-k8s-tests pushd "${kubernetes_dir}" @@ -127,13 +169,17 @@ function run_tests() { function cleanup() { platform="${1}" + test_type="${2:-k8s}" ensure_yq + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + echo "Gather information about the nodes and pods before cleaning up the node" 
get_nodes_and_pods_info if [ "${platform}" = "aks" ]; then - delete_cluster + delete_cluster ${test_type} return fi @@ -141,7 +187,7 @@ function cleanup() { kubectl config set-context --current --namespace=default kubectl delete namespace kata-containers-k8s-tests - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then deploy_spec="-k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s"" cleanup_spec="-k "${tools_dir}/packaging/kata-deploy/kata-cleanup/overlays/k3s"" else @@ -149,6 +195,7 @@ function cleanup() { cleanup_spec="-f "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"" fi + # shellcheck disable=2086 kubectl delete ${deploy_spec} kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod @@ -157,28 +204,19 @@ function cleanup() { sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" - cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" | grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image" + # shellcheck disable=2086 kubectl apply ${cleanup_spec} sleep 180s + # shellcheck disable=2086 kubectl delete ${cleanup_spec} kubectl delete -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" } -function delete_cluster() { - az aks delete \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ - --yes -} - -function get_nodes_and_pods_info() { - kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true - kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true -} - function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" + export K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-}" action="${1:-}" @@ -186,18 +224,28 @@ function main() { install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; create-cluster) create_cluster ;; + create-cluster-kcli) create_cluster_kcli ;; + configure-snapshotter) configure_snapshotter ;; + setup-crio) setup_crio ;; + deploy-k8s) deploy_k8s ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; get-cluster-credentials) get_cluster_credentials ;; deploy-kata-aks) deploy_kata "aks" ;; + deploy-kata-kcli) deploy_kata "kcli" ;; deploy-kata-sev) deploy_kata "sev" ;; deploy-kata-snp) deploy_kata "snp" ;; deploy-kata-tdx) deploy_kata "tdx" ;; + deploy-kata-garm) deploy_kata "garm" ;; run-tests) run_tests ;; + run-tests-kcli) run_tests "kcli" ;; + cleanup-kcli) cleanup "kcli" ;; cleanup-sev) cleanup "sev" ;; cleanup-snp) cleanup "snp" ;; cleanup-tdx) cleanup "tdx" ;; + cleanup-garm) cleanup "garm" ;; delete-cluster) cleanup "aks" ;; + delete-cluster-kcli) delete_cluster_kcli ;; *) >&2 echo "Invalid argument"; exit 2 ;; esac } diff --git a/tests/integration/kubernetes/k8s-confidential.bats b/tests/integration/kubernetes/k8s-confidential.bats new file mode 100644 index 000000000..c78c7d518 --- /dev/null +++ b/tests/integration/kubernetes/k8s-confidential.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats +# Copyright 2022-2023 Advanced Micro Devices, Inc. 
+# Copyright 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/confidential_common.sh" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + SUPPORTED_HYPERVISORS=("qemu-sev" "qemu-snp" "qemu-tdx") + + # This check must be done with "${KATA_HYPERVISOR}" to avoid + # having substrings, like qemu, being matched with qemu-$something. + [[ " ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]] || skip "Test not supported for ${KATA_HYPERVISOR}." + + get_pod_config_dir + setup_unencrypted_confidential_pod +} + +@test "Test unencrypted confidential container launch success and verify that we are running in a secure enclave." { + # Start the service/deployment/pod + kubectl apply -f "${pod_config_dir}/pod-confidential-unencrypted.yaml" + + # Retrieve pod name, wait for it to come up, retrieve pod ip + pod_name=$(kubectl get pod -o wide | grep "confidential-unencrypted" | awk '{print $1;}') + + # Check pod creation + kubectl wait --for=condition=Ready --timeout=$timeout pod "${pod_name}" + + pod_ip=$(kubectl get pod -o wide | grep "confidential-unencrypted" | awk '{print $6;}') + + # Run the remote command + coco_enabled=$(ssh -i ${SSH_KEY_FILE} -o "StrictHostKeyChecking no" -o "PasswordAuthentication=no" root@${pod_ip} /bin/sh -c "$(get_remote_command_per_hypervisor)" || true) + + if [ -z "$coco_enabled" ]; then + >&2 echo -e "Confidential compute is expected but not enabled." + return 1 + fi +} + +teardown() { + [[ " ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]] || skip "Test not supported for ${KATA_HYPERVISOR}." + + kubectl describe "pod/${pod_name}" || true + kubectl delete -f "${pod_config_dir}/pod-confidential-unencrypted.yaml" || true +} diff --git a/tests/integration/kubernetes/k8s-cpu-ns.bats b/tests/integration/kubernetes/k8s-cpu-ns.bats index f3c69a2f6..878e761a2 100644 --- a/tests/integration/kubernetes/k8s-cpu-ns.bats +++ b/tests/integration/kubernetes/k8s-cpu-ns.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ "${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ && skip "TEEs do not support memory / CPU hotplug" @@ -28,12 +29,6 @@ setup() { } @test "Check CPU constraints" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" - ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ "${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ - && skip "TEEs do not support memory / CPU hotplug" - - # Create the pod kubectl create -f "${pod_config_dir}/pod-cpu.yaml" @@ -76,6 +71,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ "${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ && skip "TEEs do not support memory / 
CPU hotplug" diff --git a/tests/integration/kubernetes/k8s-credentials-secrets.bats b/tests/integration/kubernetes/k8s-credentials-secrets.bats index 51d2ba995..8becf2e2e 100644 --- a/tests/integration/kubernetes/k8s-credentials-secrets.bats +++ b/tests/integration/kubernetes/k8s-credentials-secrets.bats @@ -10,13 +10,12 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fcr" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @test "Credentials using secrets" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - secret_name="test-secret" pod_name="secret-test-pod" second_pod_name="secret-envars-test-pod" @@ -52,6 +51,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats index fd09d7886..37ccd85f8 100644 --- a/tests/integration/kubernetes/k8s-file-volume.bats +++ b/tests/integration/kubernetes/k8s-file-volume.bats @@ -11,22 +11,24 @@ TEST_INITRD="${TEST_INITRD:-no}" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" pod_name="test-file-volume" container_name="busybox-file-volume-container" - tmp_file=$(exec_host mktemp /tmp/file-volume-test-foo.XXXXX) + node="$(get_one_kata_node)" + tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX) mount_path="/tmp/foo.txt" file_body="test" get_pod_config_dir } @test "Test readonly volume for pods" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" # Write test body to temp file - exec_host "echo "$file_body" > $tmp_file" + exec_host "$node" "echo "$file_body" > $tmp_file" # Create test yaml sed -e "s|HOST_FILE|$tmp_file|" ${pod_config_dir}/pod-file-volume.yaml > ${pod_config_dir}/test-pod-file-volume.yaml sed -i "s|MOUNT_PATH|$mount_path|" ${pod_config_dir}/test-pod-file-volume.yaml + sed -i "s|NODE|$node|" ${pod_config_dir}/test-pod-file-volume.yaml # Create pod kubectl create -f "${pod_config_dir}/test-pod-file-volume.yaml" @@ -41,7 +43,8 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" kubectl delete pod "$pod_name" - exec_host rm -f $tmp_file + exec_host "$node" rm -f $tmp_file rm -f ${pod_config_dir}/test-pod-file-volume.yaml.yaml } diff --git a/tests/integration/kubernetes/k8s-inotify.bats b/tests/integration/kubernetes/k8s-inotify.bats index f3dbc073f..9a87b0a96 100644 --- a/tests/integration/kubernetes/k8s-inotify.bats +++ b/tests/integration/kubernetes/k8s-inotify.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @@ -40,6 +41,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == 
"fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" kubectl delete pod "$pod_name" diff --git a/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats b/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats index 5081b8d7d..b40429245 100644 --- a/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats +++ b/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats @@ -15,7 +15,7 @@ setup() { get_pod_config_dir } -@test "Check PID namespaces" { +@test "Kill all processes in container" { # Create the pod kubectl create -f "${pod_config_dir}/initcontainer-shareprocesspid.yaml" diff --git a/tests/integration/kubernetes/k8s-nested-configmap-secret.bats b/tests/integration/kubernetes/k8s-nested-configmap-secret.bats index b84fb89cc..ebffb6e18 100644 --- a/tests/integration/kubernetes/k8s-nested-configmap-secret.bats +++ b/tests/integration/kubernetes/k8s-nested-configmap-secret.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir @@ -30,6 +31,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-pod-quota.bats b/tests/integration/kubernetes/k8s-pod-quota.bats index d9a527725..f74d8a595 100644 --- a/tests/integration/kubernetes/k8s-pod-quota.bats +++ b/tests/integration/kubernetes/k8s-pod-quota.bats @@ -8,6 +8,8 @@ load "${BATS_TEST_DIRNAME}/../../common.bash" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: https://github.com/kata-containers/kata-containers/issues/7873" + get_pod_config_dir } @@ -31,5 +33,12 @@ setup() { } teardown() { + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: https://github.com/kata-containers/kata-containers/issues/7873" + + # Debugging information + kubectl describe deployment ${deployment_name} + + # Clean-up + kubectl delete -f "${pod_config_dir}/pod-quota-deployment.yaml" kubectl delete -f "${pod_config_dir}/resource-quota.yaml" } diff --git a/tests/integration/kubernetes/k8s-projected-volume.bats b/tests/integration/kubernetes/k8s-projected-volume.bats index 33788e475..87160eeb5 100644 --- a/tests/integration/kubernetes/k8s-projected-volume.bats +++ b/tests/integration/kubernetes/k8s-projected-volume.bats @@ -10,13 +10,12 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @test "Projected volume" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - password="1f2d1e2e67df" username="admin" pod_name="test-projected-volume" @@ -53,6 +52,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-volume.bats 
b/tests/integration/kubernetes/k8s-volume.bats index 0489ff461..7bb69f95d 100644 --- a/tests/integration/kubernetes/k8s-volume.bats +++ b/tests/integration/kubernetes/k8s-volume.bats @@ -11,16 +11,21 @@ TEST_INITRD="${TEST_INITRD:-no}" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir - tmp_file=$(exec_host mktemp -d /tmp/data.XXXX) + node=$(get_one_kata_node) + tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX) + pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml) pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml) msg="Hello from Kubernetes" - exec_host "echo $msg > $tmp_file/index.html" + exec_host "$node" "echo $msg > $tmp_file/index.html" pod_name="pv-pod" # Define temporary file at yaml - sed -e "s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pod_yaml" + sed -e "s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pv_yaml" + sed -e "s|NODE|${node}|g" "${pod_config_dir}/pv-pod.yaml" > "$pod_yaml" + } @test "Create Persistent Volume" { @@ -30,7 +35,7 @@ setup() { volume_claim="pv-claim" # Create the persistent volume - kubectl create -f "$pod_yaml" + kubectl create -f "$pv_yaml" # Check the persistent volume is Available cmd="kubectl get pv $volume_name | grep Available" @@ -44,7 +49,7 @@ setup() { waitForProcess "$wait_time" "$sleep_time" "$cmd" # Create pod - kubectl create -f "${pod_config_dir}/pv-pod.yaml" + kubectl create -f "$pod_yaml" # Check pod creation kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name" @@ -55,13 +60,15 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" kubectl delete pod "$pod_name" + rm -f "$pod_yaml" kubectl delete pvc "$volume_claim" kubectl delete pv "$volume_name" - rm -f "$pod_yaml" - exec_host rm -rf "$tmp_file" + rm -f "$pv_yaml" + exec_host "$node" rm -rf "$tmp_file" } diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index f8b635d22..e0cd401d1 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -13,11 +13,13 @@ source "${kubernetes_dir}/../../common.bash" TARGET_ARCH="${TARGET_ARCH:-x86_64}" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" K8S_TEST_DEBUG="${K8S_TEST_DEBUG:-false}" +K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" if [ -n "${K8S_TEST_UNION:-}" ]; then K8S_TEST_UNION=($K8S_TEST_UNION) else - K8S_TEST_UNION=( \ + K8S_TEST_SMALL_HOST_UNION=( \ + "k8s-confidential.bats" \ "k8s-attach-handlers.bats" \ "k8s-caps.bats" \ "k8s-configmap.bats" \ @@ -36,18 +38,15 @@ else "k8s-liveness-probes.bats" \ "k8s-memory.bats" \ "k8s-nested-configmap-secret.bats" \ - "k8s-number-cpus.bats" \ "k8s-oom.bats" \ "k8s-optional-empty-configmap.bats" \ "k8s-optional-empty-secret.bats" \ - "k8s-parallel.bats" \ "k8s-pid-ns.bats" \ "k8s-pod-quota.bats" \ "k8s-port-forward.bats" \ "k8s-projected-volume.bats" \ "k8s-qos-pods.bats" \ "k8s-replication.bats" \ - "k8s-scale-nginx.bats" \ "k8s-seccomp.bats" \ "k8s-sysctls.bats" \ "k8s-security-context.bats" \ @@ -55,6 +54,29 @@ else "k8s-volume.bats" \ "k8s-nginx-connectivity.bats" \ ) + + K8S_TEST_NORMAL_HOST_UNION=( \ + "k8s-number-cpus.bats" \ + "k8s-parallel.bats" \ + 
"k8s-scale-nginx.bats" \ + ) + + case ${K8S_TEST_HOST_TYPE} in + small) + K8S_TEST_UNION=(${K8S_TEST_SMALL_HOST_UNION[@]}) + ;; + normal) + K8S_TEST_UNION=(${K8S_TEST_NORMAL_HOST_UNION[@]}) + ;; + baremetal) + K8S_TEST_UNION=(${K8S_TEST_SMALL_HOST_UNION[@]} ${K8S_TEST_NORMAL_HOST_UNION[@]}) + + ;; + *) + echo "${K8S_TEST_HOST_TYPE} is an invalid K8S_TEST_HOST_TYPE option. Valid options are: small | normal | baremetal" + return 1 + ;; + esac fi # we may need to skip a few test cases when running on non-x86_64 arch diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile new file mode 100644 index 000000000..d093c7fe8 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (c) 2023 Intel Corporatiion +# +# SPDX-License-Identifier: Apache-2.0 + +# We know that using latest is error prone, we're taking the risk here. +# hadolint ignore=DL3007 +FROM alpine:latest + +# We don't need a specific version of those packages +# hadolint ignore=DL3018 +RUN apk add --no-cache curl openssh-server + +# Download and install `cpuid`, which will be used to detect +# whether we're the container is running on a TEE guest +# hadolint ignore=DL3059 +RUN /bin/sh -c \ + 'ARCH=$(uname -m) && \ + [[ "${ARCH}" == "x86_64" ]] && \ + curl -LO https://github.com/klauspost/cpuid/releases/download/v2.2.5/cpuid-Linux_x86_64_2.2.5.tar.gz && \ + tar -xvzf cpuid-Linux_x86_64_2.2.5.tar.gz -C /usr/bin && \ + rm -rf cpuid-Linux_x86_64_2.2.5.tar.gz && \ + rm -f /usr/bin/LICENSE' || true + +# This is done just to avoid the following error starting sshd +# `sshd: no hostkeys available -- exiting.` +# hadolint ignore=DL3059 +RUN ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -P "" + +# A password needs to be set for login to work. An empty password is +# unproblematic as password-based login to root is not allowed. 
+# hadolint ignore=DL3059 +RUN passwd -d root + +# Generated with `ssh-keygen -t ed25519 -f unencrypted -P "" -C ""` +COPY ssh/unencrypted.pub /root/.ssh/authorized_keys + +ENTRYPOINT ["/usr/sbin/sshd", "-D"] diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted new file mode 100644 index 000000000..1f75d37d6 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACD5RDTjWd8c793pKpOUGt+/D+Fa7PMVUQtSudt6R8JMYAAAAIh44GnReOBp +0QAAAAtzc2gtZWQyNTUxOQAAACD5RDTjWd8c793pKpOUGt+/D+Fa7PMVUQtSudt6R8JMYA +AAAEDwZtSRH/KNwmm/QCMHcif3iMQpGPOr2d12hcQqMY3KJPlENONZ3xzv3ekqk5Qa378P +4Vrs8xVRC1K523pHwkxgAAAAAAECAwQF +-----END OPENSSH PRIVATE KEY----- diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub new file mode 100644 index 000000000..ce3b9ef60 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPlENONZ3xzv3ekqk5Qa378P4Vrs8xVRC1K523pHwkxg diff --git a/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml b/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml index c85240c94..2e23864bf 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml @@ -16,10 +16,8 @@ spec: args: ["-c", "inotifywait --timeout 120 -r /config/ && [[ -L /config/config.toml ]] && echo success" ] resources: requests: - cpu: 1 memory: 50Mi limits: - cpu: 1 memory: 1024Mi volumeMounts: - name: config diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml new file mode 100644 index 000000000..591d86de8 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +kind: Service +apiVersion: v1 +metadata: + name: "confidential-unencrypted" +spec: + selector: + app: "confidential-unencrypted" + ports: + - port: 22 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: "confidential-unencrypted" +spec: + selector: + matchLabels: + app: "confidential-unencrypted" + template: + metadata: + labels: + app: "confidential-unencrypted" + spec: + runtimeClassName: kata + containers: + - name: "confidential-unencrypted" + image: ghcr.io/kata-containers/test-images:unencrypted-nightly + imagePullPolicy: Always + diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml index 4784b1477..e7a194f42 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml @@ -11,6 +11,7 @@ spec: terminationGracePeriodSeconds: 0 runtimeClassName: kata restartPolicy: Never + nodeName: NODE volumes: - name: shared-file hostPath: diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml index 6a165b971..c3686a981 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml @@ -10,6 +10,7 @@ metadata: spec: terminationGracePeriodSeconds: 0 runtimeClassName: kata + nodeName: NODE volumes: - name: pv-storage persistentVolumeClaim: diff --git a/tests/integration/kubernetes/setup.sh b/tests/integration/kubernetes/setup.sh index fb68e47cb..45e652f32 100755 --- a/tests/integration/kubernetes/setup.sh +++ b/tests/integration/kubernetes/setup.sh @@ -22,18 +22,20 @@ set_runtime_class() { set_kernel_path() { if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then mariner_kernel_path="/usr/share/cloud-hypervisor/vmlinux.bin" - find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -exec yq write -i {} 'metadata.annotations[io.katacontainers.config.hypervisor.kernel]' "${mariner_kernel_path}" \; + # Not using find -exec as that still returns 0 on failure. + find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -print0 | xargs -0 -I% yq write -i % 'metadata.annotations[io.katacontainers.config.hypervisor.kernel]' "${mariner_kernel_path}" fi } set_initrd_path() { if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then initrd_path="/opt/kata/share/kata-containers/kata-containers-initrd-mariner.img" - find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -exec yq write -i {} 'metadata.annotations[io.katacontainers.config.hypervisor.initrd]' "${initrd_path}" \; + find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -print0 | xargs -0 -I% yq write -i % 'metadata.annotations[io.katacontainers.config.hypervisor.initrd]' "${initrd_path}" fi } main() { + ensure_yq reset_workloads_work_dir set_runtime_class set_kernel_path diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index b6d448cac..3c8876e13 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -38,13 +38,34 @@ get_pod_config_dir() { info "k8s configured to use runtimeclass" } +# Return the first worker found that is kata-runtime labeled. 
+get_one_kata_node() { + local resource_name + resource_name="$(kubectl get node -l katacontainers.io/kata-runtime=true -o name | head -1)" + # Remove leading "/node" + echo "${resource_name/"node/"}" +} + # Runs a command in the host filesystem. +# +# Parameters: +# $1 - the node name +# exec_host() { - node="$(kubectl get node -o name)" + node="$1" # `kubectl debug` always returns 0, so we hack it to return the right exit code. - command="$@" + command="${@:2}" command+='; echo -en \\n$?' - output="$(kubectl debug -qit "${node}" --image=alpine:latest -- chroot /host bash -c "${command}")" + # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051 + # tl;dr: When testing with CRI-O we're facing the foillowing error: + # ``` + # (from function `exec_host' in file tests_common.sh, line 51, + # in test file k8s-file-volume.bats, line 25) + # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127 + # [bats-exec-test:38] INFO: k8s configured to use runtimeclass + # bash: line 1: $'\r': command not found + # ``` + output="$(kubectl debug -qit "node/${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')" kubectl get pods -o name | grep node-debugger | xargs kubectl delete > /dev/null exit_code="$(echo "${output}" | tail -1)" echo "$(echo "${output}" | head -n -1)" diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh new file mode 100644 index 000000000..c4803d5db --- /dev/null +++ b/tests/integration/nerdctl/gha-run.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +nerdctl_dir="$(dirname "$(readlink -f "$0")")" +source "${nerdctl_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies for running the nerdctl tests" + + # Dependency list of projects that we can rely on the system packages + # - wget + # - Used to download the nerdctl-full tarball + # - pip + # - Used to install lastversion, which will be used to get the latest + # release of the nerdctl + declare -a system_deps=( + wget + pip + ) + + sudo apt update + sudo apt -y install "${system_deps[@]}" + + # Install lastversion from pip + # + # --break-system-packages is, unfortunately, needed here as it'll also + # bring in some python3 dependencies on its own + pip install lastversion --break-system-packages + + # As the command above will install lastversion on $HOME/.local/bin, we + # need to add it to the PATH + export PATH=$PATH:${HOME}/.local/bin + + # Download the nerdctl-full tarball, as it comes with all the deps + # needed. 
+ nerdctl_lastest_version=$(lastversion containerd/nerdctl) + wget https://github.com/containerd/nerdctl/releases/download/v${nerdctl_lastest_version}/nerdctl-full-${nerdctl_lastest_version}-linux-amd64.tar.gz + + # Unpack the latest nerdctl into /usr/local/ + sudo tar -xvf nerdctl-full-${nerdctl_lastest_version}-linux-amd64.tar.gz -C /usr/local/ + + # Start containerd service + sudo systemctl daemon-reload + sudo systemctl start containerd + + # Create the default containerd configuration + sudo mkdir -p /etc/containerd + containerd config default > sudo tee /etc/containerd/config.toml + sudo systemctl restart containerd +} + +function run() { + info "Running nerdctl smoke test tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + + info "Running nerdctl with runc" + sudo nerdctl run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com + + info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})" + sudo nerdctl run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/integration/nydus/gha-run.sh b/tests/integration/nydus/gha-run.sh index 86707d504..357fe6fcf 100755 --- a/tests/integration/nydus/gha-run.sh +++ b/tests/integration/nydus/gha-run.sh @@ -16,20 +16,48 @@ source "${nydus_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the nydus tests" - return 0 + # Dependency list of projects that we can rely on the system packages + # - jq + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + # Dependency list of projects that we can install them + # directly from their releases on GitHub: + # - containerd + # - cri-container-cni release tarball already includes CNI plugins + # - cri-tools + # - nydus + # - nydus-snapshotter + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + github_deps[1]="cri_tools:$(get_from_kata_deps "externals.critools.latest")" + github_deps[2]="nydus:$(get_from_kata_deps "externals.nydus.version")" + github_deps[3]="nydus_snapshotter:$(get_from_kata_deps "externals.nydus-snapshotter.version")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done } function run() { info "Running nydus tests using ${KATA_HYPERVISOR} hypervisor" - return 0 + enabling_hypervisor + bash -c "${nydus_dir}/nydus_tests.sh" } function main() { action="${1:-}" case "${action}" in install-dependencies) install_dependencies ;; - install-kata) return 0 ;; + install-kata) install_kata ;; run) run ;; *) >&2 die "Invalid argument" ;; esac diff --git a/tests/integration/nydus/nydus-container.yaml b/tests/integration/nydus/nydus-container.yaml index a30327ade..a8b0248dd 100644 --- a/tests/integration/nydus/nydus-container.yaml +++ b/tests/integration/nydus/nydus-container.yaml @@ -1,5 +1,7 @@ metadata: name: nydus-container + namespace: default + uid: nydus-containerd-uid image: image: ghcr.io/dragonflyoss/image-service/alpine:nydus-latest command: diff --git a/tests/integration/nydus/nydus-sandbox.yaml b/tests/integration/nydus/nydus-sandbox.yaml index 
9f039d726..7a3e32c93 100644 --- a/tests/integration/nydus/nydus-sandbox.yaml +++ b/tests/integration/nydus/nydus-sandbox.yaml @@ -2,4 +2,5 @@ metadata: attempt: 1 name: nydus-sandbox namespace: default + uid: nydus-sandbox-uid log_directory: /tmp diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index fe49580da..e4e70139a 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -12,8 +12,7 @@ set -o pipefail set -o errtrace dir_path=$(dirname "$0") -source "${dir_path}/../../lib/common.bash" -source "${dir_path}/../../.ci/lib.sh" +source "${dir_path}/../../common.bash" source "/etc/os-release" || source "/usr/lib/os-release" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" @@ -30,56 +29,20 @@ containerd_config_backup="/tmp/containerd.config.toml" # test image for container IMAGE="${IMAGE:-ghcr.io/dragonflyoss/image-service/alpine:nydus-latest}" -if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "cloud-hypervisor" ] && [ "$KATA_HYPERVISOR" != "dragonball" ]; then - echo "Skip nydus test for $KATA_HYPERVISOR, it only works for QEMU/CLH/DB now." +if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "clh" ]; then + echo "Skip nydus test for $KATA_HYPERVISOR, it only works for QEMU/CLH now." exit 0 fi -arch="$(uname -m)" -if [ "$arch" != "x86_64" ]; then - echo "Skip nydus test for $arch, it only works for x86_64 now. See https://github.com/kata-containers/tests/issues/4445" - exit 0 -fi - -function install_from_tarball() { - local package_name="$1" - local binary_name="$2" - [ -n "$package_name" ] || die "need package_name" - [ -n "$binary_name" ] || die "need package release binary_name" - - local url=$(get_version "externals.${package_name}.url") - local version=$(get_version "externals.${package_name}.version") - local tarball_url="${url}/releases/download/${version}/${binary_name}-${version}-$arch.tgz" - if [ "${package_name}" == "nydus" ]; then - local goarch="$(${dir_path}/../../.ci/kata-arch.sh --golang)" - tarball_url="${url}/releases/download/${version}/${binary_name}-${version}-linux-$goarch.tgz" - fi - echo "Download tarball from ${tarball_url}" - curl -Ls "$tarball_url" | sudo tar xfz - -C /usr/local/bin --strip-components=1 -} - function setup_nydus() { - # install nydus - install_from_tarball "nydus" "nydus-static" - - # install nydus-snapshotter - install_from_tarball "nydus-snapshotter" "nydus-snapshotter" - # Config nydus snapshotter sudo -E cp "$dir_path/nydusd-config.json" /etc/ + sudo -E cp "$dir_path/snapshotter-config.toml" /etc/ # start nydus-snapshotter - nohup /usr/local/bin/containerd-nydus-grpc \ - --config-path /etc/nydusd-config.json \ - --shared-daemon \ - --log-level debug \ - --root /var/lib/containerd/io.containerd.snapshotter.v1.nydus \ - --cache-dir /var/lib/nydus/cache \ - --nydusd-path /usr/local/bin/nydusd \ - --nydusimg-path /usr/local/bin/nydus-image \ - --disable-cache-manager true \ - --enable-nydus-overlayfs true \ - --log-to-stdout >/dev/null 2>&1 & + sudo nohup /usr/local/bin/containerd-nydus-grpc \ + --config /etc/snapshotter-config.toml \ + --nydusd-config /etc/nydusd-config.json & } function config_kata() { @@ -136,40 +99,51 @@ function config_containerd() { [plugins.cri.containerd.runtimes.runc.options] BinaryName = "${runc_path}" Root = "" - [plugins.cri.containerd.runtimes.kata] - runtime_type = "io.containerd.kata.v2" + [plugins.cri.containerd.runtimes.kata-${KATA_HYPERVISOR}] + runtime_type = "io.containerd.kata-${KATA_HYPERVISOR}.v2" 
privileged_without_host_devices = true EOF } +function check_nydus_snapshotter_exist() { + echo "check_nydus_snapshotter_exist" + bin="containerd-nydus-grpc" + if pgrep -f "$bin" >/dev/null; then + echo "nydus-snapshotter is running" + else + die "nydus-snapshotter is not running" + fi +} + function setup() { setup_nydus config_kata config_containerd restart_containerd_service check_processes + check_nydus_snapshotter_exist extract_kata_env } function run_test() { - sudo -E crictl pull "${IMAGE}" - pod=$(sudo -E crictl runp -r kata $dir_path/nydus-sandbox.yaml) + sudo -E crictl --timeout=20s pull "${IMAGE}" + pod=$(sudo -E crictl --timeout=20s runp -r kata-${KATA_HYPERVISOR} $dir_path/nydus-sandbox.yaml) echo "Pod $pod created" - cnt=$(sudo -E crictl create $pod $dir_path/nydus-container.yaml $dir_path/nydus-sandbox.yaml) + cnt=$(sudo -E crictl --timeout=20s create $pod $dir_path/nydus-container.yaml $dir_path/nydus-sandbox.yaml) echo "Container $cnt created" - sudo -E crictl start $cnt + sudo -E crictl --timeout=20s start $cnt echo "Container $cnt started" # ensure container is running - state=$(sudo -E crictl inspect $cnt | jq .status.state | tr -d '"') + state=$(sudo -E crictl --timeout=20s inspect $cnt | jq .status.state | tr -d '"') [ $state == "CONTAINER_RUNNING" ] || die "Container is not running($state)" # run a command in container - crictl exec $cnt ls + sudo -E crictl --timeout=20s exec $cnt ls # cleanup containers - sudo -E crictl stop $cnt - sudo -E crictl stopp $pod - sudo -E crictl rmp $pod + sudo -E crictl --timeout=20s stop $cnt + sudo -E crictl --timeout=20s stopp $pod + sudo -E crictl --timeout=20s rmp $pod } function teardown() { @@ -177,11 +151,11 @@ function teardown() { # kill nydus-snapshotter bin=containerd-nydus-grpc - kill -9 $(pidof $bin) || true + sudo -E kill -9 $(pidof $bin) || true [ "$(pidof $bin)" == "" ] || die "$bin is running" bin=nydusd - kill -9 $(pidof $bin) || true + sudo -E kill -9 $(pidof $bin) || true [ "$(pidof $bin)" == "" ] || die "$bin is running" # restore kata configuratiom.toml if needed diff --git a/tests/integration/nydus/nydusd-config.json b/tests/integration/nydus/nydusd-config.json index 654b39353..4b75426d7 100644 --- a/tests/integration/nydus/nydusd-config.json +++ b/tests/integration/nydus/nydusd-config.json @@ -3,17 +3,13 @@ "backend": { "type": "registry", "config": { - "scheme": "https", "timeout": 5, "connect_timeout": 5, "retry_limit": 2 } }, "cache": { - "type": "blobcache", - "config": { - "work_dir": "/var/lib/nydus/cache" - } + "type": "blobcache" } }, "mode": "direct", @@ -22,6 +18,8 @@ "enable_xattr": true, "fs_prefetch": { "enable": true, - "threads_count": 2 + "threads_count": 8, + "merging_size": 1048576, + "prefetch_all": true } -} +} \ No newline at end of file diff --git a/tests/integration/nydus/snapshotter-config.toml b/tests/integration/nydus/snapshotter-config.toml new file mode 100644 index 000000000..7b668236c --- /dev/null +++ b/tests/integration/nydus/snapshotter-config.toml @@ -0,0 +1,128 @@ +version = 1 +# Snapshotter's own home directory where it stores and creates necessary resources +root = "/var/lib/containerd-nydus" +# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket +address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +daemon_mode = "dedicated" +# Whether snapshotter should try to clean up resources when it is closed +cleanup_on_close = false + +[system] +# Snapshotter's debug and trace HTTP server interface +enable = true +# Unix domain socket path 
where system controller is listening on +address = "/run/containerd-nydus/system.sock" + +[system.debug] +# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. +# This option specifies the profile duration when nydusd is downloading and uncomproessing data. +daemon_cpu_profile_duration_secs = 5 +# Enable by assigning an address, empty indicates pprof server is disabled +pprof_address = "" + +[daemon] +# Specify a configuration file for nydusd +nydusd_config = "/etc/nydusd-config.json" +nydusd_path = "/usr/local/bin/nydusd" +nydusimage_path = "/usr/local/bin/nydus-image" +# fusedev or fscache +fs_driver = "fusedev" +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "restart" +# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. +# Setting to 0 will use the default configuration of nydusd. +threads_number = 4 +# Log rotation size for nydusd, in unit MB(megabytes) +log_rotation_size = 100 + + +[cgroup] +# Whether to use separate cgroup for nydusd. +enable = true +# The memory limit for nydusd cgroup, which contains all nydusd processes. +# Percentage is supported as well, please ensure it is end with "%". +# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". +memory_limit = "" + +[log] +# Print logs to stdout rather than logging files +log_to_stdout = false +# Snapshotter's log level +level = "info" +log_rotation_compress = true +log_rotation_local_time = true +# Max number of days to retain logs +log_rotation_max_age = 7 +log_rotation_max_backups = 5 +# In unit MB(megabytes) +log_rotation_max_size = 100 + +[metrics] +# Enable by assigning an address, empty indicates metrics server is disabled +address = ":9110" + +[remote] +convert_vpc_registry = false + +[remote.mirrors_config] +# Snapshotter will overwrite daemon's mirrors configuration +# if the values loaded from this driectory are not null before starting a daemon. +# Set to "" or an empty directory to disable it. +#dir = "/etc/nydus/certs.d" + +[remote.auth] +# Fetch the private registry auth by listening to K8s API server +enable_kubeconfig_keychain = false +# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) +kubeconfig_path = "" +# Fetch the private registry auth as CRI image service proxy +enable_cri_keychain = false +# the target image service when using image proxy +#image_service_address = "/run/containerd/containerd.sock" + +[snapshot] +# Let containerd use nydus-overlayfs mount helper +enable_nydus_overlayfs = true +# Insert Kata Virtual Volume option to `Mount.Options` +enable_kata_volume = false +# Whether to remove resources when a snapshot is removed +sync_remove = false + +[cache_manager] +disable = false +gc_period = "24h" +# Directory to host cached files +cache_dir = "" + +[image] +public_key_file = "" +validate_signature = false + +# The configuraions for features that are not production ready +[experimental] +# Whether to enable stargz support +enable_stargz = false +# Whether to enable referrers support +# The option enables trying to fetch the Nydus image associated with the OCI image and run it. +# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers +enable_referrer_detect = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Mount rafs on host by loopdev and EROFS +mount_tarfs_on_host = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" \ No newline at end of file diff --git a/tests/integration/runk/gha-run.sh b/tests/integration/runk/gha-run.sh new file mode 100755 index 000000000..3f97c63aa --- /dev/null +++ b/tests/integration/runk/gha-run.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +runk_dir="$(dirname "$(readlink -f "$0")")" +source "${runk_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the runk tests" + + # Dependency list of projects that we can rely on the system packages + # - jq + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + # Dependency list of projects that we can install them + # directly from their releases on GitHub: + # - containerd + # - cri-container-cni release tarball already includes CNI plugins + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done +} + +function run() { + info "Running runk tests using" + + bash -c ${runk_dir}/runk-tests.sh +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/integration/runk/runk-tests.sh b/tests/integration/runk/runk-tests.sh new file mode 100755 index 000000000..714f10af5 --- /dev/null +++ b/tests/integration/runk/runk-tests.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# +# Copyright (c) 2023 Kata Contributors +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will validate runk with containerd + +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +tracing_dir="$(dirname "$(readlink -f "$0")")" +source "${tracing_dir}/../../common.bash" +source "${tracing_dir}/../../metrics/lib/common.bash" + +RUNK_BIN_PATH="/usr/local/bin/runk" +TEST_IMAGE="docker.io/library/busybox:latest" +CONTAINER_ID="id1" +PID_FILE="${CONTAINER_ID}.pid" +WORK_DIR="$(mktemp -d --tmpdir runk.XXXXX)" + +setup() { + echo "pull container image" + check_images ${TEST_IMAGE} +} + +test_runk() { + echo "start container with runk" + # Bind mount ${WORK_DIR}:/tmp. 
Tests below will store files in this dir and check them when container is frozon. + sudo ctr run --pid-file ${PID_FILE} -d --runc-binary ${RUNK_BIN_PATH} --mount type=bind,src=${WORK_DIR},dst=/tmp,options=rbind:rw ${TEST_IMAGE} ${CONTAINER_ID} + read CID PID STATUS <<< $(sudo ctr t ls | grep ${CONTAINER_ID}) + [ ${PID} == $(cat ${PID_FILE}) ] || die "pid is not consistent" + [ ${STATUS} == "RUNNING" ] || die "container status is not RUNNING" + + echo "exec process in a container" + sudo ctr t exec --exec-id id1 ${CONTAINER_ID} sh -c "echo hello > /tmp/foo" + [ "hello" == "$(sudo ctr t exec --exec-id id1 ${CONTAINER_ID} cat /tmp/foo)" ] || die "exec process failed" + + echo "test ps command" + sudo ctr t exec --detach --exec-id id1 ${CONTAINER_ID} sh + # one line is the titles, and the other 2 lines are porcess info + [ "3" == "$(sudo ctr t ps ${CONTAINER_ID} | wc -l)" ] || die "ps command failed" + + echo "test pause and resume" + # The process outputs lines into /tmp/{CONTAINER_ID}, which can be read in host when it's frozon. + sudo ctr t exec --detach --exec-id id2 ${CONTAINER_ID} sh -c "while true; do echo hello >> /tmp/${CONTAINER_ID}; sleep 0.1; done" + # sleep for 1s to make sure the process outputs some lines + sleep 1 + sudo ctr t pause ${CONTAINER_ID} + [ "PAUSED" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | grep -o PAUSED)" ] || die "status is not PAUSED" + echo "container is paused" + local TMP_FILE="${WORK_DIR}/${CONTAINER_ID}" + local lines1=$(cat ${TMP_FILE} | wc -l) + # sleep for a while and check the lines are not changed. + sleep 1 + local lines2=$(cat ${TMP_FILE} | wc -l) + [ ${lines1} == ${lines2} ] || die "paused container is still running" + sudo ctr t resume ${CONTAINER_ID} + [ "RUNNING" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | grep -o RUNNING)" ] || die "status is not RUNNING" + echo "container is resumed" + # sleep for a while and check the lines are changed. + sleep 1 + local lines3=$(cat ${TMP_FILE} | wc -l) + [ ${lines2} -lt ${lines3} ] || die "resumed container is not running" + + echo "kill the container and poll until it is stopped" + sudo ctr t kill --signal SIGKILL --all ${CONTAINER_ID} + # poll for a while until the task receives signal and exit + local cmd='[ "STOPPED" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | awk "{print \$3}")" ]' + waitForProcess 10 1 "${cmd}" || die "failed to kill task" + + echo "check the container is stopped" + # there is only title line of ps command + [ "1" == "$(sudo ctr t ps ${CONTAINER_ID} | wc -l)" ] || die "kill command failed" + + # High-level container runtimes such as containerd call the kill command with + # --all option in order to terminate all processes inside the container + # even if the container already is stopped. Hence, a low-level runtime + # should allow kill --all regardless of the container state like runc. 
+ echo "test kill --all is allowed regardless of the container state" + sudo ctr t kill --signal SIGKILL ${CONTAINER_ID} && die "kill should fail" + sudo ctr t kill --signal SIGKILL --all ${CONTAINER_ID} || die "kill --all should not fail" + + echo "delete the container" + sudo ctr t rm ${CONTAINER_ID} + [ -z "$(sudo ctr t ls | grep ${CONTAINER_ID})" ] || die "failed to delete task" + sudo ctr c rm ${CONTAINER_ID} || die "failed to delete container" +} + +clean_up() { + rm -f ${PID_FILE} + rm -rf ${WORK_DIR} +} + +setup +test_runk +clean_up diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 05ccf3891..56af31318 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -1,7 +1,5 @@ # Kata Containers metrics -> **_Warning:_** Migration of metrics tests is WIP and you may not find all tests available here, but you can take a look at the [tests repo](https://github.com/kata-containers/tests/tree/main/metrics). - This directory contains the metrics tests for Kata Containers. The tests within this directory have a number of potential use cases: @@ -35,7 +33,7 @@ regression checking, we try to define and stick to some "quality measures" for o ## Categories -Kata Container metrics tend to fall into a set of categories, and we organise the tests +Kata Container metrics tend to fall into a set of categories, and we organize the tests within this folder as such. Each sub-folder contains its own `README` detailing its own tests. @@ -62,9 +60,8 @@ For further details see the [density tests documentation](density). Tests relating to networking. General items could include: - bandwidth - latency -- jitter +- `jitter` - parallel bandwidth -- write and read percentiles For further details see the [network tests documentation](network). @@ -78,10 +75,12 @@ For further details see the [storage tests documentation](storage). Test relating to measure reading and writing against clusters. +For further details see the [disk tests documentation](disk). + ### Machine Learning Tests relating with TensorFlow and Pytorch implementations of several popular -convolutional models. +`convolutional` models. For further details see the [machine learning tests documentation](machine_learning). @@ -114,7 +113,7 @@ to do some JSON handling themselves before injecting their JSON into the API. #### `metrics_json_init()` -Initialise the API. Must be called before all other JSON API calls. +Initialize the API. Must be called before all other JSON API calls. Should be matched by a final call to `metrics_json_save`. Relies upon the `TEST_NAME` variable to derive the file name the final JSON @@ -148,7 +147,7 @@ Add a JSON formatted fragment at the top level. #### `metrics_json_start_array()` -Initialise the JSON array API subsystem, ready to accept JSON fragments via +Initialize the JSON array API subsystem, ready to accept JSON fragments via `metrics_json_add_array_element`. This JSON array API subset allows accumulation of multiple entries into a @@ -210,3 +209,7 @@ set if necessary. ## `checkmetrics` `checkmetrics` is a CLI tool to check a metrics CI results file. For further reference see the [`checkmetrics`](cmd/checkmetrics). + +## Report generator + +See the [report generator](report) documentation. 
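As a concrete illustration of the JSON API flow documented above, here is a minimal sketch of how a metrics test might drive it. The test name, the result fragment, and the repo-root-relative source path are illustrative assumptions rather than an existing test; `cassandra.sh` further down in this patch follows the same pattern.

```
#!/bin/bash
# Minimal sketch of the metrics JSON API flow (illustrative values only).
source "tests/metrics/lib/common.bash"

TEST_NAME="example-metric"   # metrics_json_init derives the output file name from this
metrics_json_init            # must be called before any other JSON API call

metrics_json_start_array
json="$(cat << EOF
{
    "example": {
        "Result": 42,
        "Units": "ms"
    }
}
EOF
)"
metrics_json_add_array_element "$json"
metrics_json_end_array "Results"

metrics_json_save            # writes the accumulated JSON results file
```

The file written by `metrics_json_save` is the kind of input that `checkmetrics` validates against the TOML baselines updated in the next files.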
diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 3e71b7e74..c0436b24c 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -29,7 +29,7 @@ description = "measure memory usage" # within (inclusive) checkvar = ".\"memory-footprint\".Results | .[] | .average.Result" checktype = "mean" -midval = 2518364.00 +midval = 127220.25 minpercent = 20.0 maxpercent = 20.0 @@ -73,27 +73,92 @@ minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow resnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .resnet.Result" checktype = "mean" midval = 3566.0 minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow alexnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .alexnet.Result" checktype = "mean" midval = 98.0 minpercent = 20.0 maxpercent = 20.0 + +[[metric]] +name = "latency" +type = "json" +description = "measure container latency" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"latency\".Results | .[] | .latency.Result" +checktype = "mean" +midval = 0.75 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container cpu utilization using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" +checktype = "mean" +midval = 85.60 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" +checktype = "mean" +midval = 61176889941.19 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container parallel bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" +checktype = "mean" +midval = 47734838389.0 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "iperf" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" +checktype = "mean" +midval = 0.044 +minpercent = 50.0 +maxpercent = 50.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index d615db0b3..666a898be 100644 --- 
a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -29,7 +29,7 @@ description = "measure memory usage" # within (inclusive) checkvar = ".\"memory-footprint\".Results | .[] | .average.Result" checktype = "mean" -midval = 2435844.00 +midval = 122832.40 minpercent = 20.0 maxpercent = 20.0 @@ -73,27 +73,92 @@ minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow resnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .resnet.Result" checktype = "mean" midval = 3546.0 minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow alexnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .alexnet.Result" checktype = "mean" midval = 98.0 minpercent = 20.0 maxpercent = 20.0 + +[[metric]] +name = "latency" +type = "json" +description = "measure container latency" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"latency\".Results | .[] | .latency.Result" +checktype = "mean" +midval = 0.70 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container cpu utilization using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" +checktype = "mean" +midval = 99.86 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" +checktype = "mean" +midval = 55344417086.81 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container parallel bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" +checktype = "mean" +midval = 52644229340.0 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "iperf" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" +checktype = "mean" +midval = 0.041 +minpercent = 50.0 +maxpercent = 50.0 diff --git a/tests/metrics/cpu/README.md b/tests/metrics/cpu/README.md index 3dd3a2928..339cd8a10 100644 --- a/tests/metrics/cpu/README.md +++ b/tests/metrics/cpu/README.md @@ -5,5 +5,6 @@ This is a test of C-Ray which is a simple raytracer designed to test the floatin Individual test can be run by hand, for example: ``` -$ cd metrics/disk/c-ray $ ./cray.sh +$ cd metrics/cpu/c-ray +$ ./cray.sh ``` diff --git 
a/tests/metrics/density/memory_usage.sh b/tests/metrics/density/memory_usage.sh index f0505aaff..4487f0dec 100755 --- a/tests/metrics/density/memory_usage.sh +++ b/tests/metrics/density/memory_usage.sh @@ -19,7 +19,7 @@ source "${SCRIPT_PATH}/../lib/common.bash" # Busybox image: Choose a small workload image, this is # in order to measure the runtime footprint, not the workload # footprint. -IMAGE='quay.io/prometheus/busybox:latest' +IMAGE="quay.io/prometheus/busybox:latest" CMD='tail -f /dev/null' NUM_CONTAINERS="$1" @@ -31,6 +31,11 @@ KSM_ENABLE_FILE="/sys/kernel/mm/ksm/run" MEM_TMP_FILE=$(mktemp meminfo.XXXXXXXXXX) PS_TMP_FILE=$(mktemp psinfo.XXXXXXXXXX) +# Variables used to collect memory footprint +global_hypervisor_mem=0 +global_virtiofsd_mem=0 +global_shim_mem=0 + function remove_tmp_file() { rm -rf "${MEM_TMP_FILE}" "${PS_TMP_FILE}" } @@ -85,14 +90,14 @@ EOF # This function measures the PSS average # memory of a process. function get_pss_memory(){ - ps="$1" - mem_amount=0 - count=0 - avg=0 + local ps="${1}" + local shim_result_on="${2:-0}" + local mem_amount=0 + local count=0 + local avg=0 - if [ -z "${ps}" ]; then - die "No argument to get_pss_memory()" - fi + [ -z "${ps}" ] && die "No argument to get_pss_memory()" + ps="$(readlink -f ${ps})" # Save all the processes names # This will be help us to retrieve raw information @@ -104,19 +109,23 @@ function get_pss_memory(){ # This will help us to retrieve raw information echo "${data}" >> "${MEM_TMP_FILE}" - gral_data=$(echo "${data// /+}" | bc) - for i in "${gral_data}"; do - if (( $i > 0 ));then - mem_amount=$(( i + mem_amount )) - (( count++ )) + for i in ${data[*]}; do + if [ ${i} -gt 0 ]; then + let "mem_amount+=i" + let "count+=1" + [ $count -eq $NUM_CONTAINERS ] && break fi done - if (( "${count}" > 0 ));then - avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") - fi + [ ${count} -eq 0 ] && die "No pss memory was measured for PID: ${ps}" - echo "${avg}" + avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") + + if [ "${shim_result_on}" -eq "1" ]; then + global_shim_mem="${avg}" + else + global_hypervisor_mem="${avg}" + fi } function ppid() { @@ -137,12 +146,9 @@ function get_pss_memory_virtiofsd() { avg=0 virtiofsd_path=${1:-} - if [ -z "${virtiofsd_path}" ]; then - die "virtiofsd_path not provided" - fi + [ -z "${virtiofsd_path}" ] && die "virtiofsd_path not provided" echo "${virtiofsd_path}" >> "${PS_TMP_FILE}" - virtiofsd_pids=$(ps aux | grep [v]irtiofsd | awk '{print $2}' | head -1) data=$(sudo smem --no-header -P "^${virtiofsd_path}" -c pid -c "pid pss") @@ -164,15 +170,13 @@ function get_pss_memory_virtiofsd() { if ((pss_process > 0)); then mem_amount=$((pss_process + mem_amount)) - ((count++)) + let "count+=1" fi fi done - if (( "${count}" > 0 ));then - avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") - fi - echo "${avg}" + [ "${count}" -gt 0 ] && global_virtiofsd_mem=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") + } function get_individual_memory(){ @@ -278,22 +282,25 @@ EOF # Now if you do not have enough rights # the smem failure to read the stats will also be trapped. 
- hypervisor_mem="$(get_pss_memory ${HYPERVISOR_PATH})" - if [ "${hypervisor_mem}" == "0" ]; then + get_pss_memory ${HYPERVISOR_PATH} + + if [ "${global_hypervisor_mem}" == "0" ]; then die "Failed to find PSS for ${HYPERVISOR_PATH}" fi - virtiofsd_mem="$(get_pss_memory_virtiofsd ${VIRTIOFSD_PATH})" - if [ "${virtiofsd_mem}" == "0" ]; then + get_pss_memory_virtiofsd ${VIRTIOFSD_PATH} + + if [ "${global_virtiofsd_mem}" == "0" ]; then echo >&2 "WARNING: Failed to find PSS for ${VIRTIOFSD_PATH}" fi - shim_mem="$(get_pss_memory ${SHIM_PATH})" - if [ "${shim_mem}" == "0" ]; then + + get_pss_memory ${SHIM_PATH} 1 + + if [ "${global_shim_mem}" == "0" ]; then die "Failed to find PSS for ${SHIM_PATH}" fi - mem_usage="$(bc -l <<< "scale=2; ${hypervisor_mem} +${virtiofsd_mem} + ${shim_mem}")" - memory_usage="${mem_usage}" + mem_usage="$(bc -l <<< "scale=2; ${global_hypervisor_mem} + ${global_virtiofsd_mem} + ${global_shim_mem}")" local json="$(cat << EOF { @@ -302,15 +309,15 @@ EOF "Units" : "KB" }, "qemus": { - "Result": ${hypervisor_mem}, + "Result": ${global_hypervisor_mem}, "Units" : "KB" }, "virtiofsds": { - "Result": ${virtiofsd_mem}, + "Result": ${global_virtiofsd_mem}, "Units" : "KB" }, "shims": { - "Result": ${shim_mem}, + "Result": ${global_shim_mem}, "Units" : "KB" } } @@ -354,9 +361,7 @@ function main(){ #Check for KSM before reporting test name, as it can modify it check_for_ksm - init_env - check_cmds "${SMEM_BIN}" bc check_images "${IMAGE}" @@ -378,6 +383,7 @@ function main(){ get_individual_memory fi + info "memory usage test completed" metrics_json_save clean_env_ctr } diff --git a/tests/metrics/disk/README.md b/tests/metrics/disk/README.md new file mode 100644 index 000000000..274fb9730 --- /dev/null +++ b/tests/metrics/disk/README.md @@ -0,0 +1,18 @@ +# Kata Containers Cassandra Metrics + +Kata Containers provides a series of read and write performance tests using +Cassandra Stress tool. The Cassandra Stress tool is a Java-based stress testing +utility for basic benchmarking and load testing a cluster. This tool helps us +to populate a cluster and stress test CQL tables and queries. This test is +based in two operations, the first one is writing against the cluster or +populating the database and the second one is reading the cluster that was +populated by the writing test. 
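For reference, the write and read phases boil down to `cassandra-stress` invocations executed inside the `cassandra-0` pod, along the lines shown below; these mirror the `write_cmd` and `read_cmd` used by `cassandra.sh` later in this patch, and the operation counts are simply the values that script currently uses.

```
$ kubectl exec -i cassandra-0 -- sh -c \
    "/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema keyspace=keyspace1 -pop seq=1..1000000 -node cassandra"
$ kubectl exec -i cassandra-0 -- sh -c \
    "/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress read n=200000 -rate threads=50"
```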
+ +## Running the test + +Individual tests can be run by hand, for example: + +``` +$ cd metrics/disk/cassandra_kubernetes +$ ./cassandra.sh +``` diff --git a/tests/metrics/disk/cassandra_kubernetes/cassandra.sh b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh new file mode 100755 index 000000000..b6c636548 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +TEST_NAME="${TEST_NAME:-cassandra}" +cassandra_file=$(mktemp cassandraresults.XXXXXXXXXX) +cassandra_read_file=$(mktemp cassandrareadresults.XXXXXXXXXX) + +function remove_tmp_file() { + rm -rf "${cassandra_file}" "${cassandra_read_file}" +} + +trap remove_tmp_file EXIT + +function cassandra_write_test() { + cassandra_start + export pod_name="cassandra-0" + export write_cmd="/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema keyspace="keyspace1" -pop seq=1..1000000 -node cassandra" + number_of_retries="50" + for _ in $(seq 1 "$number_of_retries"); do + if kubectl exec -i cassandra-0 -- sh -c 'nodetool status' | grep Up; then + ok="1" + break; + fi + sleep 1 + done + # This is needed to wait that cassandra is up + sleep 30 + kubectl exec -i cassandra-0 -- sh -c "$write_cmd" > "${cassandra_file}" + write_op_rate=$(cat "${cassandra_file}" | grep -e "Op rate" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_mean=$(cat "${cassandra_file}" | grep -e "Latency mean" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_95th=$(cat "${cassandra_file}" | grep -e "Latency 95th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_99th=$(cat "${cassandra_file}" | grep -e "Latency 99th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_median=$(cat "${cassandra_file}" | grep -e "Latency median" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + + export read_cmd="/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress read n=200000 -rate threads=50" + kubectl exec -i cassandra-0 -- sh -c "$read_cmd" > "${cassandra_read_file}" + read_op_rate=$(cat "${cassandra_read_file}" | grep -e "Op rate" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_mean=$(cat "${cassandra_read_file}" | grep -e "Latency mean" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_95th=$(cat "${cassandra_read_file}" | grep -e "Latency 95th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_99th=$(cat "${cassandra_read_file}" | grep -e "Latency 99th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_median=$(cat "${cassandra_read_file}" | grep -e "Latency median" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + + metrics_json_init + # Save configuration + metrics_json_start_array + + local json="$(cat << EOF + { + "Write Op rate": { + "Result" : "$write_op_rate", + "Units" : "op/s" + }, + "Write Latency Mean": { + "Result" : "$write_latency_mean", + "Units" : "ms" + }, + "Write Latency 95th percentile": { + "Result" : "$write_latency_95th", + "Units" : "ms" + }, + "Write Latency 99th percentile": { + "Result" : "$write_latency_99th", + "Units" : "ms" + }, + "Write Latency Median" : { + "Result" : "$write_latency_median", + 
"Units" : "ms" + }, + "Read Op rate": { + "Result" : "$read_op_rate", + "Units" : "op/s" + }, + "Read Latency Mean": { + "Result" : "$read_latency_mean", + "Units" : "ms" + }, + "Read Latency 95th percentile": { + "Result" : "$read_latency_95th", + "Units" : "ms" + }, + "Read Latency 99th percentile": { + "Result" : "$read_latency_99th", + "Units" : "ms" + }, + "Read Latency Median" : { + "Result" : "$read_latency_median", + "Units" : "ms" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + + metrics_json_save + cassandra_cleanup +} + +function cassandra_start() { + cmds=("bc" "jq") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + export service_name="cassandra" + export app_name="cassandra" + + wait_time=20 + sleep_time=2 + + vol_capacity="3Gi" + volume_name="block-loop-pv" + volume_claim="block-loop-pvc" + + # Create Loop Device + export tmp_disk_image=$(mktemp --tmpdir disk.XXXXXX.img) + truncate "$tmp_disk_image" --size "3GB" + export loop_dev=$(sudo losetup -f) + sudo losetup "$loop_dev" "$tmp_disk_image" + + # Create Storage Class + kubectl create -f "${SCRIPT_PATH}/volume/block-local-storage.yaml" + + # Create Persistent Volume + export tmp_pv_yaml=$(mktemp --tmpdir block_persistent_vol.XXXXX.yaml) + sed -e "s|LOOP_DEVICE|${loop_dev}|" "${SCRIPT_PATH}/volume/block-loop-pv.yaml" > "$tmp_pv_yaml" + sed -i "s|HOSTNAME|$(hostname | awk '{print tolower($0)}')|" "$tmp_pv_yaml" + sed -i "s|CAPACITY|${vol_capacity}|" "$tmp_pv_yaml" + + kubectl create -f "$tmp_pv_yaml" + cmd="kubectl get pv/${volume_name} | grep Available" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Create Persistent Volume Claim + export tmp_pvc_yaml=$(mktemp --tmpdir block_persistent_vol.XXXXX.yaml) + sed -e "s|CAPACITY|${vol_capacity}|" "${SCRIPT_PATH}/volume/block-loop-pvc.yaml" > "$tmp_pvc_yaml" + kubectl create -f "$tmp_pvc_yaml" + + # Create service + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/cassandra-service.yaml" + + # Check service + kubectl get svc | grep "$service_name" + + # Create workload using volume + ctr_dev_path="/dev/xda" + export tmp_pod_yaml=$(mktemp --tmpdir pod-pv.XXXXX.yaml) + sed -e "s|DEVICE_PATH|${ctr_dev_path}|" "${SCRIPT_PATH}/runtimeclass_workloads/cassandra-statefulset.yaml" > "$tmp_pod_yaml" + kubectl create -f "$tmp_pod_yaml" + cmd="kubectl rollout status --watch --timeout=120s statefulset/$app_name" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Verify persistent volume claim is bound + kubectl get pvc | grep "Bound" + + # Check pods are running + cmd="kubectl get pods -o jsonpath='{.items[*].status.phase}' | grep Running" + waitForProcess "$wait_time" "$sleep_time" "$cmd" +} + +function cassandra_cleanup() { + kubectl patch pvc block-loop-pvc -p '{"metadata":{"finalizers":null}}' + kubectl delete pvc block-loop-pvc --force + kubectl patch pv block-loop-pv -p '{"metadata":{"finalizers":null}}' + kubectl delete pv block-loop-pv --force + kubectl delete svc "$service_name" + kubectl delete pod -l app="$app_name" + kubectl delete storageclass block-local-storage + kubectl delete statefulsets "$app_name" + + # Delete temporary yaml files + rm -f "$tmp_pv_yaml" + rm -f "$tmp_pvc_yaml" + rm -f "$tmp_pod_yaml" + + # Remove image and loop device + sudo losetup -d "$loop_dev" + rm -f "$tmp_disk_image" + + check_processes +} + +function main() { + init_env + cassandra_write_test +} + +main "$@" diff --git 
a/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-service.yaml b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-service.yaml new file mode 100644 index 000000000..26e43d09c --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-service.yaml @@ -0,0 +1,17 @@ +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Service +metadata: + labels: + app: cassandra + name: cassandra +spec: + clusterIP: None + ports: + - port: 9042 + selector: + app: cassandra diff --git a/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml new file mode 100644 index 000000000..fdc10df51 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml @@ -0,0 +1,56 @@ +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: "apps/v1" +kind: StatefulSet +metadata: + name: cassandra +spec: + serviceName: cassandra + replicas: 1 + selector: + matchLabels: + app: cassandra + template: + metadata: + labels: + app: cassandra + spec: + runtimeClassName: kata + containers: + - name: cassandra + image: gcr.io/google-samples/cassandra:v13 + imagePullPolicy: Always + ports: + - containerPort: 7000 + name: intra-node + - containerPort: 7001 + name: tls-intra-node + - containerPort: 7199 + name: jmx + - containerPort: 9042 + name: cql + env: + - name: CASSANDRA_SEEDS + value: cassandra-0.cassandra.default.svc.cluster.local + - name: MAX_HEAP_SIZE + value: 256M + - name: HEAP_NEWSIZE + value: 100M + - name: CASSANDRA_CLUSTER_NAME + value: "Cassandra" + - name: CASSANDRA_DC + value: "DC1" + - name: CASSANDRA_RACK + value: "Rack1" + - name: CASSANDRA_ENDPOINT_SNITCH + value: GossipingPropertyFileSnitch + volumeDevices: + - devicePath: DEVICE_PATH + name: my-volume + volumes: + - name: my-volume + persistentVolumeClaim: + claimName: block-loop-pvc diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml new file mode 100644 index 000000000..5f0209408 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml @@ -0,0 +1,6 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: block-local-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml new file mode 100644 index 000000000..b657a059c --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: block-loop-pv +spec: + capacity: + storage: CAPACITY + volumeMode: Block + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: block-local-storage + local: + path: LOOP_DEVICE + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - HOSTNAME diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml new file mode 100644 index 000000000..e796af206 --- /dev/null +++ 
b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml @@ -0,0 +1,12 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: block-loop-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: block-local-storage + volumeMode: Block + resources: + requests: + storage: CAPACITY diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index dfe31a1cc..3c7686e12 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -80,23 +80,34 @@ function run_test_blogbench() { function run_test_tensorflow() { info "Running TensorFlow test using ${KATA_HYPERVISOR} hypervisor" - bash tests/metrics/machine_learning/tensorflow.sh 1 20 - - check_metrics + bash tests/metrics/machine_learning/tensorflow_nhwc.sh 1 20 } function run_test_fio() { - info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" - # ToDo: remove the exit once the metrics workflow is stable - exit 0 + info "Skipping FIO test temporarily using ${KATA_HYPERVISOR} hypervisor" - bash storage/fio-k8s/fio-test-ci.sh + # bash tests/metrics/storage/fio-k8s/fio-test-ci.sh +} + +function run_test_iperf() { + info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" + + bash tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh -a +} + +function run_test_latency() { + info "Running Latency test using ${KATA_HYPERVISOR} hypervisor" + + bash tests/metrics/network/latency_kubernetes/latency-network.sh + + check_metrics } function main() { action="${1:-}" case "${action}" in install-kata) install_kata && install_checkmetrics ;; + enabling-hypervisor) enabling_hypervisor ;; make-tarball-results) make_tarball_results ;; run-test-launchtimes) run_test_launchtimes ;; run-test-memory-usage) run_test_memory_usage ;; @@ -104,6 +115,8 @@ function main() { run-test-blogbench) run_test_blogbench ;; run-test-tensorflow) run_test_tensorflow ;; run-test-fio) run_test_fio ;; + run-test-iperf) run_test_iperf ;; + run-test-latency) run_test_latency ;; *) >&2 die "Invalid argument" ;; esac } diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index c43019a70..8528d2c7b 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -168,6 +168,8 @@ function init_env() cmd=("docker" "ctr") + sudo systemctl restart docker + # check dependencies check_cmds "${cmd[@]}" @@ -192,6 +194,8 @@ function kill_processes_before_start() CTR_PROCS=$(sudo "${CTR_EXE}" t list -q) [[ -n "${CTR_PROCS}" ]] && clean_env_ctr + kill_kata_components + check_processes } @@ -359,3 +363,57 @@ function wait_ksm_settle() done info "Timed out after ${1}s waiting for KSM to settle" } + +function collect_results() { + local WORKLOAD="$1" + [[ -z "${WORKLOAD}" ]] && die "Container workload is missing" + + local tasks_running=("${containers[@]}") + local retries=100 + + while [ "${#tasks_running[@]}" -gt 0 ] && [ "${retries}" -gt 0 ]; do + for i in "${!tasks_running[@]}"; do + check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh -c "${WORKLOAD}") + + # if the current task is done, remove the corresponding container from the active list + [ "${check_file}" = 1 ] && unset 'tasks_running[i]' + done + ((retries--)) + sleep 3 + echo -n "." 
+ done + echo -e "\n" +} + +function check_containers_are_up() { + local NUM_CONTAINERS="$1" + [[ -z "${NUM_CONTAINERS}" ]] && die "Number of containers is missing" + + local TIMEOUT=60 + local containers_launched=0 + for i in $(seq "${TIMEOUT}") ; do + info "Verify that the containers are running" + containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" + [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break + sleep 1 + [ "${i}" == "${TIMEOUT}" ] && return 1 + done +} + +function check_containers_are_running() { + local NUM_CONTAINERS="$1" + [[ -z "${NUM_CONTAINERS}" ]] && die "Number of containers is missing" + + # Check that the requested number of containers are running + local timeout_launch="10" + check_containers_are_up "${NUM_CONTAINERS}" & pid=$! + (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! + + if wait "${pid}" 2>/dev/null; then + pkill -HUP -P "${pid_tout}" + wait "${pid_tout}" + else + warn "Time out exceeded" + return 1 + fi +} diff --git a/tests/metrics/lib/json.bash b/tests/metrics/lib/json.bash index 017d6494a..1a3082095 100755 --- a/tests/metrics/lib/json.bash +++ b/tests/metrics/lib/json.bash @@ -147,7 +147,7 @@ done) EOF )" - echo "$json" > $json_filename + echo "${json}" | jq . > "${json_filename}" # If we have a JSON URL or host/socket pair set up, post the results there as well. # Optionally compress into a single line. diff --git a/tests/metrics/machine_learning/README.md b/tests/metrics/machine_learning/README.md index f55adba20..41cc139dc 100644 --- a/tests/metrics/machine_learning/README.md +++ b/tests/metrics/machine_learning/README.md @@ -1,4 +1,4 @@ -# Kata Containers Tensorflow Metrics +# Kata Containers TensorFlow Metrics Kata Containers provides a series of performance tests using the TensorFlow reference benchmarks (tf_cnn_benchmarks). @@ -30,16 +30,16 @@ Individual tests can be run by hand, for example: $ cd metrics/machine_learning $ ./tensorflow.sh 40 100 ``` -# Kata Containers Tensorflow `MobileNet` Metrics +# Kata Containers TensorFlow `MobileNet` Metrics `MobileNets` are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. `MobileNets` can be run efficiently on mobile devices with `Tensorflow` Lite. -Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `Tensorflow`. +Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `TensorFlow`. -## Running the `Tensorflow` `MobileNet` test +## Running the `TensorFlow` `MobileNet` test Individual test can be run by hand, for example: ``` @@ -47,3 +47,16 @@ $ cd metrics/machine_learning $ ./tensorflow_mobilenet_benchmark.sh 25 60 ``` +# Kata Containers TensorFlow `ResNet50` Metrics + +`ResNet50` is an image classification model pre-trained on the `ImageNet` dataset. +Kata Containers provides a test for running `ResNet50` inference using Intel-Optimized +`TensorFlow`. 
+ +## Running the `TensorFlow` `ResNet50` test +Individual test can be run by hand, for example: + +``` +$ cd metrics/machine_learning +$ ./tensorflow_resnet50_int8.sh 25 60 +``` diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_dockerfile/Dockerfile b/tests/metrics/machine_learning/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile similarity index 100% rename from tests/metrics/machine_learning/tensorflow_mobilenet_dockerfile/Dockerfile rename to tests/metrics/machine_learning/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile diff --git a/tests/metrics/machine_learning/pytorch.sh b/tests/metrics/machine_learning/pytorch.sh index 9958c1690..c07cdf3ab 100755 --- a/tests/metrics/machine_learning/pytorch.sh +++ b/tests/metrics/machine_learning/pytorch.sh @@ -29,17 +29,6 @@ function remove_tmp_file() { trap remove_tmp_file EXIT -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function equation_of_state_pytorch_test() { info "Running Equation of State Pytorch test" for i in "${containers[@]}"; do @@ -146,7 +135,7 @@ function main() { # Check that the requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" equation_of_state_pytorch_test diff --git a/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile new file mode 100644 index 000000000..edbdf7370 --- /dev/null +++ b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Usage: FROM [image name] +FROM intel/intel-optimized-tensorflow:2.9.1 + +ENV DEBIAN_FRONTEND=noninteractive + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget nano curl build-essential git && \ + apt-get install -y python3.8 python3-pip && \ + pip install --no-cache-dir intel-tensorflow-avx512==2.8.0 && \ + pip install --no-cache-dir protobuf==3.20.* && \ + wget -q https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_fp32_pretrained_model.pb && \ + git clone https://github.com/IntelAI/models.git + +CMD ["/bin/bash"] diff --git a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile new file mode 100644 index 000000000..6e0c1e611 --- /dev/null +++ b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Usage: FROM [image name] +FROM intel/intel-optimized-tensorflow:2.9.1 + +ENV DEBIAN_FRONTEND=noninteractive + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget nano curl build-essential git && \ + apt-get install -y python3.8 python3-pip && \ + pip install --no-cache-dir intel-tensorflow-avx512==2.8.0 && \ + pip install --no-cache-dir protobuf==3.20.* && \ + wget -q https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_int8_pretrained_model.pb && \ + git clone https://github.com/IntelAI/models.git + 
+CMD ["/bin/bash"] diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh similarity index 77% rename from tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh rename to tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index 63b42c1e7..63a37154f 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -10,12 +10,12 @@ set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../lib/common.bash" -IMAGE="docker.io/library/tensorflowmobilenet:latest" -DOCKERFILE="${SCRIPT_PATH}/tensorflow_mobilenet_dockerfile/Dockerfile" +IMAGE="docker.io/library/mobilenet_v1_bfloat16_fp32:latest" +DOCKERFILE="${SCRIPT_PATH}/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile" tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) NUM_CONTAINERS="$1" TIMEOUT="$2" -TEST_NAME="tensorflow-intelai" +TEST_NAME="mobilenet_v1_bfloat16_fp32" PAYLOAD_ARGS="tail -f /dev/null" TESTDIR="${TESTDIR:-/testdir}" # Options to control the start of the workload using a trigger-file @@ -27,7 +27,6 @@ start_script="mobilenet_start.sh" CMD="$dst_dir/$start_script" guest_trigger_file="$dst_dir/$trigger_file" host_trigger_file="$src_dir/$trigger_file" -timeout=600 INITIAL_NUM_PIDS=1 CMD_FILE="cat results | grep 'Average Throughput' | wc -l" CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" @@ -43,7 +42,7 @@ cat << EOF Usage: $0 Description: This script launches n number of containers - to run the tf cnn benchmarks using a Tensorflow + to run the MobileNet model using a Tensorflow container. Options: : Number of containers to run. @@ -62,7 +61,7 @@ EOF chmod +x "${script}" } -function mobilenet_test() { +function mobilenet_v1_bfloat16_fp32_test() { local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16" info "Export environment variables" @@ -70,7 +69,7 @@ function mobilenet_test() { sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_EXPORT_VAR}" done - info "Running Mobilenet Tensorflow test" + info "Running Mobilenet V1 Bfloat16 FP32 Tensorflow test" local pids=() local j=0 for i in "${containers[@]}"; do @@ -87,14 +86,7 @@ function mobilenet_test() { touch "${host_trigger_file}" info "All containers are running the workload..." 
- for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") - retries="30" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break - sleep 1 - done - done + collect_results "${CMD_FILE}" for i in "${containers[@]}"; do sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" @@ -104,7 +96,7 @@ function mobilenet_test() { local average_mobilenet=$(echo "${mobilenet_results}" | sed 's/.$//' | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) local json="$(cat << EOF { - "Mobilenet": { + "Mobilenet_v1_bfloat16_fp32": { "Result": "${mobilenet_results}", "Average": "${average_mobilenet}", "Units": "images/s" @@ -116,17 +108,6 @@ EOF metrics_json_end_array "Results" } -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function main() { # Verify enough arguments if [ $# != 2 ]; then @@ -160,26 +141,16 @@ function main() { metrics_json_start_array # Check that the requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running - local timeout_launch="10" - check_containers_are_up & pid=$! - (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! - - if wait "${pid}" 2>/dev/null; then - pkill -HUP -P "${pid_tout}" - wait "${pid_tout}" - else - warn "Time out exceeded" - return 1 - fi + check_containers_are_running "${NUM_CONTAINERS}" # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) ((INITIAL_NUM_PIDS++)) - mobilenet_test + mobilenet_v1_bfloat16_fp32_test metrics_json_save diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow_nhwc.sh similarity index 53% rename from tests/metrics/machine_learning/tensorflow.sh rename to tests/metrics/machine_learning/tensorflow_nhwc.sh index fc6c1f8c9..328c055d4 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow_nhwc.sh @@ -10,15 +10,15 @@ set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../lib/common.bash" -IMAGE="docker.io/library/tensorflow:latest" -DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile" +IMAGE="docker.io/library/tensorflow_nhwc:latest" +DOCKERFILE="${SCRIPT_PATH}/tensorflow_nhwc_dockerfile/Dockerfile" BATCH_SIZE="100" NUM_BATCHES="100" resnet_tensorflow_file=$(mktemp resnettensorflowresults.XXXXXXXXXX) alexnet_tensorflow_file=$(mktemp alexnettensorflowresults.XXXXXXXXXX) NUM_CONTAINERS="$1" TIMEOUT="$2" -TEST_NAME="tensorflow" +TEST_NAME="tensorflow_nhwc" PAYLOAD_ARGS="tail -f /dev/null" # Options to control the start of the workload using a trigger-file dst_dir="/host" @@ -29,13 +29,16 @@ alexnet_start_script="alexnet_start.sh" resnet_start_script="resnet_start.sh" CMD_RESNET="$dst_dir/$resnet_start_script" CMD_ALEXNET="$dst_dir/$alexnet_start_script" -timeout=600 INITIAL_NUM_PIDS=1 -CMD_FILE="cat alexnet_results | grep 'total images' | wc -l" -RESNET_CMD_FILE="cat 
resnet_results | grep 'total images' | wc -l" +ALEXNET_FILE="alexnet_results" +ALEXNET_CHECK_FILE_CMD="cat /${ALEXNET_FILE} | grep 'total images' | wc -l" +RESNET_FILE="resnet_results" +RESNET_CHECK_FILE_CMD="cat /${RESNET_FILE} | grep 'total images' | wc -l" function remove_tmp_file() { rm -rf "${resnet_tensorflow_file}" "${alexnet_tensorflow_file}" + rm -rf "${src_dir}" + clean_env_ctr } trap remove_tmp_file EXIT @@ -59,7 +62,8 @@ function create_resnet_start_script() { cat <>"${script}" #!/bin/bash -python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > resnet_results +pushd "benchmarks/scripts/tf_cnn_benchmarks" +python tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > "/${RESNET_FILE}" EOF chmod +x "${script}" } @@ -70,17 +74,20 @@ function create_alexnet_start_script() { cat <>"${script}" #!/bin/bash -python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > alexnet_results +pushd "benchmarks/scripts/tf_cnn_benchmarks" +python tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > "/${ALEXNET_FILE}" EOF chmod +x "${script}" } -function tensorflow_test() { - info "Copy Resnet Tensorflow test" +function launch_workload() { + WORKLOAD=${1} + [[ -z ${WORKLOAD} ]] && die "Container workload is missing" + local pids=() local j=0 for i in "${containers[@]}"; do - $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESNET}")& + $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${WORKLOAD}")& pids["${j}"]=$! ((j++)) done @@ -89,59 +96,39 @@ function tensorflow_test() { for pid in ${pids[*]}; do wait "${pid}" done +} - info "All containers are running the workload..." +function tensorflow_nhwc_test() { + # Resnet section + info "Running TF-Resnet test" + launch_workload "${CMD_RESNET}" + collect_results "${RESNET_CHECK_FILE_CMD}" + # Alexnet section + info "Running TF-Alexnet test" + launch_workload "${CMD_ALEXNET}" + collect_results "${ALEXNET_CHECK_FILE_CMD}" + + info "Tensorflow workload completed" + # Retrieving results for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${RESNET_CMD_FILE}") - retries="300" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break - sleep 1 - done + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat /${RESNET_FILE}" >> "${resnet_tensorflow_file}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat /${ALEXNET_FILE}" >> "${alexnet_tensorflow_file}" done - info "Copy Alexnet Tensorflow test" - local pids=() - local j=0 - for i in "${containers[@]}"; do - $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_ALEXNET}")& - pids["${j}"]=$! 
- ((j++)) - done + # Parsing resnet results + local resnet_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') + local res_sum="$(sed -e 's/,/\n/g' <<< ${resnet_results} | awk 'BEGIN {total=0} {total += $1} END {print total}')" + local num_elements="$(awk '{print NF}' FS=',' <<<${resnet_results})" + local average_resnet="$(echo "scale=2 ; ${res_sum} / ${num_elements}" | bc)" - # wait for all pids - for pid in ${pids[*]}; do - wait "${pid}" - done - - for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") - retries="300" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break - sleep 1 - done - done - - for i in "${containers[@]}"; do - sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat resnet_results" >> "${resnet_tensorflow_file}" - done - - local res_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') - local resnet_results=$(printf "%.0f\n" "${res_results}") - local res_average=$(echo "${resnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l) - local average_resnet=$(printf "%.0f\n" "${res_average}") - - for i in "${containers[@]}"; do - sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat alexnet_results" >> "${alexnet_tensorflow_file}" - done - - local alex_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') - local alexnet_results=$(printf "%.0f\n" "${alex_results}") - local alex_average=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l) - local average_alexnet=$(printf "%.0f\n" "${alex_average}") + # Parsing alexnet results + local alexnet_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') + local alex_sum="$(sed -e 's/,/\n/g' <<< ${alexnet_results} | awk 'BEGIN {total=0} {total += $1} END {print total}')" + num_elements="$(awk '{print NF}' FS=',' <<< ${alexnet_results})" + local average_alexnet="$(echo " scale=2 ; ${alex_sum} / ${num_elements}" | bc)" + # writing json results file local json="$(cat << EOF { "resnet": { @@ -161,20 +148,9 @@ EOF metrics_json_end_array "Results" } -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function main() { # Verify enough arguments - if [ $# != 2 ]; then + if [ "$#" -lt 2 ]; then echo >&2 "error: Not enough arguments [$@]" help exit 1 @@ -206,31 +182,16 @@ function main() { metrics_json_start_array # Check that the requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running - local timeout_launch="10" - check_containers_are_up & pid=$! - (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! 
- - if wait "${pid}" 2>/dev/null; then - pkill -HUP -P "${pid_tout}" - wait "${pid_tout}" - else - warn "Time out exceeded" - return 1 - fi + check_containers_are_running "${NUM_CONTAINERS}" # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) ((INITIAL_NUM_PIDS++)) - - tensorflow_test - + tensorflow_nhwc_test metrics_json_save - - rm -rf "${src_dir}" - - clean_env_ctr } + main "$@" diff --git a/tests/metrics/machine_learning/tensorflow_dockerfile/Dockerfile b/tests/metrics/machine_learning/tensorflow_nhwc_dockerfile/Dockerfile similarity index 100% rename from tests/metrics/machine_learning/tensorflow_dockerfile/Dockerfile rename to tests/metrics/machine_learning/tensorflow_nhwc_dockerfile/Dockerfile diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh new file mode 100755 index 000000000..10abbd381 --- /dev/null +++ b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="docker.io/library/resnet50_fp32:latest" +DOCKERFILE="${SCRIPT_PATH}/resnet50_fp32_dockerfile/Dockerfile" +tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) +NUM_CONTAINERS="$1" +TIMEOUT="$2" +TEST_NAME="resnet50_fp32" +PAYLOAD_ARGS="tail -f /dev/null" +TESTDIR="${TESTDIR:-/testdir}" +# Options to control the start of the workload using a trigger-file +dst_dir="/host" +src_dir=$(mktemp --tmpdir -d tensorflowresnet.XXXXXXXXXX) +MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro" +start_script="resnet50_fp32_start.sh" +# CMD points to the script that starts the workload +CMD="$dst_dir/$start_script" +guest_trigger_file="$dst_dir/$trigger_file" +host_trigger_file="$src_dir/$trigger_file" +INITIAL_NUM_PIDS=1 +CMD_FILE="cat results | grep 'Average Throughput' | wc -l" +CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" + +function remove_tmp_file() { + rm -rf "${tensorflow_file}" +} + +trap remove_tmp_file EXIT + +function help() { +cat << EOF +Usage: $0 + Description: + This script launches n number of containers + to run the ResNet50 fp32 model using a Tensorflow + container. + Options: + : Number of containers to run. + : Timeout to launch the containers. 
+EOF +} + +function create_start_script() { + local script="${src_dir}/${start_script}" + rm -rf "${script}" + +cat <>"${script}" +#!/bin/bash +python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision fp32 --mode inference --in-graph /resnet50_fp32_pretrained_model.pb --batch-size 1 --num-intra-threads 16 >> results +EOF + chmod +x "${script}" +} + +function resnet50_fp32_test() { + local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16" + + info "Export environment variables" + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_EXPORT_VAR}" + done + + info "Running ResNet50 FP32 Tensorflow test" + local pids=() + local j=0 + for i in "${containers[@]}"; do + $(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD}")& + pids["${j}"]=$! + ((j++)) + done + + # wait for all pids + for pid in ${pids[*]}; do + wait "${pid}" + done + + touch "${host_trigger_file}" + info "All containers are running the workload..." + + collect_results "${CMD_FILE}" + + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" + done + + local resnet50_fp32_results=$(cat "${tensorflow_file}" | sed 's/.$//') + local average_resnet50_fp32=$(echo "${resnet50_fp32_results}" | sed 's/.$//' | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) + local json="$(cat << EOF + { + "ResNet50_fp32": { + "Result": "${resnet50_fp32_results}", + "Average": "${average_resnet50_fp32}", + "Units": "images/s" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + # Check tools/commands dependencies + cmds=("awk" "docker" "bc") + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + + init_env + create_start_script + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "${not_started_count} remaining containers" + done + + metrics_json_init + metrics_json_start_array + + # Check that the requested number of containers are running + check_containers_are_up "${NUM_CONTAINERS}" + + # Check that the containers reached the running state within the launch timeout + check_containers_are_running "${NUM_CONTAINERS}" + + # Get the initial number of pids in a single container before the workload starts + INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) + ((INITIAL_NUM_PIDS++)) + + resnet50_fp32_test + + metrics_json_save + + sudo rm -rf "${src_dir}" + + clean_env_ctr +} +main "$@" diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh new file mode 100755 index 000000000..19c3e1192 --- /dev/null +++ b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General 
env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="docker.io/library/resnet50int8:latest" +DOCKERFILE="${SCRIPT_PATH}/resnet50_int8_dockerfile/Dockerfile" +tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) +NUM_CONTAINERS="$1" +TIMEOUT="$2" +TEST_NAME="tensorflow-resnet50int8" +PAYLOAD_ARGS="tail -f /dev/null" +TESTDIR="${TESTDIR:-/testdir}" +# Options to control the start of the workload using a trigger-file +dst_dir="/host" +src_dir=$(mktemp --tmpdir -d tensorflowresnet50int8.XXXXXXXXXX) +MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro" +start_script="resnet50int8_start.sh" +# CMD points to the script that starts the workload +# export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX +CMD="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16 && $dst_dir/$start_script" +guest_trigger_file="$dst_dir/$trigger_file" +host_trigger_file="$src_dir/$trigger_file" +INITIAL_NUM_PIDS=1 +CMD_FILE="cat results | grep 'Throughput' | wc -l" +CMD_RESULTS="cat results | grep 'Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" + +function remove_tmp_file() { + rm -rf "${tensorflow_file}" +} + +trap remove_tmp_file EXIT + +function help() { +cat << EOF +Usage: $0 + Description: + This script launches n number of containers + to run the ResNet50 int8 using a Tensorflow + container. + Options: + : Number of containers to run. + : Timeout to launch the containers. +EOF +} + +function create_start_script() { + local script="${src_dir}/${start_script}" + rm -rf "${script}" + +cat <>"${script}" +#!/bin/bash +python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision int8 --mode inference --in-graph /resnet50_int8_pretrained_model.pb --batch-size 116 --num-intra-threads 16 >> results +EOF + chmod +x "${script}" +} + +function resnet50_int8_test() { + info "Running ResNet50 Int8 Tensorflow test" + local pids=() + local j=0 + for i in "${containers[@]}"; do + $(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD}")& + pids["${j}"]=$! + ((j++)) + done + + # wait for all pids + for pid in ${pids[*]}; do + wait "${pid}" + done + + touch "${host_trigger_file}" + info "All containers are running the workload..." 
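+ # Wait until every container has written a 'Throughput' line to its results file before harvesting the values below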
+ collect_results "${CMD_FILE}" + + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" + done + + local resnet50_int8_results=$(cat "${tensorflow_file}" | sed 's/.$//') + local average_resnet50_int8=$(echo "${resnet50_int8_results}" | sed 's/.$//'| sed "s/,/+/g;s/.*/(&)\/2/g" | bc -l) + + local json="$(cat << EOF + { + "ResNet50Int8": { + "Result": "${resnet50_int8_results}", + "Average": "${average_resnet50_int8}", + "Units": "images/s" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + # Check tools/commands dependencies + cmds=("awk" "docker" "bc") + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + + init_env + create_start_script + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "$not_started_count remaining containers" + done + + metrics_json_init + metrics_json_start_array + + # Check that the requested number of containers are running + check_containers_are_up "${NUM_CONTAINERS}" + + # Check that the requested number of containers are running + check_containers_are_running "${NUM_CONTAINERS}" + + # Get the initial number of pids in a single container before the workload starts + INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) + ((INITIAL_NUM_PIDS++)) + + resnet50_int8_test + + metrics_json_save + + sudo rm -rf "${src_dir}" + + clean_env_ctr +} +main "$@" diff --git a/tests/metrics/network/README.md b/tests/metrics/network/README.md index c109a6841..971aff6ae 100644 --- a/tests/metrics/network/README.md +++ b/tests/metrics/network/README.md @@ -10,6 +10,8 @@ bandwidth, jitter, latency and parallel bandwidth. ## Networking tests - `k8s-network-metrics-iperf3.sh` measures bandwidth which is the speed of the data transfer. +- `latency-network.sh` measures network latency. +- `nginx-network.sh` is a benchmark of the lightweight Nginx HTTPS web-server and measures the HTTP requests over a fixed period of time with a configurable number of concurrent clients/connections. 
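+ +For example, assuming a running Kubernetes cluster with the `kata` runtime class configured (and +`ab` installed for the Nginx test), the two new tests can be launched directly; a minimal sketch: + +``` +$ cd metrics/network +$ ./latency_kubernetes/latency-network.sh +$ ./nginx_kubernetes/nginx-network.sh +``` +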
## Running the tests diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh index 6593bb651..88beb895c 100755 --- a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh @@ -26,6 +26,8 @@ source "${SCRIPT_PATH}/../../lib/common.bash" iperf_file=$(mktemp iperfresults.XXXXXXXXXX) TEST_NAME="${TEST_NAME:-network-iperf3}" COLLECT_ALL="${COLLECT_ALL:-false}" +IPERF_DEPLOYMENT="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-deployment.yaml" +IPERF_DAEMONSET="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-daemonset.yaml" function remove_tmp_file() { rm -rf "${iperf_file}" @@ -180,51 +182,52 @@ function iperf3_start_deployment() { # Check no processes are left behind check_processes - export service="iperf3-server" - export deployment="iperf3-server-deployment" - wait_time=20 sleep_time=2 # Create deployment - kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/iperf3-deployment.yaml" + kubectl create -f "${IPERF_DEPLOYMENT}" # Check deployment creation - local cmd="kubectl wait --for=condition=Available deployment/${deployment}" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl wait --for=condition=Available deployment/iperf3-server-deployment" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Create DaemonSet - kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/iperf3-daemonset.yaml" - - # Expose deployment - kubectl expose deployment/"${deployment}" + kubectl create -f "${IPERF_DAEMONSET}" # Get the names of the server pod export server_pod_name=$(kubectl get pods -o name | grep server | cut -d '/' -f2) # Verify the server pod is working - local cmd="kubectl get pod $server_pod_name -o yaml | grep 'phase: Running'" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl get pod ${server_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Get the names of client pod export client_pod_name=$(kubectl get pods -o name | grep client | cut -d '/' -f2) # Verify the client pod is working - local cmd="kubectl get pod $client_pod_name -o yaml | grep 'phase: Running'" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl get pod ${client_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Get the ip address of the server pod - export server_ip_add=$(kubectl get pod "$server_pod_name" -o jsonpath='{.status.podIP}') + export server_ip_add=$(kubectl get pod "${server_pod_name}" -o jsonpath='{.status.podIP}') } function iperf3_deployment_cleanup() { - kubectl delete pod "$server_pod_name" "$client_pod_name" - kubectl delete ds iperf3-clients - kubectl delete deployment "$deployment" - kubectl delete service "$deployment" + info "iperf: deleting deployments and services" + kubectl delete pod "${server_pod_name}" "${client_pod_name}" + kubectl delete -f "${IPERF_DAEMONSET}" + kubectl delete -f "${IPERF_DEPLOYMENT}" + kill_kata_components && sleep 1 + kill_kata_components check_processes + info "End of iperf3 test" } +# The deployment must be removed in +# any case the script terminates. 
+trap iperf3_deployment_cleanup EXIT + function help() { echo "$(cat << EOF Usage: $0 "[options]" @@ -307,8 +310,8 @@ function main() { export COLLECT_ALL=true && iperf3_bandwidth && iperf3_jitter && iperf3_cpu && iperf3_parallel fi + info "iperf3: saving test results" metrics_json_save - iperf3_deployment_cleanup } main "$@" diff --git a/tests/metrics/network/latency_kubernetes/latency-client.yaml b/tests/metrics/network/latency_kubernetes/latency-client.yaml new file mode 100644 index 000000000..8e1f3bd0e --- /dev/null +++ b/tests/metrics/network/latency_kubernetes/latency-client.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: latency-client +spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: client-container + image: quay.io/prometheus/busybox:latest + command: + - sleep + - "180" diff --git a/tests/metrics/network/latency_kubernetes/latency-network.sh b/tests/metrics/network/latency_kubernetes/latency-network.sh new file mode 100755 index 000000000..fedd18135 --- /dev/null +++ b/tests/metrics/network/latency_kubernetes/latency-network.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# +# Copyright (c) 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +latency_file=$(mktemp latencyresults.XXXXXXXXXX) +TEST_NAME="${TEST_NAME:-latency}" + +function remove_tmp_file() { + rm -rf "${latency_file}" +} + +trap remove_tmp_file EXIT + +function main() { + init_env + cmds=("bc" "jq") + check_cmds "${cmds[@]}" + + init_env + + # Check no processes are left behind + check_processes + + wait_time=20 + sleep_time=2 + + # Create server + kubectl create -f "${SCRIPT_PATH}/latency-server.yaml" + + # Get the names of the server pod + export server_pod_name="latency-server" + + # Verify the server pod is working + local cmd="kubectl get pod $server_pod_name -o yaml | grep 'phase: Running'" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Create client + kubectl create -f "${SCRIPT_PATH}/latency-client.yaml" + + # Get the names of the client pod + export client_pod_name="latency-client" + + # Verify the client pod is working + local cmd="kubectl get pod $client_pod_name -o yaml | grep 'phase: Running'" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Get the ip address of the server pod + export server_ip_add=$(kubectl get pod "$server_pod_name" -o jsonpath='{.status.podIP}') + + # Number of packets (sent) + local number="${number:-30}" + + local client_command="ping -c ${number} ${server_ip_add}" + + kubectl exec "$client_pod_name" -- sh -c "$client_command" > "$latency_file" + + metrics_json_init + + local latency=$(cat $latency_file | grep avg | cut -f2 -d'=' | sed 's/[[:blank:]]//g' | cut -f2 -d'/') + + metrics_json_start_array + + local json="$(cat << EOF + { + "latency": { + "Result" : $latency, + "Units" : "ms" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + metrics_json_save + + kubectl delete pod "$client_pod_name" "$server_pod_name" + kubectl get pods -A + check_processes +} +main "$@" diff --git a/tests/metrics/network/latency_kubernetes/latency-server.yaml b/tests/metrics/network/latency_kubernetes/latency-server.yaml new file mode 100644 index 000000000..ce301f653 --- /dev/null +++ b/tests/metrics/network/latency_kubernetes/latency-server.yaml @@ -0,0 +1,18 @@ +# +# 
Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: latency-server +spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: server-container + image: quay.io/prometheus/busybox:latest + command: + - sleep + - "180" diff --git a/tests/metrics/network/nginx_kubernetes/nginx-network.sh b/tests/metrics/network/nginx_kubernetes/nginx-network.sh new file mode 100755 index 000000000..1fe37e4eb --- /dev/null +++ b/tests/metrics/network/nginx_kubernetes/nginx-network.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +nginx_file=$(mktemp nginxresults.XXXXXXXXXX) +TEST_NAME="${TEST_NAME:-nginx}" + +function remove_tmp_file() { + rm -rf "${nginx_file}" +} + +trap remove_tmp_file EXIT + +function main() { + init_env + cmds=("bc" "jq" "ab") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + wait_time=20 + sleep_time=2 + timeout="20s" + + deployment="nginx-deployment" + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/nginx-networking.yaml" + kubectl wait --for=condition=Available --timeout="${timeout}" deployment/"${deployment}" + kubectl expose deployment/"${deployment}" + ip=$(kubectl get service/nginx-deployment -o jsonpath='{.spec.clusterIP}') + + ab -n 100000 -c 100 http://"${ip}":80/ > "${nginx_file}" + metrics_json_init + rps=$(cat "${nginx_file}" | grep "Requests" | awk '{print $4}') + echo "Requests per second: ${rps}" + + metrics_json_start_array + + local json="$(cat << EOF + { + "requests": { + "Result" : ${rps}, + "Units": "rps" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + metrics_json_save + + nginx_cleanup +} + +function nginx_cleanup() { + kubectl delete deployment "${deployment}" + kubectl delete service "${deployment}" + check_processes +} + +main "$@" diff --git a/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml b/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml new file mode 100644 index 000000000..1475f13b7 --- /dev/null +++ b/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml @@ -0,0 +1,25 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment +spec: + selector: + matchLabels: + app: nginx + replicas: 2 + template: + metadata: + labels: + app: nginx + spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: nginx + image: nginx:1.14.2 + ports: + - containerPort: 80 diff --git a/tests/metrics/report/README.md b/tests/metrics/report/README.md new file mode 100644 index 000000000..da586953d --- /dev/null +++ b/tests/metrics/report/README.md @@ -0,0 +1,64 @@ +# Kata Containers metrics report generator + +The files within this directory can be used to generate a "metrics report" for Kata Containers. The +primary workflow consists of two stages: +1) Run the provided report metrics data gathering scripts on the system(s) you wish to analyze. +2) Run the provided report generation script to analyze the data and generate a report file. + +## Data gathering + +Data gathering is provided by the `grabdata.sh` script. 
When run, this script executes a set of +tests from the `tests/metrics` directory. The JSON results files will be placed into the +`tests/metrics/results` directory. Once the results are generated, create a suitably named +subdirectory of `tests/metrics/results`, and move the JSON files into it. Repeat this process if +you want to compare multiple sets of results. Note that the report generation scripts process all +subdirectories of `tests/metrics/results` when generating the report. + +> **Note:** By default, the `grabdata.sh` script tries to launch some moderately large containers +> (i.e. 8Gbyte RAM) and may fail to produce some results on a memory constrained system. + +You can restrict the subset of tests run by `grabdata.sh` via its commandline parameters: + +| Option | Description | +| ------ | ------------------------| +| -a | Run all tests (default) | +| -d | Run the density tests | +| -h | Print this help | +| -s | Run the storage tests | +| -t | Run the time tests | + +## Report generation + +Report generation is provided by the `makereport.sh` script. By default this script processes all +subdirectories of the `tests/metrics/results` directory to generate the report. To run in the +default mode, execute the following: + +``` +$ ./makereport.sh +``` + +The report generation tool uses [`Rmarkdown`](https://github.com/rstudio/rmarkdown), [R](https://www.r-project.org/about.html) and +[Pandoc](https://pandoc.org/) to produce a PDF report. To avoid the need for all users to set up a +working environment with all the necessary tooling, the `makereport.sh` script utilises a +`Dockerfile` with the environment pre-defined in order to produce the report. Thus, you need to +have Docker installed on your system in order to run the report generation. The resulting +`metrics_report.pdf` is generated into the `output` subdirectory of the `report` directory. + +## Debugging and development + +To aid in script development and debugging, the `makereport.sh` script offers a debug facility via +the `-d` command line option. Using this option will place you into a `bash` shell within the +running `Dockerfile` image used to generate the report, whilst also mapping your host side `R` +scripts from the `report_dockerfile` subdirectory into the container, thus facilitating a "live" +edit/reload/run development cycle. From there you can examine the Docker image environment, and +execute the generation scripts. E.g., to test the `lifecycle-time.R` script, you can execute: + +``` +$ makereport.sh -d +# R +> source('/inputdir/Env.R') +> source('/scripts/lifecycle-time.R') +``` + +You can then edit the `report_dockerfile/lifecycle-time.R` file on the host, and re-run +the `source('/scripts/lifecycle-time.R')` command inside the still running `R` container. diff --git a/tests/metrics/report/grabdata.sh b/tests/metrics/report/grabdata.sh new file mode 100755 index 000000000..3544339f8 --- /dev/null +++ b/tests/metrics/report/grabdata.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Run a set of the metrics tests to gather data to be used with the report +# generator. The general idea is to have the tests configured to generate +# useful, meaningful and repeatable (stable, with minimised variance) results. +# If the tests have to be run more or longer to achieve that, then generally +# that is fine - this test is not intended to be quick, it is intended to +# be repeatable.
+ +# Note - no 'set -e' in this file - if one of the metrics tests fails +# then we wish to continue to try the rest. +# Finally at the end, in some situations, we explicitly exit with a +# failure code if necessary. + +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_DIR}/../lib/common.bash" +RESULTS_DIR=${SCRIPT_DIR}/../results + +# By default we run all the tests +RUN_ALL=1 + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script gathers a number of metrics for use in the + report generation script. Which tests are run can be + configured on the commandline. Specifically enabling + individual tests will disable the 'all' option, unless + 'all' is also specified last. + Options: + -a, Run all tests (default). + -d, Run the density tests. + -h, Print this help. + -s, Run the storage tests. + -t, Run the time tests. +EOF +) + echo "$usage" +} + +# Set up the initial state +init() { + metrics_onetime_init + + local OPTIND + while getopts "adhst" opt;do + case ${opt} in + a) + RUN_ALL=1 + ;; + d) + RUN_DENSITY=1 + RUN_ALL= + ;; + h) + help + exit 0; + ;; + s) + RUN_STORAGE=1 + RUN_ALL= + ;; + t) + RUN_TIME=1 + RUN_ALL= + ;; + ?) + # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) +} + +run_density_ksm() { + echo "Running KSM density tests" + + # Run the memory footprint test - the main test that + # KSM affects. Run for a sufficient number of containers + # (that gives us a fair view of how memory gets shared across + # containers), and a large enough timeout for KSM to settle. + # If KSM has not settled down by then, just take the measurement. + # 'auto' mode should detect when KSM has settled automatically. + bash density/memory_usage.sh 20 300 auto + + # Get a measure for the overhead we take from the container memory + bash density/memory_usage_inside_container.sh +} + +run_density() { + echo "Running non-KSM density tests" + + # Run the density tests - no KSM, so no need to wait for settle + # Set a token short timeout, and use enough containers to get a + # good average measurement. + bash density/memory_usage.sh 20 5 +} + +run_time() { + echo "Running time tests" + # Run the time tests - take time measures for an ubuntu image, over + # 100 'first and only container' launches. + # NOTE - whichever container you test here must support a full 'date' + # command - busybox based containers (including Alpine) will not work. + bash time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n 100 +} + +run_storage() { + echo "Running storage tests" + + bash storage/blogbench.sh +} + + +# Execute metrics scripts +run() { + pushd "$SCRIPT_DIR/.." + + # If KSM is available on this platform, let's run any tests that are + # affected by having KSM on/off first, and then turn it off for the + # rest of the tests, as KSM may introduce some extra noise in the + # results by stealing CPU time for instance. + if [[ -f ${KSM_ENABLE_FILE} ]]; then + # No point enabling and disabling KSM if we have nothing to test. 
+ if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then + save_ksm_settings + trap restore_ksm_settings EXIT QUIT KILL + set_ksm_aggressive + + run_density_ksm + + # And now ensure KSM is turned off for the rest of the tests + disable_ksm + fi + else + echo "No KSM control file, skipping KSM tests" + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_TIME" ]; then + run_time + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then + run_density + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_STORAGE" ]; then + run_storage + fi + + popd +} + +finish() { + echo "Now please create a suitably descriptively named subdirectory in" + echo "$RESULTS_DIR and copy the .json results files into it before running" + echo "this script again." +} + +init "$@" +run +finish diff --git a/tests/metrics/report/makereport.sh b/tests/metrics/report/makereport.sh new file mode 100755 index 000000000..3926d7567 --- /dev/null +++ b/tests/metrics/report/makereport.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Take the data found in subdirectories of the metrics 'results' directory, +# and turn them into a PDF report. Use a Dockerfile containing all the tooling +# and scripts we need to do that. + +set -e + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="${IMAGE:-metrics-report}" +DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile" + +HOST_INPUT_DIR="${SCRIPT_PATH}/../results" +R_ENV_FILE="${HOST_INPUT_DIR}/Env.R" +HOST_OUTPUT_DIR="${SCRIPT_PATH}/output" + +GUEST_INPUT_DIR="/inputdir/" +GUEST_OUTPUT_DIR="/outputdir/" + +# If in debugging mode, we also map in the scripts dir so you can +# dynamically edit and re-load them at the R prompt +HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile" +GUEST_SCRIPT_DIR="/scripts/" + +setup() { + echo "Checking subdirectories" + check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)" + if [ $check_subdir -eq 0 ]; then + die "No subdirs in [${HOST_INPUT_DIR}] to read results from." + fi + + echo "Checking Dockerfile" + check_dockerfiles_images "$IMAGE" "$DOCKERFILE" + + mkdir -p "$HOST_OUTPUT_DIR" && true + + echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE} + echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE} + + # A bit of a hack to get an R syntax'd list of dirs to process + # Also, need it as not host-side dir path - so short relative names + resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)" + resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g') + echo "resultdirs=c(" >> ${R_ENV_FILE} + echo " \"${resultdirslist}\"" >> ${R_ENV_FILE} + echo ")" >> ${R_ENV_FILE} +} + +run() { + docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command} + ls -la ${HOST_OUTPUT_DIR}/* +} + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script generates a metrics report document + from the results directory one level up in the + directory tree (../results). + Options: + -d, Run in debug (interactive) mode + -h, Print this help +EOF +) + echo "$usage" +} + +main() { + + local OPTIND + while getopts "d" opt;do + case ${opt} in + d) + # In debug mode, run a shell instead of the default report generation + extra_command="bash" + extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}" + ;; + ?) 
+ # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) + + setup + run +} + +main "$@" diff --git a/tests/metrics/report/report_dockerfile/Dockerfile b/tests/metrics/report/report_dockerfile/Dockerfile new file mode 100644 index 000000000..74c502652 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/Dockerfile @@ -0,0 +1,44 @@ +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with the components needed to generate a +# metrics report. That includes: +# - R +# - The R 'tidyverse' +# - pandoc +# - The report generation R files and helper scripts + +# Start with the base rocker tidyverse. +# We would have used the 'verse' base, which already has some of the docs processing +# installed, but I could not figure out how to add in the extra bits we needed to +# the lite tex version it uses. +# Here we specify a tag for the base image instead of using 'latest', to protect the +# build from unexpected changes when the latest base image is updated. +FROM rocker/tidyverse:3.6.0 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.2" + +# Without this some of the package installs stop to try and ask questions... +ENV DEBIAN_FRONTEND=noninteractive + +# Install the extra doc processing parts we need for our Rmarkdown PDF flow. +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-fonts-recommended \ + latex-xcolor && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists + +# Install the extra R packages we need. +RUN install2.r --error --deps TRUE \ + gridExtra \ + ggpubr + +# Pull in our actual worker scripts +COPY . /scripts + +# By default generate the report +CMD ["/scripts/genreport.sh"] diff --git a/tests/metrics/report/report_dockerfile/dut-details.R b/tests/metrics/report/report_dockerfile/dut-details.R new file mode 100644 index 000000000..3b16c8c2b --- /dev/null +++ b/tests/metrics/report/report_dockerfile/dut-details.R @@ -0,0 +1,122 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for the 'Device Under Test', for all data sets being processed. + +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). +library(tibble) +suppressMessages(suppressWarnings(library(plyr))) # rbind.fill + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +# A list of all the known results files we might find the information inside.
+resultsfiles=c( + "boot-times.json", + "memory-footprint.json", + "memory-footprint-ksm.json", + "memory-footprint-inside-container.json" + ) + +data=c() +stats=c() +stats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (resultsfile in resultsfiles) { + fname=paste(inputdir, currentdir, resultsfile, sep="/") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + + if (length(fdata$'kata-env') != 0 ) { + # We have kata-runtime data + dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit)) + + pver=as.character(fdata$'kata-env'$Proxy$Version) + pver=sub("^[[:alpha:][:blank:]-]*", "", pver) + # uncomment if you want to drop the commit sha as well + #pver=sub("([[:digit:].]*).*", "\\1", pver) + dirstats=cbind(dirstats, "Proxy Ver"=pver) + + # Trim the shim string + sver=as.character(fdata$'kata-env'$Shim$Version) + sver=sub("^[[:alpha:][:blank:]-]*", "", sver) + # uncomment if you want to drop the commit sha as well + #sver=sub("([[:digit:].]*).*", "\\1", sver) + dirstats=cbind(dirstats, "Shim Ver"=sver) + + # Default QEMU ver string is far too long and noisy - trim. + hver=as.character(fdata$'kata-env'$Hypervisor$Version) + hver=sub("^[[:alpha:][:blank:]]*", "", hver) + hver=sub("([[:digit:].]*).*", "\\1", hver) + dirstats=cbind(dirstats, "Hyper Ver"=hver) + + iver=as.character(fdata$'kata-env'$Image$Path) + iver=sub("^[[:alpha:]/-]*", "", iver) + dirstats=cbind(dirstats, "Image Ver"=iver) + + kver=as.character(fdata$'kata-env'$Kernel$Path) + kver=sub("^[[:alpha:]/-]*", "", kver) + dirstats=cbind(dirstats, "Guest Krnl"=kver) + + dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture)) + dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name)) + dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version)) + dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model)) + dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + + break + } else { + if (length(fdata$'runc-env') != 0 ) { + dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + } else { + dirstats=tibble("runtime"="Unknown") + } + break + } + } + + if ( length(dirstats) == 0 ) { + warning(paste("No valid data found for directory ", currentdir)) + } + + # use plyr rbind.fill so we can combine disparate version info frames + stats=rbind.fill(stats, dirstats) + stats_names=rbind(stats_names, datasetname) +} + +rownames(stats) = stats_names + +# Rotate the tibble so we get data dirs as the columns +spun_stats = as_tibble(cbind(What=names(stats), t(stats))) + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE), + theme=ttheme(base_size=6), + rows=NULL + )) + +# It may seem odd doing a grid of 1x1, but it should ensure we get a uniform format and +# layout to match the other charts and tables in the report. 
+master_plot = grid.arrange( + stats_plot, + nrow=1, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-reads.R b/tests/metrics/report/report_dockerfile/fio-reads.R new file mode 100644 index 000000000..5f3f1c212 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-reads.R @@ -0,0 +1,269 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for `fio` random read storage IO tests. + + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randread-128", + "fio-randread-256", + "fio-randread-512", + "fio-randread-1k", + "fio-randread-2k", + "fio-randread-4k", + "fio-randread-8k", + "fio-randread-16k", + "fio-randread-32k", + "fio-randread-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + +# Where to store up the stats for the tables +read_bw_stats=c() +read_iops_stats=c() +read_lat95_stats=c() +read_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-ref the test named unique data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. + clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentile numbers for the boxplot + # Plotting all values for all runtimes and blocksizes is just way too + # noisy to make a meaninful picture, so we use this subset. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the read 'jobs'. 
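+ # fio reports per-job 'bw' in KiB/s, so the summed value is divided by 1024 to give MiB/s for plotting.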
+ mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Collect up as sets across all files and runtimes. + data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + read_bw_stats=cbind(read_bw_stats, bw_dirstats) + colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname + + read_iops_stats=cbind(read_iops_stats, iops_dirstats) + colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname + + read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats) + colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname + read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats) + colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats) +read_bw_stats=cbind(read_bw_stats, Units=rep("MB/s", nrow(read_bw_stats))) + +read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats) +read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats))) + +read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats) +read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats))) +read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats) +read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats))) + +# Bandwidth line plot +read_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# IOPS line plot +read_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# 95 and 99 percentile box plot +read_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, 
percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") +# it would be nice to use the same legend theme as the other plots on this +# page, but because of the number of entries it tends to flow off the picture. +# theme( +# axis.text.x=element_text(angle=90), +# legend.position=c(0.35,0.8), +# legend.title=element_text(size=5), +# legend.text=element_text(size=5), +# legend.background = element_rect(fill=alpha('blue', 0.2)) +# ) + +# As the boxplot is actually quite hard to interpret, also show a linegraph +# of all the percentiles for a single blocksize. +which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +read_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# Output the pretty pictures +graphics_plot = grid.arrange( + read_bw_line_plot, + read_iops_line_plot, + read_clat_box_plot, + read_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. This only works because we have a `results='asis'` in the Rmd +# R fragment. +cat("\n\n\\pagebreak\n") + +read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + read_bw_stats_plot, + read_iops_stats_plot, + read_lat95_stats_plot, + read_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-writes.R b/tests/metrics/report/report_dockerfile/fio-writes.R new file mode 100644 index 000000000..5fa223176 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-writes.R @@ -0,0 +1,260 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for 'fio' random writes storage IO tests. 
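+#
+# Note: as with the other report fragments, `inputdir` and `resultdirs` are not
+# defined in this script; they are assumed to be provided by the environment the
+# report wrapper sources before these fragments run (html.Rmd sources
+# /inputdir/Env.R). A minimal illustrative sketch of such an environment -- the
+# values below are examples only, not the real Env.R contents:
+#
+#   inputdir="/inputdir/"
+#   resultdirs=c("kata-results/", "runc-results/")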
+ + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randwrite-128", + "fio-randwrite-256", + "fio-randwrite-512", + "fio-randwrite-1k", + "fio-randwrite-2k", + "fio-randwrite-4k", + "fio-randwrite-8k", + "fio-randwrite-16k", + "fio-randwrite-32k", + "fio-randwrite-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + + +# Where to store up the stats for the tables +write_bw_stats=c() +write_iops_stats=c() +write_lat95_stats=c() +write_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific named data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. + clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentiles for the boxplot diagram. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the write 'jobs'. 
+ mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Store away as single sets + data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + write_bw_stats=cbind(write_bw_stats, bw_dirstats) + colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname + + write_iops_stats=cbind(write_iops_stats, iops_dirstats) + colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname + + write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats) + colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname + write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats) + colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats) +write_bw_stats=cbind(write_bw_stats, Units=rep("MB/s", nrow(write_bw_stats))) + +write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats) +write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats))) + +write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats) +write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats))) +write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats) +write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats))) + +# lineplot of total bandwidth across blocksizes. +write_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# lineplot of IOPS across blocksizes +write_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across +# the blocksizes. 
+write_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") + + +# completion latency line plot across the percentiles, for a specific blocksize only +# as otherwise the graph would be far too noisy. +which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +write_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +master_plot = grid.arrange( + write_bw_line_plot, + write_iops_line_plot, + write_clat_box_plot, + write_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. This only works because we have a `results='asis'` in the Rmd +# R fragment. +cat("\n\n\\pagebreak\n") + +write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + write_bw_stats_plot, + write_iops_stats_plot, + write_lat95_stats_plot, + write_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/footprint-density.R b/tests/metrics/report/report_dockerfile/footprint-density.R new file mode 100644 index 000000000..3539dc9cc --- /dev/null +++ b/tests/metrics/report/report_dockerfile/footprint-density.R @@ -0,0 +1,111 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show system memory reduction, and hence container 'density', by analysing the +# scaling footprint data results and the 'system free' memory. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
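+
+# `test_name_extra` is not defined in this script: it is expected to be set by
+# the Rmd chunk that sources this file (metrics_report.Rmd sets
+# test_name_extra="-ksm" before source('footprint-density.R') so that the
+# KSM-enabled footprint results are picked up). The patterns below then match
+# the per-payload footprint result files for that variant.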
+ +testnames=c( + paste("footprint-busybox.*", test_name_extra, sep=""), + paste("footprint-mysql.*", test_name_extra, sep=""), + paste("footprint-elasticsearch.*", test_name_extra, sep="") +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + matchdir=paste(inputdir, currentdir, sep="") + matchfile=paste(testname, '\\.json', sep="") + files=list.files(matchdir, pattern=matchfile) + if ( length(files) == 0 ) { + #warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing")) + } + for (ffound in files) { + fname=paste(inputdir, currentdir, ffound, sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test name specific data + shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) + fdata=fdata[[shortname]] + + payload=fdata$Config$payload + testname=paste(datasetname, payload) + + cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024)) + cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr)) + cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"]))) + cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) )) + + # Gather our statistics + sdata=data.frame(num_containers=length(cdata[, "avail_mb"])) + # Pick out the last avail_decr value - which in theory should be + # the most we have consumed... + sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])]) + sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers) + sdata=cbind(sdata, runtime=testname) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + s = c( + "Test"=testname, + "n"=sdata$num_containers, + "size"=(sdata$mem_consumed) / 1024, + "kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1), + "n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1) + ) + + rstats=rbind(rstats, s) + count = count + 1 + } + } +} + +# Set up the text table headers +colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) + + geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) + + xlab("Containers") + + ylab("System Avail (Mb)") + + ggtitle("System Memory free") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + line_plot, + stats_plot, + nrow=2, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/genreport.sh b/tests/metrics/report/report_dockerfile/genreport.sh new file mode 100644 index 000000000..5689c93d0 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/genreport.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +REPORTNAME="metrics_report.pdf" + +cd scripts + +Rscript --slave -e "library(knitr);knit('pdf.Rmd')" 
+Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')" + +Rscript --slave -e "library(knitr);knit('html.Rmd')" +Rscript --slave -e "library(knitr);pandoc('html.md', format='html')" + +cp /scripts/pdf.pdf /outputdir/${REPORTNAME} +cp /scripts/figure/*.png /outputdir/ +echo "PNGs of graphs and tables can be found in the output directory." +echo "The report, named ${REPORTNAME}, can be found in the output directory" diff --git a/tests/metrics/report/report_dockerfile/html.Rmd b/tests/metrics/report/report_dockerfile/html.Rmd new file mode 100644 index 000000000..86229f335 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/html.Rmd @@ -0,0 +1,23 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +title: "Kata Containers metrics report" +author: "Auto generated" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + html_document: +urlcolor: blue +--- + +```{r setup, include=FALSE} +#Set these opts to get pdf images which fit into beamer slides better +opts_chunk$set(dev = 'png') +# Pick up any env set by the invoking script, such as the root dir of the +# results data tree +source("/inputdir/Env.R") +``` + +```{r child = 'metrics_report.Rmd'} +``` diff --git a/tests/metrics/report/report_dockerfile/lifecycle-time.R b/tests/metrics/report/report_dockerfile/lifecycle-time.R new file mode 100644 index 000000000..4969f23c9 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/lifecycle-time.R @@ -0,0 +1,157 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display how long the various phases of a container lifecycle (run, execute, die etc. +# take. + +library(ggplot2) # ability to plot nicely. +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
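+
+# A rough sketch of the boot-times.json layout this script reads (field names
+# are taken from the accesses below; the values shown are only illustrative):
+#
+#   { "boot-times": { "Results": {
+#       "to-workload": { "Result": [0.62, 0.60, ...] },
+#       "to-quit":     { "Result": [1.10, 1.05, ...] },
+#       "to-kernel":   { "Result": [0.20, 0.19, ...] },
+#       "in-kernel":   { "Result": [0.35, 0.33, ...] },
+#       "total":       { "Result": [1.80, 1.75, ...] } } } }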
+ +testnames=c( + "boot-times" +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific name + fdata=fdata[[testname]] + + cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result)) + cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result)) + + cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result)) + cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result)) + cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result)) + + cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"]))) + cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) )) + + # Calculate some stats for total time + sdata=data.frame(workload_mean=mean(cdata$workload)) + sdata=cbind(sdata, workload_min=min(cdata$workload)) + sdata=cbind(sdata, workload_max=max(cdata$workload)) + sdata=cbind(sdata, workload_sd=sd(cdata$workload)) + sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100)) + sdata=cbind(sdata, runtime=datasetname) + + sdata=cbind(sdata, quit_mean = mean(cdata$quit)) + sdata=cbind(sdata, quit_min = min(cdata$quit)) + sdata=cbind(sdata, quit_max = max(cdata$quit)) + sdata=cbind(sdata, quit_sd = sd(cdata$quit)) + sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100) + + sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel)) + sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel)) + sdata=cbind(sdata, total_mean = mean(cdata$total)) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + # Store away some stats for the text table + dirstats[count]=round(sdata$tokernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$inkernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$workload_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$quit_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$total_mean, digits=2) + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +unts=c("s", "s", "s", "s", "s") +rstats=rbind(rstats, unts) +rstats_names=rbind(rstats_names, "Units") + + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c() + for( i in 1:5) { + difference = as.double(rstats[2,i]) - as.double(rstats[1,i]) + val = 100 * (difference/as.double(rstats[1,i])) + diff[i] = paste(round(val, digits=2), "%", sep=" ") + } + + rstats=rbind(rstats, diff) + rstats_names=rbind(rstats_names, "Diff") +} + +rstats=cbind(rstats_names, rstats) + +# Set up the text table headers +colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot") + + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE), + theme=ttheme(base_size=8), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_line( data=data, aes(count, workload, color=runtime)) + + geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") + + xlab("Iteration") + + ylab("Time (s)") + + ggtitle("Boot to workload", subtitle="First container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +boot_boxplot <- ggplot() + + geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) + + ylim(0, NA) + + ylab("Time (s)") + +# convert the stats to a long format so we can more easily do a side-by-side barplot +longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean) + +bar_plot <- ggplot() + + geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Phase") + + ylab("Time (s)") + + ggtitle("Lifecycle phase times", subtitle="Mean") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + bar_plot, + line_plot, + stats_plot, + boot_boxplot, + nrow=2, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/mem-in-cont.R b/tests/metrics/report/report_dockerfile/mem-in-cont.R new file mode 100644 index 000000000..8dede6b99 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/mem-in-cont.R @@ -0,0 +1,142 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +testnames=c( + "memory-footprint-inside-container" +) + +data=c() +rstats=c() +rstats_rows=c() +rstats_cols=c() + +# For each set of results +for (currentdir in resultdirs) { + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$requested=fdata$Results$memrequest$Result + fdata$total=fdata$Results$memtotal$Result + fdata$free=fdata$Results$memfree$Result + fdata$avail=fdata$Results$memavailable$Result + + # And lets work out what % we have 'lost' between the amount requested + # and the total the container actually sees. 
+ fdata$lost=fdata$requested - fdata$total + fdata$pctotal= 100 * (fdata$lost/ fdata$requested) + + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$requested, + Type="requested", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$total, + Type="total", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$free, + Type="free", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$avail, + Type="avail", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$lost, + Type="lost", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$pctotal, + Type="% consumed", + Runtime=fdata$Runtime )) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$requested, digits=2) ) + dirstats=rbind(dirstats, round(fdata$total, digits=2) ) + dirstats=rbind(dirstats, round(fdata$free, digits=2) ) + dirstats=rbind(dirstats, round(fdata$avail, digits=2) ) + dirstats=rbind(dirstats, round(fdata$lost, digits=2) ) + dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed") + +unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... + diff=c() + # Just the first three entries - meaningless for the pctotal entry + for (n in 1:5) { + difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) + val = 100 * (difference/as.double(rstats[n,1])) + diff=rbind(diff, round(val, digits=2)) + } + + # Add a blank entry for the other entries + diff=rbind(diff, "") + rstats=cbind(rstats, diff) + rstats_cols=append(rstats_cols, "Diff %") +} + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=rstats_rows, cols=rstats_cols + )) + +bardata <- subset(data, Type %in% c("requested", "total", "free", "avail")) +# plot how samples varioed over 'time' +barplot <- ggplot() + + geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + + xlab("Measure") + + ylab("Size (Kb)") + + ggtitle("In-container memory statistics") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + barplot, + stats_plot, + nrow=2, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/memory-footprint.R b/tests/metrics/report/report_dockerfile/memory-footprint.R new file mode 100644 index 000000000..4d5f53129 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/memory-footprint.R @@ -0,0 +1,121 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
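+
+# Two result files are compared for each dataset: the plain footprint run and
+# the KSM-enabled one (labelled "noKSM"/"KSM" below). Each file is expected to
+# carry its average PSS figure under Results$average$Result, which is the only
+# per-file value this script extracts.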
+ +testnames=c( + "memory-footprint", + "memory-footprint-ksm" +) + +resultsfilesshort=c( + "noKSM", + "KSM" +) + +data=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$Result=fdata$Results$average$Result + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + fdata$Count=seq_len(length(fdata$Result)) + + # Calculate some stats + fdata.mean = mean(fdata$Result) + fdata.min = min(fdata$Result) + fdata.max = max(fdata$Result) + fdata.sd = sd(fdata$Result) + fdata.cov = (fdata.sd / fdata.mean) * 100 + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$Result, + Count=fdata$Count, + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats[count]=round(fdata.mean, digits=2) + + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +rstats=cbind(rstats_names, rstats) +unts=rep("Kb", length(resultdirs)) + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... + diff=c("diff") + difference = (as.double(rstats[2,2]) - as.double(rstats[1,2])) + val = 100 * (difference/as.double(rstats[1,2])) + diff[2] = round(val, digits=2) + difference = (as.double(rstats[2,3]) - as.double(rstats[1,3])) + val = 100 * (difference/as.double(rstats[1,3])) + diff[3] = round(val, digits=2) + rstats=rbind(rstats, diff) + + unts[3]="%" +} + +rstats=cbind(rstats, unts) + +# Set up the text table headers +colnames(rstats)=c("Results", resultsfilesshort, "Units") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +point_plot <- ggplot() + + geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) + + xlab("Dataset") + + ylab("Size (Kb)") + + ggtitle("Average PSS footprint", subtitle="per container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + point_plot, + stats_plot, + nrow=1, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd new file mode 100644 index 000000000..9dca2b49f --- /dev/null +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -0,0 +1,63 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +--- +\pagebreak + +# Introduction +This report compares the metrics between multiple sets of data generated from +the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md). 
+ +This report was generated using the data from the **`r resultdirs`** results directories. + +\pagebreak + +# Container scaling system footprint +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh) +measures the system memory footprint impact whilst running an increasing number +of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging) + is enabled. The results show how system memory is consumed for different sized +containers, and their average system memory footprint cost and density (how many +containers you can fit per Gb) is calculated. + +```{r footprint-density, echo=FALSE, fig.cap="System Memory density"} +test_name_extra="-ksm" +source('footprint-density.R') +rm(test_name_extra) +``` +\pagebreak + +# Memory used inside container +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh) +measures the memory inside a container taken by the container runtime. It shows the difference between the amount of memory requested for the container, and the amount the container can actually 'see'. + +The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values. + + +```{r mem-in-cont, echo=FALSE, fig.cap="System Memory density"} +source('mem-in-cont.R') +``` +\pagebreak + +# Container boot lifecycle times +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh) +uses the `date` command on the host and in the container, as well as data from the container +kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete +Docker container lifecycle take for the first launched container. + +To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finished cycle. + +```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"} +source('lifecycle-time.R') +``` +\pagebreak + +# Test setup details + +This table describes the test system details, as derived from the information contained +in the test results files. + + +```{r dut, echo=FALSE, fig.cap="System configuration details"} +source('dut-details.R') diff --git a/tests/metrics/report/report_dockerfile/network-cpu.R b/tests/metrics/report/report_dockerfile/network-cpu.R new file mode 100644 index 000000000..b6df50b30 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/network-cpu.R @@ -0,0 +1,132 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +testnames=c( + "cpu-information" +) + +resultsfilesshort=c( + "CPU" +) + +data=c() +rstats=c() +rstats_rows=c() +rstats_cols=c() + +Gdenom = (1000.0 * 1000.0 * 1000.0) + +# For each set of results +for (currentdir in resultdirs) { + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? 
+ fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$ips=fdata$Results$"instructions per cycle"$Result + fdata$Gcycles=fdata$Results$cycles$Result / Gdenom + fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$ips, + Type="ips", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Gcycles, + Type="Gcycles", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Ginstructions, + Type="Ginstr", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$ips, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("IPS", "GCycles", "GInstr") + +unts=c("Ins/Cyc", "G", "G") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c() + for (n in 1:3) { + difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) + val = 100 * (difference/as.double(rstats[n,1])) + diff=rbind(diff, round(val, digits=2)) + } + rstats=cbind(rstats, diff) + rstats_cols=append(rstats_cols, "Diff %") +} + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=rstats_rows, cols=rstats_cols + )) + +# plot how samples varioed over 'time' +ipsdata <- subset(data, Type %in% c("ips")) +ips_plot <- ggplot() + + geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + + xlab("Measure") + + ylab("IPS") + + ggtitle("Instructions Per Cycle") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr")) +cycles_plot <- ggplot() + + geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Measure") + + ylab("Count (G)") + + ggtitle("Cycles and Instructions") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + ips_plot, + cycles_plot, + stats_plot, + nrow=2, + ncol=2 ) diff --git a/tests/metrics/storage/fio-dockerfile/Dockerfile b/tests/metrics/storage/fio-dockerfile/Dockerfile new file mode 100644 index 000000000..62bba2772 --- /dev/null +++ b/tests/metrics/storage/fio-dockerfile/Dockerfile @@ -0,0 +1,26 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with 'fio io tester' installed + +FROM docker.io/library/ubuntu:22.04 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +# URL for the fio tester +ENV FIO_TOOLING_URL "https://github.com/axboe/fio" + +RUN apt-get update --quiet && \ + apt-get install --quiet --no-install-recommends -y \ + bash \ + util-linux \ + procps \ + fio && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/ + +COPY workload/fio_bench.sh / +WORKDIR / +CMD ["/bin/bash"] diff --git a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh new file mode 100755 index 000000000..43aed0136 --- /dev/null +++ b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Description of the test: +# This test runs the 'fio benchmark' on kata containers +# https://fio.readthedocs.io/en/latest/ + +set -o pipefail + +# FIO variable settings + +# io-types supported: +# read, write, randread, randwrite, randrw, readwrite +io_type="read" +block_size="4k" +num_jobs="2" + +# FIO default settings +readonly ioengine="libaio" +readonly rate_process="linear" +readonly disable_buffered="1" +readonly iodepth="2" +readonly runtime="10s" +# ramp time +readonly rt="10s" +readonly fname="test.fio" +readonly workload_dir="/" +readonly workload_file="${workload_dir}${fname}" +readonly workload_size="10G" +readonly summary_file_local="/results.json" + +# Show help about this script +function help() { +cat << EOF +Usage: $0 + Description: + Runs FIO test using ctr to excercise IO in kata containers. + + Params: + + Operations are: + run-read-4k + run-write-4k + run-randread-4k + run-randwrite-4k + run-read-64k + run-write-64k + run-randread-64k + run-randwrite-64k + + : [Mandatory] + : [Optional] Any of the FIO supported ioengines, default: libaio. 
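+
+    Example (illustrative):
+    $0 run-randread-4k
+    $0 print-latest-results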
+EOF +} + +# Run from the host +function setup_workload() { + # create workload file: + if [ ! -f ${workload_file} ]; then + pushd "${workload_dir}" > /dev/null 2>&1 + dd if=/dev/urandom of="${workload_file}" bs=64M count=160 > /dev/null 2>&1 + fi +} + +# Run inside container +function launch_workload() { + # the parameters used in the test_name are accesible globally + local test_name="${io_type}_${block_size}_nj-${num_jobs}_${rate_process}_iodepth-${iodepth}_io-direct-${disable_buffered}" + + setup_workload + rm -f "${summary_file_local}" > /dev/null 2>&1 + fio \ + --name="${test_name}" \ + --output-format="json" \ + --filename="${workload_file}" \ + --size="${workload_size}" \ + --rate_process="${rate_process}" \ + --runtime="${runtime}" \ + --ioengine="${ioengine}" \ + --rw="${io_type}" \ + --direct="${disable_buffered}" \ + --numjobs="${num_jobs}" \ + --blocksize="${block_size}" \ + --ramp_time="${rt}" \ + --iodepth="${iodepth}" \ + --gtod_reduce="1" \ + --randrepeat="1" \ + | tee -a ${summary_file_local} > /dev/null 2>&1 +} + +function print_latest_results() { + [ ! -f "${summary_file_local}" ] && echo "Error: no results to display; you must run a test before requesting results display" && exit 1 + echo "$(cat ${summary_file_local})" +} + +function delete_workload() { + rm -f "${workload_file}" > /dev/null 2>&1 +} + +function main() { + local action="${1:-}" + num_jobs="${2:-1}" + + [[ ! ${num_jobs} =~ ^[0-9]+$ ]] && die "The number of jobs must be a positive integer" + + case "${action}" in + run-read-4k) launch_workload ;; + run-read-64k) block_size="64k" && launch_workload ;; + + run-write-4k) io_type="write" && launch_workload ;; + run-write-64k) block_size="64k" && io_type="write" && launch_workload ;; + + run-randread-4k) io_type="randread" && launch_workload ;; + run-randread-64k) block_size="64k" && io_type="randread" && launch_workload ;; + + run-randwrite-4k) io_type="randwrite" && launch_workload ;; + run-randwrite-64k) block_size="64k" && io_type="randwrite" && launch_workload ;; + + print-latest-results) print_latest_results ;; + delete-workload) delete_workload ;; + + *) >&2 echo "Invalid argument" ; help ; exit 1;; + esac +} + +main "$@" diff --git a/tests/metrics/storage/fio-k8s/.gitignore b/tests/metrics/storage/fio-k8s/.gitignore deleted file mode 100644 index 1caa97b15..000000000 --- a/tests/metrics/storage/fio-k8s/.gitignore +++ /dev/null @@ -1 +0,0 @@ -./cmd/fiotest/fio-k8s diff --git a/tests/metrics/storage/fio-k8s/Makefile b/tests/metrics/storage/fio-k8s/Makefile deleted file mode 100644 index ba96203ba..000000000 --- a/tests/metrics/storage/fio-k8s/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright (c) 2021-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) - -build: - make -C $(MKFILE_DIR)/cmd/fiotest/ gomod - make -C $(MKFILE_DIR)/cmd/fiotest/ build - -test-report: - $(MKFILE_DIR)/scripts/dax-compare-test/report/gen-html-fio-report.sh $(MKFILE_DIR)/cmd/fiotest/test-results/ - -test-report-interactive: - $(MKFILE_DIR)/scripts/dax-compare-test/report/run-docker-jupyter-server.sh $(MKFILE_DIR)/cmd/fiotest/test-results/ - -test: build - make -C $(MKFILE_DIR)/cmd/fiotest/ run - make test-report - -run: build - make -C $(MKFILE_DIR)/scripts/dax-compare-test/ run - -test-ci: build - make -C $(MKFILE_DIR)/cmd/fiotest/ runci diff --git a/tests/metrics/storage/fio-k8s/README.md b/tests/metrics/storage/fio-k8s/README.md deleted file mode 100644 index 
96798849a..000000000 --- a/tests/metrics/storage/fio-k8s/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# FIO test in Kubernetes - -This is an automation to run `fio` with Kubernetes. - -## Requirements: - -- Kubernetes cluster running. -- Kata configured as `runtimeclass`. - -## Test structure: - -- [fio-test]: Program wrapper to launch `fio` in a K8s pod. -- [pkg]: Library code that could be used for more `fio` automation. -- [configs]: Configuration files used by [fio-test]. -- [DAX-compare-test]: Script to run [fio-test] to generate `fio` data for Kata with/without `virtio-fs DAX` and K8s bare-metal runtime(`runc`). -- [report] Jupyter Notebook to create reports for data generated by [DAX-compare-test]. - -## Top-level Makefile targets - -- `build`: Build `fio` metrics. -- `test`: quick test, used to verify changes in [fio-test]. -- `run`: Run `fio` metrics and generate reports. -- `test-report-interactive`: Run python notebook in `localhost:8888`, useful to edit the report. -- `test-report`: Generate report from data generated by `make test`. - -[fio-test]:cmd/fiotest -[configs]:configs -[pkg]:pkg -[report]:scripts/dax-compare-test/report -[DAX-compare-test]:scripts/dax-compare-test/README.md diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile b/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile deleted file mode 100644 index 4c8a27c69..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) - -build: - GO111MODULE=on go build - -run: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata $(MKFILE_DIR)/../../configs/example-config/ - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name runc --container-runtime runc $(MKFILE_DIR)/../../configs/example-config/ - -gomod: - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/k8s=../../pkg/k8s - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../../pkg/exec - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/env=../../pkg/env - go mod tidy - -runci: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata $(MKFILE_DIR)/../../configs/example-config/ diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod b/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod deleted file mode 100644 index 4d6f8fb45..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod +++ /dev/null @@ -1,24 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s - -go 1.19 - -replace github.com/kata-containers/kata-containers/tests/metrics/exec => ../../pkg/exec - -replace github.com/kata-containers/kata-containers/tests/metrics/k8s => ../../pkg/k8s - -replace github.com/kata-containers/kata-containers/tests/metrics/env => ../../pkg/env - -require ( - github.com/kata-containers/kata-containers/tests/metrics/env v0.0.0-00010101000000-000000000000 - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 - github.com/kata-containers/kata-containers/tests/metrics/k8s v0.0.0-00010101000000-000000000000 - github.com/pkg/errors v0.9.1 - github.com/sirupsen/logrus v1.9.3 - github.com/urfave/cli v1.22.14 -) - -require ( - github.com/cpuguy83/go-md2man/v2 v2.0.2 // 
indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect -) diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum b/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum deleted file mode 100644 index 45fbeb4a0..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum +++ /dev/null @@ -1,31 +0,0 @@ -github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= -github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go b/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go deleted file mode 100644 index de7e7b2ff..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package main - -import ( - "encoding/csv" - 
"encoding/json" - "fmt" - "os" - "path" - "path/filepath" - "strings" - "time" - - env "github.com/kata-containers/kata-containers/tests/metrics/env" - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" - "github.com/kata-containers/kata-containers/tests/metrics/k8s" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/urfave/cli" -) - -var log = logrus.New() - -var ( - optContainerRuntime = "container-runtime" - optDebug = "debug" - optOutputDir = "output-dir" - optTestName = "test-name" - // fio options - optFioBlockSize = "fio.block-size" - optFioDirect = "fio.direct" - optFioIoDepth = "fio.iodepth" - optFioSize = "fio.size" - optFioNumJobs = "fio.numjobs" -) - -type RwFioOp struct { - BandwidthKb int `json:"bw"` - IOPS float64 `json:"iops"` -} - -type fioResult struct { - GlobalOptions struct { - IOEngine string `json:"ioengine"` - RW string `json:"rw"` - } `json:"global options"` - Jobs []struct { - JobName string `json:"jobname"` - Read RwFioOp `json:"read"` - Write RwFioOp `json:"write"` - } `json:"jobs"` -} - -// Run fio in k8s metrics test in K8s -func (c fioTestConfig) run() (result fioResult, err error) { - log.Infof("Running fio config: %s", c.jobFile) - - pod := k8s.Pod{YamlPath: c.k8sYaml} - - log.Infof("Delete pod if already created") - err = pod.Delete() - if err != nil { - return result, err - } - - log.Infof("Create pod: %s", pod.YamlPath) - err = pod.Run() - if err != nil { - return result, err - } - - defer func() { - log.Info("Deleting pod") - delErr := pod.Delete() - if delErr != nil { - log.Error(delErr) - if err != nil { - err = errors.Wrapf(err, "Could not delete pod after: %s", delErr) - } - } - }() - - destDir := "/home/fio-jobs" - _, err = pod.Exec("mkdir " + destDir) - if err != nil { - return result, err - } - - dstJobFile := path.Join(destDir, "jobFile") - err = pod.CopyFromHost(c.jobFile, dstJobFile) - if err != nil { - return result, err - } - - _, err = pod.Exec("apt update") - if err != nil { - return result, err - } - _, err = pod.Exec("apt install -y fio") - if err != nil { - return result, err - } - - err = env.DropCaches() - if err != nil { - return result, err - } - - var directStr string - if c.direct { - directStr = "1" - } else { - directStr = "0" - } - - cmdFio := "fio" - cmdFio += " --append-terse " - cmdFio += " --blocksize=" + c.blocksize - cmdFio += " --direct=" + directStr - cmdFio += " --directory=" + c.directory - cmdFio += " --iodepth=" + c.iodepth - cmdFio += " --numjobs=" + c.numjobs - cmdFio += " --runtime=" + c.runtime - cmdFio += " --size=" + c.size - cmdFio += " --output-format=json" - cmdFio += " " + dstJobFile - - log.Infof("Exec fio") - output, err := pod.Exec(cmdFio, k8s.ExecOptShowStdOut()) - if err != nil { - return result, err - } - err = json.Unmarshal([]byte(output), &result) - if err != nil { - return result, errors.Wrapf(err, "failed to unmarshall output : %s", output) - } - - log.Infof("ioengine:%s", result.GlobalOptions.IOEngine) - log.Infof("rw:%s", result.GlobalOptions.RW) - if len(result.Jobs) == 0 { - return result, errors.New("No jobs found after parsing fio results") - } - - testDir := path.Join(c.outputDir, filepath.Base(c.jobFile)) - err = os.MkdirAll(testDir, 0775) - if err != nil { - return result, errors.Wrapf(err, "failed to create test directory for :%s", c.jobFile) - } - outputFile := path.Join(testDir, "output.json") - log.Infof("Store results output in : %s", outputFile) - - err = os.WriteFile(outputFile, []byte(output), 0644) - if err != nil { - return result, err - } 
- - return result, nil -} - -type fioTestConfig struct { - //test options - k8sYaml string - containerRuntime string - outputDir string - - //fio options - blocksize string - directory string - iodepth string - numjobs string - jobFile string - loops string - runtime string - size string - - direct bool -} - -func runFioJobs(testDirPath string, cfg fioTestConfig) (results []fioResult, err error) { - fioJobsDir, err := filepath.Abs(path.Join(testDirPath, "fio-jobs")) - if err != nil { - return results, err - } - - files, err := os.ReadDir(fioJobsDir) - if err != nil { - log.Fatal(err) - return results, err - } - - if cfg.containerRuntime == "" { - return results, errors.New("containerRuntime is empty") - } - - podYAMLName := cfg.containerRuntime + ".yaml" - cfg.k8sYaml = path.Join(testDirPath, podYAMLName) - - if len(files) == 0 { - return results, errors.New("No fio configs found") - } - - for _, file := range files { - cfg.jobFile = path.Join(fioJobsDir, file.Name()) - r, err := cfg.run() - if err != nil { - return results, err - } - results = append(results, r) - - log.Infof("workload:%s", r.Jobs[0].JobName) - log.Infof("bw_r:%d", r.Jobs[0].Read.BandwidthKb) - log.Infof("IOPS_r:%f", r.Jobs[0].Read.IOPS) - log.Infof("bw_w:%d", r.Jobs[0].Write.BandwidthKb) - log.Infof("IOPS_w:%f", r.Jobs[0].Write.IOPS) - - waitTime := 5 - log.Debugf("Sleep %d seconds(if not wait sometimes create another pod timesout)", waitTime) - time.Sleep(time.Duration(waitTime) * time.Second) - } - return results, err - -} - -func generateResultsView(testName string, results []fioResult, outputDir string) error { - outputFile := path.Join(outputDir, "results.csv") - f, err := os.Create(outputFile) - if err != nil { - return err - } - defer f.Close() - - log.Infof("Creating results output in %s", outputFile) - - w := csv.NewWriter(f) - - headers := []string{"NAME", "WORKLOAD", "bw_r", "bw_w", "IOPS_r", "IOPS_w"} - err = w.Write(headers) - if err != nil { - return err - } - - for _, r := range results { - if len(r.Jobs) == 0 { - return errors.Errorf("fio result has no jobs: %v", r) - } - row := []string{testName} - row = append(row, r.Jobs[0].JobName) - row = append(row, fmt.Sprintf("%d", r.Jobs[0].Read.BandwidthKb)) - row = append(row, fmt.Sprintf("%d", r.Jobs[0].Write.BandwidthKb)) - row = append(row, fmt.Sprintf("%f", r.Jobs[0].Read.IOPS)) - row = append(row, fmt.Sprintf("%f", r.Jobs[0].Write.IOPS)) - if err := w.Write(row); err != nil { - return err - } - } - - w.Flush() - - return w.Error() -} - -func main() { - - app := &cli.App{ - Flags: []cli.Flag{ - &cli.BoolFlag{ - Name: optDebug, - Usage: "Logs in debug level", - }, - &cli.StringFlag{ - Name: optTestName, - Value: "kata-fio-test", - Usage: "Change the fio test name for reports", - }, - &cli.StringFlag{ - Name: optOutputDir, - Value: ".", - Usage: "Use a file to store results", - }, - &cli.StringFlag{ - Name: optContainerRuntime, - Value: "kata", - Usage: "Choose the runtime to use", - }, - //fio options - &cli.StringFlag{ - Name: optFioSize, - Value: "200M", - Usage: "File size to use for tests", - }, - &cli.StringFlag{ - Name: optFioBlockSize, - Value: "4K", - Usage: "Block size for fio tests", - }, - &cli.BoolFlag{ - Name: optFioDirect, - Usage: "Use direct io", - }, - &cli.StringFlag{ - Name: optFioIoDepth, - Value: "16", - Usage: "Number of I/O units to keep in flight against the file", - }, - &cli.StringFlag{ - Name: optFioNumJobs, - Value: "1", - Usage: "Number of clones (processes/threads performing the same workload) of this job", - }, - }, - Action: 
func(c *cli.Context) error { - jobsDir := c.Args().First() - - if jobsDir == "" { - cli.SubcommandHelpTemplate = strings.Replace(cli.SubcommandHelpTemplate, "[arguments...]", "", -1) - cli.ShowCommandHelp(c, "") - return errors.New("Missing ") - } - - if c.Bool(optDebug) { - log.SetLevel(logrus.DebugLevel) - k8s.Debug = true - env.Debug = true - } - - exec.SetLogger(log) - k8s.SetLogger(log) - env.SetLogger(log) - - testName := c.String(optTestName) - - outputDir, err := filepath.Abs(path.Join(c.String(optOutputDir), testName)) - if err != nil { - return err - } - - cfg := fioTestConfig{ - blocksize: c.String(optFioBlockSize), - direct: c.Bool(optFioDirect), - directory: ".", - iodepth: c.String(optFioIoDepth), - loops: "3", - numjobs: c.String(optFioNumJobs), - runtime: "20", - size: c.String(optFioSize), - containerRuntime: c.String(optContainerRuntime), - outputDir: outputDir, - } - - log.Infof("Results will be created in %s", cfg.outputDir) - - err = os.MkdirAll(cfg.outputDir, 0775) - if err != nil { - return err - } - - results, err := runFioJobs(jobsDir, cfg) - if err != nil { - return err - } - - return generateResultsView(c.String(optTestName), results, outputDir) - }, - } - - err := app.Run(os.Args) - if err != nil { - log.Fatal(err) - } - -} diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job deleted file mode 100644 index 9e0edb96c..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2022 Intel Corporation -[global] -name=io_uring -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=io_uring - -[randrw-io_uring] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job deleted file mode 100644 index 327852e44..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-libaio -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=libaio - -[randrw-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job deleted file mode 100644 index 3f7f2b6ed..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2022 Intel Corporation -[global] -name=sync -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=sync - -[randrw-sync] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml deleted file mode 100644 index 08f397dea..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml 
b/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml deleted file mode 100644 index 4fd96d3c1..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job deleted file mode 100644 index 5ab226061..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randread-libaio -filename=fio-file -rw=randread -ioengine=libaio - -[randread-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job deleted file mode 100644 index cd79854f2..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randread-mmap -rw=randread -ioengine=mmap - -[randread-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job deleted file mode 100644 index 327852e44..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-libaio -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=libaio - -[randrw-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job deleted file mode 100644 index cf0c1288a..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-mmap -rw=randrw -rwmixread=75 -ioengine=mmap - -[randrw-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job deleted file mode 100644 index ef3bfc5a5..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randwrite-libaio -filename=fio-file -rw=randwrite -ioengine=libaio - -[randwrite-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job deleted file mode 100644 index 0d4f6ef73..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] 
-name=randwrite-mmap -rw=randwrite -ioengine=mmap - -[randwrite-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job deleted file mode 100644 index 6f8a16a3f..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-libaio -filename=fio-file -rw=read -ioengine=libaio - -[seqread-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job deleted file mode 100644 index 9829726d3..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-mmap -rw=read -ioengine=mmap - -[seqread-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job deleted file mode 100644 index a1f54ca7c..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-psync -filename=fio-file -rw=read - -[seqread-psync] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job deleted file mode 100644 index 9927e05c9..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqwrite-libaio -filename=fio-file -rw=write -ioengine=libaio - -[seqwrite-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job deleted file mode 100644 index e3485db4f..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqwrite-mmap -filename=fio-file -rw=write -ioengine=mmap - -[seqwrite-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml b/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml deleted file mode 100644 index 08f397dea..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml b/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml deleted file mode 100644 index 4fd96d3c1..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -## Copyright (c) 2021 
Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/fio-test-ci.sh b/tests/metrics/storage/fio-k8s/fio-test-ci.sh deleted file mode 100755 index a05ca9bf0..000000000 --- a/tests/metrics/storage/fio-k8s/fio-test-ci.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2022-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -set -e - -# General env -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../lib/common.bash" -FIO_PATH="${GOPATH}/src/github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s" -TEST_NAME="${TEST_NAME:-fio}" - -function main() { - cmds=("bc" "jq") - check_cmds "${cmds[@]}" - check_processes - init_env - - export KUBECONFIG="$HOME/.kube/config" - - pushd "${FIO_PATH}" - echo "INFO: Running K8S FIO test" - make test-ci - popd - - test_result_file="${FIO_PATH}/cmd/fiotest/test-results/kata/randrw-sync.job/output.json" - - metrics_json_init - local read_io=$(cat $test_result_file | grep io_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_bw=$(cat $test_result_file | grep bw_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_90_percentile=$(cat $test_result_file | grep 90.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_95_percentile=$(cat $test_result_file | grep 95.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_io=$(cat $test_result_file | grep io_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_bw=$(cat $test_result_file | grep bw_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_90_percentile=$(cat $test_result_file | grep 90.000000 | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_95_percentile=$(cat $test_result_file | grep 95.000000 | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - - metrics_json_start_array - local json="$(cat << EOF - { - "readio": { - "Result" : $read_io, - "Units" : "bytes" - }, - "readbw": { - "Result" : $read_bw, - "Units" : "bytes/sec" - }, - "read90percentile": { - "Result" : $read_90_percentile, - "Units" : "ns" - }, - "read95percentile": { - "Result" : $read_95_percentile, - "Units" : "ns" - }, - "writeio": { - "Result" : $write_io, - "Units" : "bytes" - }, - "writebw": { - "Result" : $write_bw, - "Units" : "bytes/sec" - }, - "write90percentile": { - "Result" : $write_90_percentile, - "Units" : "ns" - }, - "write95percentile": { - "Result" : $write_95_percentile, - "Units" : "ns" - } - } -EOF -)" - metrics_json_add_array_element "$json" - metrics_json_end_array "Results" - metrics_json_save - - check_processes -} - -main "$@" diff --git a/tests/metrics/storage/fio-k8s/pkg/env/Makefile b/tests/metrics/storage/fio-k8s/pkg/env/Makefile deleted file mode 100644 index a1c0a7779..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -gomod: - GO111MODULE=on go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../exec - 
GO111MODULE=on go mod tidy diff --git a/tests/metrics/storage/fio-k8s/pkg/env/env.go b/tests/metrics/storage/fio-k8s/pkg/env/env.go deleted file mode 100644 index 034127f96..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/env.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package env - -import ( - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" -) - -// logger interface for pkg -var log logger -var Debug bool = false - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -var sysDropCachesPath = "/proc/sys/vm/drop_caches" - -func DropCaches() (err error) { - log.Infof("drop caches") - _, err = exec.ExecCmd("sync", Debug) - if err != nil { - return err - } - - _, err = exec.ExecCmd("echo 3 | sudo tee "+sysDropCachesPath, Debug) - if err != nil { - return err - } - return nil -} diff --git a/tests/metrics/storage/fio-k8s/pkg/env/go.mod b/tests/metrics/storage/fio-k8s/pkg/env/go.mod deleted file mode 100644 index 4e59e60d9..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s/exec - -go 1.19 - -require ( - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 // indirect - github.com/pkg/errors v0.9.1 // indirect -) - -replace github.com/kata-containers/kata-containers/tests/metrics/exec => ../exec diff --git a/tests/metrics/storage/fio-k8s/pkg/env/go.sum b/tests/metrics/storage/fio-k8s/pkg/env/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go b/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go deleted file mode 100644 index 6e409f427..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package exec - -import ( - "bytes" - "io" - "os" - "os/exec" - - "github.com/pkg/errors" -) - -// logger interface for pkg -var log logger - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -// Exec a command -// err != nil if command fails to execute -// output is a string with a combined stdout and stderr -func ExecCmd(c string, showInStdout bool) (stdout string, err error) { - if c == "" { - return "", errors.New("command is empty") - } - - log.Debugf("Exec: %s", c) - cmd := exec.Command("bash", "-o", "pipefail", "-c", c) - var stdBuffer bytes.Buffer - var writers []io.Writer - writers = append(writers, &stdBuffer) - if showInStdout { - writers = append(writers, os.Stdout) - } - mw := io.MultiWriter(writers...) 
- - cmd.Stdout = mw - cmd.Stderr = mw - - err = cmd.Run() - output := stdBuffer.String() - - return stdBuffer.String(), errors.Wrap(err, output) -} - -// Exec a command -// Send output to Stdout and Stderr -func ExecStdout(c string) error { - if c == "" { - return errors.New("command is empty") - } - - log.Debugf("Exec: %s", c) - cmd := exec.Command("bash", "-o", "pipefail", "-c", c) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd.Run() -} diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/go.mod b/tests/metrics/storage/fio-k8s/pkg/exec/go.mod deleted file mode 100644 index c74fcbeb8..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s/exec - -go 1.19 - -require github.com/pkg/errors v0.9.1 diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/go.sum b/tests/metrics/storage/fio-k8s/pkg/exec/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile b/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile deleted file mode 100644 index f5bee6d16..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -gomod: - GO111MODULE=on go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../exec - GO111MODULE=on go mod tidy diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go b/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go deleted file mode 100644 index 9be25f618..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package k8s - -import ( - "fmt" - - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" -) - -type execOpt struct { - showInStdOut bool -} - -type ExecOption func(e *execOpt) - -func ExecOptShowStdOut() ExecOption { - return func(e *execOpt) { - e.showInStdOut = true - } - -} - -func (p *Pod) Exec(cmd string, opts ...ExecOption) (output string, err error) { - log.Debugf("Exec %q in %s", cmd, p.YamlPath) - o := &execOpt{showInStdOut: false} - for _, opt := range opts { - opt(o) - - } - execCmd := fmt.Sprintf("kubectl exec -f %s -- /bin/bash -c %q", p.YamlPath, cmd) - return exec.ExecCmd(execCmd, Debug || o.showInStdOut) -} diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod b/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod deleted file mode 100644 index 6a349002a..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/k8s - -go 1.19 - -replace github.com/kata-containers/kata-containers/tests/metrics/exec => ../exec - -require ( - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 // indirect - github.com/pkg/errors v0.9.1 // indirect -) diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum b/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 
h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go b/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go deleted file mode 100644 index 2fef788cc..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package k8s - -import ( - "fmt" - - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" - "github.com/pkg/errors" -) - -// logger interface for pkg -var log logger -var Debug bool = false - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -type Pod struct { - YamlPath string -} - -func (p *Pod) waitForReady() (err error) { - log.Debugf("Wait for pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl wait --for=condition=ready -f "+p.YamlPath, Debug) - return err -} - -func (p *Pod) Run() (err error) { - - log.Debugf("Creating K8s Pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl apply -f "+p.YamlPath, Debug) - if err != nil { - return errors.Wrapf(err, "Failed to run pod %s", p.YamlPath) - } - - err = p.waitForReady() - if err != nil { - return errors.Wrapf(err, "Failed to wait for pod %s", p.YamlPath) - } - return err -} - -func (p *Pod) Delete() (err error) { - log.Debugf("Delete pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl delete --ignore-not-found -f "+p.YamlPath, Debug) - return errors.Wrapf(err, "Failed to delete pod %s", p.YamlPath) -} - -func (p *Pod) CopyFromHost(src, dst string) (err error) { - podName, err := exec.ExecCmd("kubectl get -f "+p.YamlPath+" -o jsonpath={.metadata.name}", Debug) - if err != nil { - return nil - } - - log.Debugf("Copy from host %q->%q in pod %s", src, dst, p.YamlPath) - execCmd := fmt.Sprintf("kubectl cp %s %s:%s", src, podName, dst) - _, err = exec.ExecCmd(execCmd, Debug) - return err -} diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile deleted file mode 100644 index a33a5015c..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) -run: - $(MKFILE_DIR)/compare-virtiofsd-dax.sh - "$(MKFILE_DIR)/report/gen-html-fio-report.sh" "./results" diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md deleted file mode 100644 index c8b00f0f9..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# FIO in Kubernetes - -This test runs `fio` jobs to measure how Kata Containers work using virtio-fs DAX. The test works using Kubernetes. -The test has to run in a single node cluster, it is needed as the test modifies Kata configuration file. - -The `virtio-fs` options that this test will use are: - -* `cache mode` Only `auto`, this is the most compatible mode for most of the Kata use cases. Today this is default in Kata. -* `thread pool size` Restrict the number of worker threads per request queue, zero means no thread pool. 
-* `DAX` -``` -File contents can be mapped into a memory window on the host, allowing the guest to directly access data from the host page cache. This has several advantages: The guest page cache is bypassed, reducing the memory footprint. No communication is necessary -to access file contents, improving I/O performance. Shared file access is coherent between virtual machines on the same host even with mmap. -``` - -This test by default iterates over different `virtio-fs` configurations. - -| test name | DAX | thread pool size | cache mode | -|---------------------------|-----|------------------|------------| -| pool_0_cache_auto_no_DAX | no | 0 | auto | -| pool_0_cache_auto_DAX | yes | 0 | auto | - -The `fio` options used are: - -`ioengine`: How the IO requests are issued to the kernel. -* `libaio`: Supports async IO for both direct and buffered IO. -* `mmap`: File is memory mapped with mmap(2) and data copied to/from using memcpy(3). - -`rw type`: Type of I/O pattern. -* `randread`: Random reads. -* `randrw`: Random mixed reads and writes. -* `randwrite`: Random writes. -* `read`: Sequential reads. -* `write`: Sequential writes. - -Additional notes: Some jobs contain a `multi` prefix. This means that the same job runs more than once at the same time using its own file. - -### Static `fio` values: - -Some `fio` values are not modified over all the jobs. - -* `runtime`: Tell `fio` to terminate processing after the specified period of time(seconds). -* `loops`: Run the specified number of iterations of this job. Used to repeat the same workload a given number of times. -* `iodepth`: Number of I/O units to keep in flight against the file. Note that increasing `iodepth` beyond 1 will not affect synchronous `ioengine`. -* `size`: The total size of file I/O for each thread of this job. -* `direct`: If value is true, use non-buffered I/O. This is usually O_`DIRECT`. -* `blocksize`: The block size in bytes used for I/O units. 
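For readers following the tables above: each job under `configs/*/fio-jobs/` is a minimal fio INI file that only fixes the I/O pattern and `ioengine`; the static values listed above (`size`, `blocksize`, `direct`, `iodepth`, `runtime`) are largely supplied on the `fio` command line by the Go runner rather than written into the job file. An illustrative sketch of one such job, modeled on the `randrw-libaio` job removed in this patch:

```
# SPDX-License-Identifier: Apache-2.0
# Illustrative example only: a mixed random read/write job (75% reads)
# using the libaio engine. Size, block size, iodepth, runtime and
# direct/buffered mode are appended to the fio command line by the runner.
[global]
name=randrw-libaio
filename=fio-file
rw=randrw
rwmixread=75
ioengine=libaio

[randrw-libaio]
```

The runner then executes `fio --output-format=json <jobfile>` inside the pod and parses the resulting JSON into the per-job bandwidth and IOPS figures written to `results.csv`.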
diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh deleted file mode 100755 index 248e37776..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") - -runtime_path="/opt/kata/bin/kata-runtime" -kata_config_path="/opt/kata/share/defaults/kata-containers/configuration.toml" - -results_dir="$(realpath ./)/results" - -KATA_RUNTIME="${KATA_RUNTIME_CLASS:-kata}" -BAREMETAL_RUNTIME="runc" -RUNTIME_CLASS="" - -FIO_SIZE="${FIO_SIZE:-500M}" -FIO_BLOCKSIZE="${FIO_BLOCKSIZE:-4K}" -VIRTIOFS_DAX_SIZE=${VIRTIOFS_DAX_SIZE:-600M} - -# set the base case for virtiofsd -set_base_virtiofs_config() { - # Running kata-qemu-virtiofs - # Defaults for virtiofs - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size ${VIRTIOFS_DAX_SIZE} -} - -## helper function: get name of current bash function -fn_name() { - echo "${FUNCNAME[1]}" -} - -# directory where results are stored -get_results_dir() { - local test_name - local test_result_dir - test_name="${1}" - test_result_dir="${results_dir}/${test_name}" - mkdir -p "${test_result_dir}" - echo "${test_result_dir}" -} - -# Collect kata env -# save kata config toml -# save output from kata-env -kata_env() { - local suffix=${1} - local config_path - local kata_env_bk - local kata_config_bk - kata_env_bk="$(get_results_dir "${suffix}")/kata-env.toml" - kata_config_bk="$(get_results_dir "${suffix}")/kata-config.toml" - - ${runtime_path} kata-env >"${kata_env_bk}" - config_path="$(${runtime_path} kata-env --json | jq .Runtime.Config.Path -r)" - cp "${config_path}" "${kata_config_bk}" -} - -# Collect the command used by virtiofsd -collect_qemu_virtiofs_cmd() { - local rdir - local test_name - test_name="${1}" - - rdir=$(get_results_dir "${test_name}") - # TODO -} - -# Run metrics runner -run_workload() { - local test_name - local test_result_file - local test_result_dir - - test_name="${1}" - - test_result_dir="$(get_results_dir "${test_name}")" - test_result_file="${test_result_dir}/test-out.txt" - - echo "Running for kata config: ${test_name}" - collect_qemu_virtiofs_cmd "$test_name" - - fio_runner_dir="${script_dir}/../../cmd/fiotest/" - fio_jobs="${script_dir}/../../configs/test-config/" - make -C "${fio_runner_dir}" build - pwd - set -x - "${fio_runner_dir}fio-k8s" \ - --debug \ - --fio.size "${FIO_SIZE}" \ - --fio.block-size "${FIO_BLOCKSIZE}" \ - --container-runtime "${RUNTIME_CLASS}" \ - --test-name "${test_name}" \ - --output-dir "$(dirname ${test_result_dir})" \ - "${fio_jobs}" | - tee \ - "${test_result_file}" - set +x -} - -pool_0_cache_auto_dax() { - local suffix="$(fn_name)" - - set_base_virtiofs_config - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_extra_args '["--thread-pool-size=0","-o","no_posix_lock","-o","xattr"]' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size 1024 - kata_env "${suffix}" - RUNTIME_CLASS="${KATA_RUNTIME}" - run_workload "${suffix}" -} - 
-pool_0_cache_auto_no_dax() { - local suffix="$(fn_name)" - - set_base_virtiofs_config - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_extra_args '["--thread-pool-size=0","-o","no_posix_lock","-o","xattr"]' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size 0 - - kata_env "${suffix}" - - RUNTIME_CLASS="${KATA_RUNTIME}" - run_workload "${suffix}" - echo "done" -} - -k8s_baremetal() { - local suffix="$(fn_name)" - - RUNTIME_CLASS="${BAREMETAL_RUNTIME}" - run_workload "${suffix}" -} - -main() { - - mkdir -p "${results_dir}" - - k8s_baremetal - pool_0_cache_auto_dax - pool_0_cache_auto_no_dax -} - -main $* diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb deleted file mode 100644 index d2e8da202..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb +++ /dev/null @@ -1,51 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "tWacOPbMYPtc" - }, - "source": [ - "# FIO comparision" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jXtTs6yldl_y" - }, - "outputs": [], - "source": [ - "import fio\n", - "fio.generate_report()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "fio.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py deleted file mode 100644 index 313f63313..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -import pandas as pd -import os -import re -import io -import glob -from IPython.display import display, Markdown -import matplotlib.pyplot as plt - -#Compare the tests results group by fio job. -#Input: -# df: dataset from `import_data()` -# metric: string of metrics provided in `df` -def compare_tests_group_by_fio_job(df, metric): - test_names, metric_df = group_metrics_group_by_testname(df, metric) - show_df(metric_df) - plot_df(metric_df,test_names) - -# Given a metric return results per test group by fio job. -# input: -# df: dataset from `import_data()` -# metric: string with the name of the metric to filter. -# output: -# dataset with fomat: -# 'workload' , 'name[0]' , ... 
, 'name[n]' -# -def group_metrics_group_by_testname(df, metric): - #name of each tests from results - names = set() - # Rows of new data set - rows = [] - # map: - # keys: name of fio job - # value: dict[k]:v where k: name of a test, v: value of test for metric` - workload = {} - - for k, row in df.iterrows(): - # name of a fio job - w = row['WORKLOAD'] - # name of tests - tname = row['NAME'] - names.add(tname) - # given a fio job name get dict of values - # if not previous values init empty dict - dict_values = workload.get(w, {}) - # For a given metric, add it into as value of dict_values[testname]=val - #e.g - # dict_values["test-name"] = row["IOPS"] - dict_values[tname] = row[metric] - workload[w] = dict_values - - names = list(names) - cols = ['WORKLOAD'] + list(names) - rdf = pd.DataFrame(workload,columns = cols) - - for k in workload: - d = workload[k] - - if not d[names[0]] == 0: - d["WORKLOAD"] = k; - rdf = rdf.append(d,ignore_index=True) - rdf = rdf.dropna() - return names, rdf - -def plot_df(df, names,sort_key=""): - if sort_key != "": - df.sort_values(sort_key, ascending=False) - df.plot(kind='bar',x="WORKLOAD",y=names, figsize=(30, 10)) - plt.show() - - -def import_data(): - frames = [] - for f in glob.glob('./results/*/results.csv'): - print("reading:" + f) - df = pd.read_csv(f) - frames.append(df) - return pd.concat(frames) - -def show_df(df): - pd.set_option('display.max_rows', df.shape[0]+1) - print(df) - -def print_md(s): - display(Markdown(s)) - -#notebook entrypoint -def generate_report(): - #Load the all test results in a single dataset - df_results = import_data() - print_md("Show all data from results") - show_df(df_results) - print_md("### Compare the tests results group by fio job. The metric used to compare is write bandwidth") - compare_tests_group_by_fio_job(df_results, 'bw_w') - print_md("### Compare the tests results group by fio job. The metric used to compare is read bandwidth") - compare_tests_group_by_fio_job(df_results, 'bw_r') - print_md("### Compare the tests results group by fio job. The metric used to compare is write IOPS(Input/Output Operations Per Second)") - compare_tests_group_by_fio_job(df_results, 'IOPS_w') - print_md("### Compare the tests results group by fio job. The metric used to compare is read IOPS(Input/Output Operations Per Second)") - compare_tests_group_by_fio_job(df_results, 'IOPS_r') diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh deleted file mode 100644 index 8061c059d..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") - -results_dir=${1:-} - -usage(){ - echo "$0 " -} - -if [ "${results_dir}" == "" ];then - echo "missing results directory" - usage - exit 1 -fi - -if [ ! 
-d "${results_dir}" ];then - echo "${results_dir} is not a directory" - usage - exit 1 -fi - -results_dir=$(realpath "${results_dir}") - -generate_report(){ - sudo chown "${USER}:${USER}" -R ${results_dir} - sudo docker run --rm -e JUPYTER_ENABLE_LAB=yes \ - -v "${script_dir}:/home/jovyan" \ - -v "${results_dir}:/home/jovyan/results" \ - --user $(id -u):$(id -g) \ - jupyter/scipy-notebook:399cbb986c6b \ - bash -e -c ' - cd results; - jupyter nbconvert --execute /home/jovyan/fio.ipynb --to html; - cp /home/jovyan/fio.html /home/jovyan/results; - ' -} - -generate_report diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh deleted file mode 100644 index f386da4e2..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") -NOTEBOOK_PORT="8888" - -results_dir=${1:-} - -usage(){ - echo "$0 " -} - -if [ "${results_dir}" == "" ];then - echo "missing results directory" - usage - exit 1 -fi - -if [ ! -d "${results_dir}" ];then - echo "${results_dir} is not a directory" - usage - exit 1 -fi - -results_dir=$(realpath "${results_dir}") - -sudo -E docker run --rm -p "${NOTEBOOK_PORT}:${NOTEBOOK_PORT}" -e JUPYTER_ENABLE_LAB=yes \ - -v "${script_dir}:/home/jovyan" \ - -v "${results_dir}:/home/jovyan/results" \ - jupyter/scipy-notebook:399cbb986c6b \ - start.sh jupyter lab --LabApp.token='' diff --git a/tests/metrics/storage/fio_test.sh b/tests/metrics/storage/fio_test.sh new file mode 100755 index 000000000..7ea7deb8e --- /dev/null +++ b/tests/metrics/storage/fio_test.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Description of the test: +# This test runs the 'fio benchmark' on kata containers +# https://fio.readthedocs.io/en/latest/ + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +CONTAINER_ID="fio_bench_${RANDOM}" +IMAGE="docker.io/library/fio-bench:latest" +DOCKERFILE="${SCRIPT_PATH}/fio-dockerfile/Dockerfile" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +TEST_NAME="fio" + +# Fio default number of jobs +nj=4 + +function release_resources() { + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" ${CONTAINER_ID} sh -c "./fio_bench.sh delete-workload" + sleep 0.5 + clean_env_ctr + info "fio test end" +} + +trap release_resources EXIT + +function setup() { + info "setup fio test" + clean_env_ctr + check_cmds "${cmds[@]}" + check_ctr_images "$IMAGE" "$DOCKERFILE" + init_env + + # drop caches + sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' + + # launch container + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_ID}" sh -c "${PAYLOAD_ARGS}" +} + +function parse_results() { + local data="${1}" + local bw=0 + local bw_stddev=0 + local iops=0 + local iops_stddev=0 + + [ -z "${data}" ] && die "Data results are missing when trying to parsing them." 
+ + local io_type="$(echo "${data}" | jq -r '.jobs[0]."job options".rw')" + + if [ "${io_type}" = "read" ] || [ "${io_type}" = "randread" ]; then + # Bandwidth + bw="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.bw] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + bw_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.bw_dev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + # IOPS + iops="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.iops] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + iops_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.iops_stddev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + elif [ "${io_type}" = "write" ] || [ "${io_type}" = "randwrite" ]; then + # Bandwidth + bw="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.bw] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + bw_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.bw_dev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + # IOPS + iops="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.iops] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + iops_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.iops_stddev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + else + die "io type ${io_type} is not valid when parsing results" + fi + + convert_results_to_json "${io_type}" "${bw}" "${bw_stddev}" "${iops}" "${iops_stddev}" +} + +function extract_test_params() { + local data="${1}" + [ -z "${data}" ] && die "Missing fio parameters when trying to convert to json format." + + local json_params="$(echo "${data}" | jq -r '.jobs[0]."job options" | del(.name) | del(.rw) | del(.filename)')" + local json="$(cat << EOF + { + "Parameters" : ${json_params} + } +EOF +)" + metrics_json_add_array_element "${json}" +} + +function convert_results_to_json() { + local io_type="${1}" + local bw="${2}" + local bw_stddev="${3}" + local iops="${4}" + local iops_stddev="${5}" + + [ -z "${io_type}" ] || [ -z "${bw}" ] || [ -z "${bw_stddev}" ] || [ -z "${iops}" ] || [ -z "${iops_stddev}" ] && die "Results are missing when trying to convert to json format." + + local json="$(cat << EOF + { + "${io_type}" : { + "bw" : "${bw}", + "bw_stddev" : "${bw_stddev}", + "iops" : "${iops}", + "iops_stddev" : "${iops_stddev}", + "units" : "Kb" + } + } +EOF +)" + metrics_json_add_array_element "${json}" +} + +function store_results() { + local data_r="${1}" + local data_w="${2}" + local title="${3}" + + [ -z "${data_r}" ] || [ -z "${data_w}" ] || [ -z "${title}" ] && die "Missing data and/or title when trying storing results." 
+ + metrics_json_start_array + extract_test_params "${data_r}" + parse_results "${data_r}" + parse_results "${data_w}" + metrics_json_end_array "${title}" +} + +function main() { + setup + + # Collect bs=4K, num_jobs=4, io-direct, io-depth=2 + info "Processing sequential type workload" + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-read-4k ${nj}" >/dev/null 2>&1 + local results_read_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-write-4k ${nj}" >/dev/null 2>&1 + local results_write_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + # Collect bs=64K, num_jobs=4, io-direct, io-depth=2 + info "Processing random type workload" + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randread-64k ${nj}" >/dev/null 2>&1 + local results_rand_read_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randwrite-64k ${nj}" >/dev/null 2>&1 + local results_rand_write_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + # parse results + metrics_json_init + store_results "${results_read_4K}" "${results_write_4K}" "Results sequential" + store_results "${results_rand_read_64K}" "${results_rand_write_64K}" "Results random" + metrics_json_save +} + +main "$@" diff --git a/tests/metrics/time/launch_times.sh b/tests/metrics/time/launch_times.sh index 0458b979e..3d6b98e34 100755 --- a/tests/metrics/time/launch_times.sh +++ b/tests/metrics/time/launch_times.sh @@ -96,6 +96,7 @@ run_workload() { # number of decimal digits after the decimal points # for 'bc' performing math in kernel period estimation L_CALC_SCALE=13 + local CONTAINER_NAME="kata_launch_times_$(( $RANDOM % 1000 + 1))" start_time=$($DATECMD) # Check entropy level of the host @@ -103,8 +104,7 @@ run_workload() { # Run the image and command and capture the results into an array... 
declare workload_result - readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime=${CTR_RUNTIME} ${IMAGE} test bash -c "$DATECMD $DMESGCMD") - + readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime ${CTR_RUNTIME} ${IMAGE} ${CONTAINER_NAME} bash -c "$DATECMD $DMESGCMD") end_time=$($DATECMD) # Delay this calculation until after we have run - do not want diff --git a/tests/stability/agent_stability_test.sh b/tests/stability/agent_stability_test.sh new file mode 100755 index 000000000..902d9d48c --- /dev/null +++ b/tests/stability/agent_stability_test.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will perform several execs to a +# running container, the main purpose of this +# test is to stress the agent + +set -e -x + +cidir=$(dirname "$0") + +source "${cidir}/../metrics/lib/common.bash" + +# Environment variables +IMAGE="${IMAGE:-quay.io/prometheus/busybox:latest}" +CONTAINER_NAME="${CONTAINER_NAME:-test}" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" + + +# Timeout is the duration of this test (seconds) +# We want to stress the agent for a significant +# time (approximately running for two days) +timeout=186400 +start_time=$(date +%s) +end_time=$((start_time+timeout)) + +function setup { + restart_containerd_service + sudo ctr image pull $IMAGE + sudo ctr run --runtime=$CTR_RUNTIME -d $IMAGE $CONTAINER_NAME sh -c $PAYLOAD_ARGS +} + +function exec_loop { + cmd="sudo ctr t exec --exec-id $(random_name) $CONTAINER_NAME sh -c" + $cmd "echo 'hello world' > file" + $cmd "ls /file" + $cmd "rm -rf /file" + $cmd "touch /tmp/execWorks" + $cmd "ls /tmp | grep execWorks" + $cmd "rm -rf /tmp/execWorks" + $cmd "ls /etc/foo" || echo "Fail expected" + $cmd "cat /tmp/one" || echo "Fail expected" + $cmd "exit 42" || echo "Fail expected" +} + +function teardown { + echo "Ending stability test" + clean_env_ctr +} +trap teardown EXIT + +info "Starting stability test" +setup + +info "Running stability test" +while [[ $end_time > $(date +%s) ]]; do + exec_loop +done diff --git a/tests/stability/cassandra_stress.sh b/tests/stability/cassandra_stress.sh new file mode 100755 index 000000000..2d79d4f59 --- /dev/null +++ b/tests/stability/cassandra_stress.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +IMAGE="docker.io/library/cassandra:latest" +CONTAINER_NAME="${CONTAINER_NAME:-cassandra_test}" +DOCKER_IMAGE="cassandra:latest" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +CMD="cassandra -R" + +function main() { + local cmds=("docker") + + init_env + check_cmds "${cmds[@]}" + sudo -E "${DOCKER_EXE}" pull "${DOCKER_IMAGE}" + sudo -E "${DOCKER_EXE}" save -o "${DOCKER_IMAGE}.tar" "${DOCKER_IMAGE}" + sudo -E "${CTR_EXE}" i import "${DOCKER_IMAGE}.tar" + + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${CMD}" + info "Write one million rows" + local WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 -rate threads=50" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${WRITE_CMD}" + info "Load one row with default schema" + local 
CQL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1 c1=one -mode native cql3 -log file-create_schema.log" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${CQL_WRITE_CMD}" + info "Run a write workload using CQL" + local REAL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema keyspace='keyspace1' -log file=load_1M_rows.log" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${REAL_WRITE_CMD}" + + clean_env_ctr +} + +main "$@" diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh new file mode 100755 index 000000000..01672534a --- /dev/null +++ b/tests/stability/gha-run.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +stability_dir="$(dirname "$(readlink -f "$0")")" +source "${stability_dir}/../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the containerd-stability tests" + + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done +} + +function run() { + info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" + + export ITERATIONS=2 MAX_CONTAINERS=20 + bash "${stability_dir}/soak_parallel_rm.sh" +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + enabling-hypervisor) enabling_hypervisor ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/stability/scability_test.sh b/tests/stability/scability_test.sh new file mode 100755 index 000000000..86d941e65 --- /dev/null +++ b/tests/stability/scability_test.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +NUM_CONTAINERS="$1" +TIMEOUT_LAUNCH="$2" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +IMAGE="${IMAGE:-quay.io/prometheus/busybox:latest}" + +# Show help about this script +help(){ +cat << EOF +Usage: $0 + Description: + This script launches n number of containers. + Options: + : Number of containers to run. + : Timeout to launch the containers. +EOF +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + init_env + check_cmds "${cmds[@]}" + sudo -E ctr i pull "${IMAGE}" + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E ctr run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "$not_started_count remaining containers" + done + + # Check that the requested number of containers are running + check_containers_are_up & pid=$! 
+ (sleep "${TIMEOUT_LAUNCH}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! + + if wait "${pid}" 2>/dev/null; then + pkill -HUP -P "${pid_tout}" + wait "${pid_tout}" + else + warn "Time out exceeded" + return 1 + fi + + clean_env_ctr +} + +main "$@" diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh new file mode 100755 index 000000000..c05c2fade --- /dev/null +++ b/tests/stability/soak_parallel_rm.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# +# Copyright (c) 2017-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will run a number of parallel containers, and then try to +# 'rm -f' them all at the same time. It will check after each run and +# rm that we have the expected number of containers, shims, +# qemus and runtimes active +# The goals are two fold: +# - spot any stuck or non-started components +# - catch any hang ups + +cidir=$(dirname "$0") +source "${cidir}/../metrics/lib/common.bash" +source "/etc/os-release" || source "/usr/lib/os-release" +set -x + +# How many times will we run the test loop... +ITERATIONS="${ITERATIONS:-5}" + +# the system 'free available' level where we stop running the tests, as otherwise +# the system can crawl to a halt, and/or start refusing to launch new VMs anyway +# We choose 2G, as that is one of the default VM sizes for Kata +MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}" + +# do we need a command argument for this payload? +COMMAND="${COMMAND:-tail -f /dev/null}" + +# Runtime path +RUNTIME_PATH=$(command -v $RUNTIME) + +# The place where virtcontainers keeps its active pod info +# This is ultimately what 'kata-runtime list' uses to get its info, but +# we can also check it for sanity directly +VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}" + +# let's cap the test. If you want to run until you hit the memory limit +# then just set this to a very large number +MAX_CONTAINERS="${MAX_CONTAINERS:-110}" + +KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +function check_vsock_active() { + vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}') + vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}') + if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then + return 0 + else + return 1 + fi +} + +function count_containers() { + sudo ctr c list -q | wc -l +} + +function check_all_running() { + local goterror=0 + + info "Checking ${how_many} containers have all relevant components" + + # check what docker thinks + how_many_running=$(count_containers) + + if (( ${how_many_running} != ${how_many} )); then + info "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" + ((goterror++)) + fi + + # Only check for Kata components if we are using a Kata runtime + if (( $check_kata_components )); then + + # check we have the right number of shims + how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l) + # one shim process per container... 
+ if (( ${how_many_running} != ${how_many_shims} )); then + info "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" + ((goterror++)) + fi + + # check we have the right number of vm's + if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then + how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l) + if (( ${how_many_running} != ${how_many_vms} )); then + info "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" + ((goterror++)) + fi + fi + + # if this is kata-runtime, check how many pods virtcontainers thinks we have + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + if [ -d "${VC_POD_DIR}" ]; then + num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l) + + if (( ${how_many_running} != ${num_vc_pods} )); then + info "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" + ((goterror++)) + fi + fi + fi + fi + + if (( goterror != 0 )); then + show_system_ctr_state + die "Got $goterror errors, quitting" + fi +} + +# reported system 'available' memory +function get_system_avail() { + echo $(free -b | head -2 | tail -1 | awk '{print $7}') +} + +function go() { + info "Running..." + + how_many=0 + + while true; do { + check_all_running + + local i + for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do + containers+=($(random_name)) + sudo ctr run --runtime=${CTR_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND} + ((how_many++)) + done + + if (( ${how_many} >= ${MAX_CONTAINERS} )); then + info "And we have hit the max ${how_many} containers" + return + fi + + how_much=$(get_system_avail) + if (( ${how_much} < ${MEM_CUTOFF} )); then + info "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" + return + fi + } + done +} + +function count_mounts() { + echo $(mount | wc -l) +} + +function check_mounts() { + final_mount_count=$(count_mounts) + + if [[ $final_mount_count < $initial_mount_count ]]; then + info "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" + fi +} + +function init() { + restart_containerd_service + extract_kata_env + clean_env_ctr + + # remember how many mount points we had before we do anything + # and then sanity check we end up with no new ones dangling at the end + initial_mount_count=$(count_mounts) + + # Only check Kata items if we are using a Kata runtime + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + info "Checking Kata runtime" + check_kata_components=1 + else + info "Not a Kata runtime, not checking for Kata components" + check_kata_components=0 + fi + + versions_file="${cidir}/../versions.yaml" + nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version") + nginx_image="docker.io/library/nginx:$nginx_version" + + # Pull nginx image + sudo ctr image pull ${nginx_image} + if [ $? 
!= 0 ]; then + die "Unable to retry docker image ${nginx_image}" + fi +} + +function spin() { + local i + for ((i=1; i<= ITERATIONS; i++)); do { + info "Start iteration $i of $ITERATIONS" + #spin them up + go + #check we are in a sane state + check_all_running + #shut them all down + clean_env_ctr + #Note there should be none running + how_many=0 + #and check they all died + check_all_running + #and that we have no dangling mounts + check_mounts + } + done + +} + +init +spin diff --git a/tests/stability/stressng.sh b/tests/stability/stressng.sh new file mode 100755 index 000000000..9f95b8c68 --- /dev/null +++ b/tests/stability/stressng.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +DOCKERFILE="${SCRIPT_PATH}/stressng_dockerfile/Dockerfile" +IMAGE="docker.io/library/local-stressng:latest" +CONTAINER_NAME="${CONTAINER_NAME:-stressng_test}" + +function main() { + local cmds=("docker") + + init_env + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + sudo -E ctr run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}" + + # Run 1 iomix stressor (mix of I/O operations) for 20 seconds with verbose output + info "Running iomix stressor test" + IOMIX_CMD="stress-ng --iomix 1 -t 20 -v" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${IOMIX_CMD}" + + # Run cpu stressors and virtual memory stressors for 5 minutes + info "Running memory stressors for 5 minutes" + MEMORY_CMD="stress-ng --cpu 2 --vm 4 -t 5m" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${MEMORY_CMD}" + + # Run shared memory stressors + info "Running 8 shared memory stressors" + SHARED_CMD="stress-ng --shm 0" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${SHARED_CMD}" + + # Run all stressors one by one on all CPUs + info "Running all stressors one by one" + STRESSORS_CMD="stress-ng --seq 0 -t 10 --tz -v" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${STRESSORS_CMD}" + + # Test floating point on CPU for 60 seconds + info "Running floating tests on CPU" + FLOAT_CMD="stress-ng --matrix 1 -t 1m" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${FLOAT_CMD}" + + # Runs two instances of the CPU stressors, one instance of the matrix + info "Running instances of the CPU stressors" + INSTANCE_CMD='stress-ng --cpu 2 --matrix 1 --mq 3 -t 5m' + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${INSTANCE_CMD}" + + clean_env_ctr +} + +main "$@" diff --git a/tests/stability/stressng_dockerfile/Dockerfile b/tests/stability/stressng_dockerfile/Dockerfile new file mode 100644 index 000000000..b17002c1e --- /dev/null +++ b/tests/stability/stressng_dockerfile/Dockerfile @@ -0,0 +1,17 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:20.04 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends sudo build-essential curl && \ + apt-get remove -y unattended-upgrades && \ + apt-get install -y stress stress-ng + +CMD ["/bin/bash"] diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index 
8e7f6e2ac..95f07f143 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -2,30 +2,25 @@ # # SPDX-License-Identifier: Apache-2.0 -# Specify alternative base image, e.g. clefos for s390x -ARG BASE_IMAGE_NAME=ubuntu -ARG BASE_IMAGE_TAG=20.04 +ARG BASE_IMAGE_NAME=alpine +ARG BASE_IMAGE_TAG=3.18 FROM $BASE_IMAGE_NAME:$BASE_IMAGE_TAG -ENV DEBIAN_FRONTEND=noninteractive ARG KATA_ARTIFACTS=./kata-static.tar.xz ARG DESTINATION=/opt/kata-artifacts COPY ${KATA_ARTIFACTS} ${WORKDIR} -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - RUN \ -apt-get update && \ -apt-get install -y --no-install-recommends apt-transport-https ca-certificates curl gpg xz-utils systemd && \ -mkdir -p /etc/apt/keyrings/ && \ -curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /etc/apt/keyrings/kubernetes-archive-keyring.gpg && \ -echo "deb [signed-by=/etc/apt/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | tee /etc/apt/sources.list.d/kubernetes.list && \ -apt-get update && \ -apt-get install -y --no-install-recommends kubectl && \ -apt-get clean && rm -rf /var/lib/apt/lists/ && \ -mkdir -p ${DESTINATION} && \ -tar xvf ${WORKDIR}/${KATA_ARTIFACTS} -C ${DESTINATION} && \ -rm -f ${WORKDIR}/${KATA_ARTIFACTS} + apk --no-cache add bash curl && \ + ARCH=$(uname -m) && \ + if [ "${ARCH}" = "x86_64" ]; then ARCH=amd64; fi && \ + if [ "${ARCH}" = "aarch64" ]; then ARCH=arm64; fi && \ + curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl && \ + chmod +x /usr/bin/kubectl && \ + mkdir -p ${DESTINATION} && \ + tar xvf ${WORKDIR}/${KATA_ARTIFACTS} -C ${DESTINATION} && \ + rm -f ${WORKDIR}/${KATA_ARTIFACTS} && \ + apk del curl COPY scripts ${DESTINATION}/scripts COPY runtimeclasses ${DESTINATION}/runtimeclasses diff --git a/tools/packaging/kata-deploy/README.md b/tools/packaging/kata-deploy/README.md index 9e546e4f5..6ef7cd6a9 100644 --- a/tools/packaging/kata-deploy/README.md +++ b/tools/packaging/kata-deploy/README.md @@ -43,6 +43,36 @@ $ kubectl apply -f kata-rbac/base/kata-rbac.yaml $ kubectl apply -k kata-deploy/overlays/rke2 ``` +#### [k0s] cluster + +For your [k0s](https://k0sproject.io/) cluster, run: + +```sh +$ git clone https://github.com/kata-containers/kata-containers.git +``` + +Check and switch to "main", and then run: + +```bash +$ cd kata-containers/tools/packaging/kata-deploy +$ kubectl apply -f kata-rbac/base/kata-rbac.yaml +$ kubectl apply -k kata-deploy/overlays/k0s +``` + +##### Note + +The supported version of k0s is **v1.27.1+k0s** and above, since the k0s support leverages a special dynamic containerd configuration mode: + +> From 1.27.1 onwards k0s enables dynamic configuration on containerd CRI runtimes. This works by k0s creating a special directory in /etc/k0s/containerd.d/ where user can drop-in partial containerd configuration snippets. +> +> k0s will automatically pick up these files and adds these in containerd configuration imports list. If k0s sees the configuration drop-ins are CRI related configurations k0s will automatically collect all these into a single file and adds that as a single import file. This is to overcome some hard limitation on containerd 1.X versions. 
Read more at containerd#8056 + +However, this would also require a magic string set in the beginning of the line for `/etc/k0s/containerd.toml`: + +``` +# k0s_managed=true +``` + #### Vanilla Kubernetes cluster ##### Installing the latest image diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml index 3d9006572..7cb875689 100644 --- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml +++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml @@ -6,39 +6,39 @@ metadata: namespace: kube-system spec: selector: - matchLabels: - name: kubelet-kata-cleanup + matchLabels: + name: kubelet-kata-cleanup template: metadata: - labels: - name: kubelet-kata-cleanup + labels: + name: kubelet-kata-cleanup spec: serviceAccountName: kata-deploy-sa hostPID: true nodeSelector: - katacontainers.io/kata-runtime: cleanup + katacontainers.io/kata-runtime: cleanup containers: - - name: kube-kata-cleanup - image: quay.io/kata-containers/kata-deploy:latest - imagePullPolicy: Always - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: DEBUG - value: "false" - - name: SHIMS - value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" - - name: DEFAULT_SHIM - value: "qemu" - - name: CREATE_RUNTIMECLASSES - value: "false" - - name: CREATE_DEFAULT_RUNTIMECLASS - value: "false" - securityContext: - privileged: true + - name: kube-kata-cleanup + image: quay.io/kata-containers/kata-deploy:latest + imagePullPolicy: Always + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset"] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DEBUG + value: "false" + - name: SHIMS + value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" + - name: DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES + value: "false" + - name: CREATE_DEFAULT_RUNTIMECLASS + value: "false" + securityContext: + privileged: true updateStrategy: rollingUpdate: maxUnavailable: 1 diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml index c10061d90..3b4e8888a 100644 --- a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -6,50 +6,50 @@ metadata: namespace: kube-system spec: selector: - matchLabels: - name: kata-deploy + matchLabels: + name: kata-deploy template: metadata: - labels: - name: kata-deploy + labels: + name: kata-deploy spec: serviceAccountName: kata-deploy-sa hostPID: true containers: - - name: kube-kata - image: quay.io/kata-containers/kata-deploy:latest - imagePullPolicy: Always - lifecycle: - preStop: - exec: - command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: DEBUG - value: "false" - - name: SHIMS - value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" - - name: DEFAULT_SHIM - value: "qemu" - - name: CREATE_RUNTIMECLASSES - value: "false" - - name: CREATE_DEFAULT_RUNTIMECLASS - value: "false" - securityContext: - privileged: true - volumeMounts: - - name: crio-conf - mountPath: /etc/crio/ - - name: containerd-conf - mountPath: 
/etc/containerd/ - - name: kata-artifacts - mountPath: /opt/kata/ - - name: local-bin - mountPath: /usr/local/bin/ + - name: kube-kata + image: quay.io/kata-containers/kata-deploy:latest + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install"] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DEBUG + value: "false" + - name: SHIMS + value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" + - name: DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES + value: "false" + - name: CREATE_DEFAULT_RUNTIMECLASS + value: "false" + securityContext: + privileged: true + volumeMounts: + - name: crio-conf + mountPath: /etc/crio/ + - name: containerd-conf + mountPath: /etc/containerd/ + - name: kata-artifacts + mountPath: /opt/kata/ + - name: local-bin + mountPath: /usr/local/bin/ volumes: - name: crio-conf hostPath: diff --git a/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml new file mode 100644 index 000000000..14904f560 --- /dev/null +++ b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml @@ -0,0 +1,5 @@ +bases: +- ../../base + +patchesStrategicMerge: +- mount_k0s_conf.yaml diff --git a/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml new file mode 100644 index 000000000..c10f938f8 --- /dev/null +++ b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml @@ -0,0 +1,12 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kata-deploy + namespace: kube-system +spec: + template: + spec: + volumes: + - name: containerd-conf + hostPath: + path: /etc/k0s/containerd.d/ \ No newline at end of file diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index ece1900c0..d9e28a547 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -52,6 +52,12 @@ serial-targets: %-tarball-build: $(MK_DIR)/dockerbuild/install_yq.sh $(call BUILD,$*) +agent-tarball: + ${MAKE} $@-build + +agent-ctl-tarball: + ${MAKE} $@-build + cloud-hypervisor-tarball: ${MAKE} $@-build @@ -61,6 +67,9 @@ cloud-hypervisor-glibc-tarball: firecracker-tarball: ${MAKE} $@-build +kata-ctl-tarball: + ${MAKE} $@-build + kernel-dragonball-experimental-tarball: ${MAKE} $@-build @@ -82,6 +91,9 @@ kernel-tdx-experimental-tarball: kernel-sev-tarball: ${MAKE} $@-build +log-parser-rs-tarball: + ${MAKE} $@-build + nydus-tarball: ${MAKE} $@-build @@ -115,12 +127,18 @@ rootfs-initrd-sev-tarball: kernel-sev-tarball rootfs-initrd-tarball: ${MAKE} $@-build +runk-tarball: + ${MAKE} $@-build + shim-v2-tarball: ${MAKE} $@-build tdvf-tarball: ${MAKE} $@-build +trace-forwarder-tarball: + ${MAKE} $@-build + virtiofsd-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 1338f482a..a29514968 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -7,10 +7,11 @@ ENV DEBIAN_FRONTEND=noninteractive ENV INSTALL_IN_GOPATH=false COPY install_yq.sh /usr/bin/install_yq.sh +COPY 
install_oras.sh /usr/bin/install_oras.sh SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install yq and docker +# Install yq, oras, and docker RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ @@ -18,8 +19,10 @@ RUN apt-get update && \ sudo && \ apt-get clean && rm -rf /var/lib/apt/lists/ && \ install_yq.sh && \ + install_oras.sh && \ curl -fsSL https://get.docker.com -o get-docker.sh && \ - if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10"; fi && \ + if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10" && \ + sed -i 's/\//g' get-docker.sh; fi && \ sh get-docker.sh ARG IMG_USER=kata-builder @@ -28,7 +31,7 @@ ARG GID=1000 # gid of the docker group on the host, required for running docker in docker builds. ARG HOST_DOCKER_GID -RUN if [ ${IMG_USER} != "root" ]; then groupadd --gid=${GID} ${IMG_USER};fi +RUN if [ ${IMG_USER} != "root" ]; then sed -i -e "/:${GID}:/d" /etc/group; groupadd --gid=${GID} ${IMG_USER};fi RUN if [ ${IMG_USER} != "root" ]; then adduser ${IMG_USER} --uid=${UID} --gid=${GID};fi RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then groupadd --gid=${HOST_DOCKER_GID} docker_on_host;fi RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then usermod -a -G docker_on_host ${IMG_USER};fi diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh new file mode 100755 index 000000000..973a10205 --- /dev/null +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +install_dest="/usr/local/bin" + +function get_installed_oras_version() { + oras version | grep Version | sed -e s/Version:// | tr -d [:blank:] +} + +oras_required_version="v1.1.0" +if command -v oras; then + if [[ "${oras_required_version}" == "v$(get_installed_oras_version)" ]]; then + echo "ORAS is already installed in the system" + exit 0 + fi + + echo "Proceeding to cleanup the previous installed version of ORAS, and install the version specified in the versions.yaml file" + oras_system_path=$(which oras) + sudo rm -f ${oras_system_path} +fi + +arch=$(uname -m) +if [ "${arch}" = "ppc64le" ]; then + echo "An ORAS release for ppc64le is not available yet." 
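+    # Assumption: it is safe to skip (exit 0) rather than fail here, since oras is only used
+    # for pushing/pulling cached build artefacts and no upstream release exists for ppc64le.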
+ exit 0 +fi +if [ "${arch}" = "x86_64" ]; then + arch="amd64" +fi +if [ "${arch}" = "aarch64" ]; then + arch="arm64" +fi +oras_tarball="oras_${oras_required_version#v}_linux_${arch}.tar.gz" + +echo "Downloading ORAS ${oras_required_version}" +sudo curl -OL https://github.com/oras-project/oras/releases/download/${oras_required_version}/${oras_tarball} + +echo "Installing ORAS to ${install_dest}" +sudo mkdir -p "${install_dest}" +sudo tar -C "${install_dest}" -xzf "${oras_tarball}" +sudo rm -f "${oras_tarball}" diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index ebcc80c81..19653720e 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -32,6 +32,11 @@ TARGET_ARCH=${TARGET_ARCH:-$(uname -m)} TARGET_OS=${TARGET_OS:-linux} TARGET_ARCH=${TARGET_ARCH:-$ARCH} +# We've seen issues related to the /home/runner/.docker/buildx/activity/default file +# constantly being with the wrong permissions. +# Let's just remove the file before we build. +rm -f $HOME/.docker/buildx/activity/default + [ "${CROSS_BUILD}" == "true" ] && BUILDX="buildx" && PLATFORM="--platform=${TARGET_OS}/${TARGET_ARCH}" if [ "${CROSS_BUILD}" == "true" ]; then # check if the current docker support docker buildx @@ -72,23 +77,48 @@ docker build -q -t build-kata-deploy \ --build-arg HOST_DOCKER_GID=${docker_gid} \ "${script_dir}/dockerbuild/" +CI="${CI:-}" +ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-}" +ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" +ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" +TARGET_BRANCH="${TARGET_BRANCH:-}" +BUILDER_REGISTRY="${BUILDER_REGISTRY:-}" +PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" +AGENT_CONTAINER_BUILDER="${AGENT_CONTAINER_BUILDER:-}" +INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" +KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" +OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" +QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER:-}" +SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER:-}" +TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER:-}" +TOOLS_CONTAINER_BUILDER="${TOOLS_CONTAINER_BUILDER:-}" +VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER:-}" +MEASURED_ROOTFS="${MEASURED_ROOTFS:-}" +USE_CACHE="${USE_CACHE:-}" + docker run \ -v $HOME/.docker:/root/.docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v "${kata_dir}:${kata_dir}" \ - --env CI="${CI:-}" \ + --env CI="${CI}" \ --env USER=${USER} \ - --env BUILDER_REGISTRY="${BUILDER_REGISTRY:-}" \ - --env PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" \ - --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" \ - --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" \ - --env OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" \ - --env QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER:-}" \ - --env SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER:-}" \ - --env TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER:-}" \ - --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER:-}" \ - --env MEASURED_ROOTFS="${MEASURED_ROOTFS:-}" \ - --env USE_CACHE="${USE_CACHE:-}" \ + --env ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY}" \ + --env ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME}" \ + --env ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD}" \ + --env TARGET_BRANCH="${TARGET_BRANCH}" \ + --env 
BUILDER_REGISTRY="${BUILDER_REGISTRY}" \ + --env PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY}" \ + --env AGENT_CONTAINER_BUILDER="${AGENT_CONTAINER_BUILDER}" \ + --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER}" \ + --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER}" \ + --env OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER}" \ + --env QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER}" \ + --env SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER}" \ + --env TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER}" \ + --env TOOLS_CONTAINER_BUILDER="${TOOLS_CONTAINER_BUILDER}" \ + --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER}" \ + --env MEASURED_ROOTFS="${MEASURED_ROOTFS}" \ + --env USE_CACHE="${USE_CACHE}" \ --env CROSS_BUILD="${CROSS_BUILD}" \ --env TARGET_ARCH="${TARGET_ARCH}" \ --env ARCH="${ARCH}" \ diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index a13f9c51f..bb68d883b 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -22,6 +22,7 @@ readonly static_build_dir="${repo_root_dir}/tools/packaging/static-build" readonly version_file="${repo_root_dir}/VERSION" readonly versions_yaml="${repo_root_dir}/versions.yaml" +readonly agent_builder="${static_build_dir}/agent/build.sh" readonly clh_builder="${static_build_dir}/cloud-hypervisor/build-static-clh.sh" readonly firecracker_builder="${static_build_dir}/firecracker/build-static-firecracker.sh" readonly initramfs_builder="${static_build_dir}/initramfs/build.sh" @@ -32,15 +33,17 @@ readonly qemu_experimental_builder="${static_build_dir}/qemu/build-static-qemu-e readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" readonly nydus_builder="${static_build_dir}/nydus/build.sh" - readonly rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" - -readonly jenkins_url="http://jenkins.katacontainers.io" -readonly cached_artifacts_path="lastSuccessfulBuild/artifact/artifacts" +readonly tools_builder="${static_build_dir}/tools/build.sh" ARCH=${ARCH:-$(uname -m)} MEASURED_ROOTFS=${MEASURED_ROOTFS:-no} USE_CACHE="${USE_CACHE:-"yes"}" +ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-ghcr.io}" +ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" +ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" +TARGET_BRANCH="${TARGET_BRANCH:-main}" +PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-}" workdir="${WORKDIR:-$PWD}" @@ -79,9 +82,12 @@ options: -s : Silent mode (produce output in case of failure only) --build= : all + agent + agent-ctl cloud-hypervisor cloud-hypervisor-glibc firecracker + kata-ctl kernel kernel-dragonball-experimental kernel-experimental @@ -90,6 +96,7 @@ options: kernel-nvidia-gpu-tdx-experimental kernel-sev-tarball kernel-tdx-experimental + log-parser-rs nydus ovmf ovmf-sev @@ -101,18 +108,19 @@ options: rootfs-initrd rootfs-initrd-mariner rootfs-initrd-sev + runk shim-v2 tdvf + trace-forwarder virtiofsd EOF exit "${return_code}" } - cleanup_and_fail() { - rm -f "${component_tarball_path}" - return 1 + rm -f "${component_tarball_name}" + return 1 } install_cached_tarball_component() { @@ -121,23 +129,24 @@ install_cached_tarball_component() { fi local component="${1}" - local jenkins_build_url="${2}" - local current_version="${3}" - local current_image_version="${4}" - local component_tarball_name="${5}" - 
local component_tarball_path="${6}" + local current_version="${2}" + local current_image_version="${3}" + local component_tarball_name="${4}" + local component_tarball_path="${5}" - local cached_version=$(curl -sfL "${jenkins_build_url}/latest" | awk '{print $1}') || cached_version="none" - local cached_image_version=$(curl -sfL "${jenkins_build_url}/latest_image" | awk '{print $1}') || cached_image_version="none" + oras pull ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) + + cached_version="$(cat ${component}-version)" + cached_image_version="$(cat ${component}-builder-image-version)" + + rm -f ${component}-version + rm -f ${component}-builder-image-version [ "${cached_image_version}" != "${current_image_version}" ] && return 1 [ "${cached_version}" != "${current_version}" ] && return 1 + sha256sum -c "${component}-sha256sum" || return $(cleanup_and_fail) info "Using cached tarball of ${component}" - echo "Downloading tarball from: ${jenkins_build_url}/${component_tarball_name}" - wget "${jenkins_build_url}/${component_tarball_name}" || return $(cleanup_and_fail) - wget "${jenkins_build_url}/sha256sum-${component_tarball_name}" || return $(cleanup_and_fail) - sha256sum -c "sha256sum-${component_tarball_name}" || return $(cleanup_and_fail) mv "${component_tarball_name}" "${component_tarball_path}" } @@ -150,7 +159,6 @@ install_image() { image_type+="-${variant}" fi - local jenkins="${jenkins_url}/job/kata-containers-main-rootfs-${image_type}-${ARCH}/${cached_artifacts_path}" local component="rootfs-${image_type}" local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")" @@ -161,11 +169,13 @@ install_image() { local libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")" local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" + latest_artefact="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${image_type}" + latest_builder_image="" + install_cached_tarball_component \ "${component}" \ - "${jenkins}" \ - "${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${image_type}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -197,7 +207,6 @@ install_initrd() { initrd_type+="-${variant}" fi - local jenkins="${jenkins_url}/job/kata-containers-main-rootfs-${initrd_type}-${ARCH}/${cached_artifacts_path}" local component="rootfs-${initrd_type}" local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")" @@ -208,13 +217,15 @@ install_initrd() { local libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")" local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" + latest_artefact="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${initrd_type}" + latest_builder_image="" + [[ "${ARCH}" == "aarch64" && "${CROSS_BUILD}" == "true" ]] && echo "warning: Don't cross build initrd for aarch64 as it's too slow" && exit 0 install_cached_tarball_component \ "${component}" \ - "${jenkins}" \ - 
"${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${initrd_type}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -247,11 +258,13 @@ install_cached_kernel_tarball_component() { local kernel_name=${1} local module_dir=${2:-""} + latest_artefact="${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" + latest_builder_image="$(get_kernel_image_name)" + install_cached_tarball_component \ "${kernel_name}" \ - "${jenkins_url}/job/kata-containers-main-${kernel_name}-${ARCH}/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" \ - "$(get_kernel_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ || return 1 @@ -263,9 +276,8 @@ install_cached_kernel_tarball_component() { # SEV specific code path install_cached_tarball_component \ "${kernel_name}" \ - "${jenkins_url}/job/kata-containers-main-${kernel_name}-$(uname -m)/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" \ - "$(get_kernel_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "kata-static-kernel-sev-modules.tar.xz" \ "${workdir}/kata-static-kernel-sev-modules.tar.xz" \ || return 1 @@ -384,11 +396,13 @@ install_qemu_helper() { export qemu_repo="$(get_from_kata_deps ${qemu_repo_yaml_path})" export qemu_version="$(get_from_kata_deps ${qemu_version_yaml_path})" + latest_artefact="${qemu_version}-$(calc_qemu_files_sha256sum)" + latest_builder_image="$(get_qemu_image_name)" + install_cached_tarball_component \ "${qemu_name}" \ - "${jenkins_url}/job/kata-containers-main-${qemu_name}-${ARCH}/${cached_artifacts_path}" \ - "${qemu_version}-$(calc_qemu_files_sha256sum)" \ - "$(get_qemu_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -433,11 +447,13 @@ install_qemu_snp_experimental() { install_firecracker() { local firecracker_version=$(get_from_kata_deps "assets.hypervisor.firecracker.version") + latest_artefact="${firecracker_version}" + latest_builder_image="" + install_cached_tarball_component \ "firecracker" \ - "${jenkins_url}/job/kata-containers-main-firecracker-$(uname -m)/${cached_artifacts_path}" \ - "${firecracker_version}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -455,11 +471,13 @@ install_clh_helper() { features="${2}" suffix="${3:-""}" + latest_artefact="$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")" + latest_builder_image="" + install_cached_tarball_component \ "cloud-hypervisor${suffix}" \ - "${jenkins_url}/job/kata-containers-main-clh-$(uname -m)${suffix}/${cached_artifacts_path}" \ - "$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -495,11 +513,13 @@ install_clh_glibc() { # Install static virtiofsd asset install_virtiofsd() { + latest_artefact="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" + latest_builder_image="$(get_virtiofsd_image_name)" + 
install_cached_tarball_component \ "virtiofsd" \ - "${jenkins_url}/job/kata-containers-main-virtiofsd-${ARCH}/${cached_artifacts_path}" \ - "$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" \ - "$(get_virtiofsd_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -515,11 +535,13 @@ install_virtiofsd() { install_nydus() { [ "${ARCH}" == "aarch64" ] && ARCH=arm64 + latest_artefact="$(get_from_kata_deps "externals.nydus.version")" + latest_builder_image="" + install_cached_tarball_component \ "nydus" \ - "${jenkins_url}/job/kata-containers-main-nydus-$(uname -m)/${cached_artifacts_path}" \ - "$(get_from_kata_deps "externals.nydus.version")" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -540,13 +562,14 @@ install_shimv2() { local protocols_last_commit="$(get_last_modification "${repo_root_dir}/src/libs/protocols")" local GO_VERSION="$(get_from_kata_deps "languages.golang.meta.newest-version")" local RUST_VERSION="$(get_from_kata_deps "languages.rust.meta.newest-version")" - local shim_v2_version="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${GO_VERSION}-${RUST_VERSION}" + + latest_artefact="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${GO_VERSION}-${RUST_VERSION}" + latest_builder_image="$(get_shim_v2_image_name)" install_cached_tarball_component \ "shim-v2" \ - "${jenkins_url}/job/kata-containers-main-shim-v2-${ARCH}/${cached_artifacts_path}" \ - "${shim_v2_version}" \ - "$(get_shim_v2_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -573,13 +596,16 @@ install_ovmf() { tarball_name="${2:-edk2-x86_64.tar.gz}" local component_name="ovmf" - local component_version="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" + [ "${ovmf_type}" == "sev" ] && component_name="ovmf-sev" [ "${ovmf_type}" == "tdx" ] && component_name="tdvf" + + latest_artefact="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" + latest_builder_image="$(get_ovmf_image_name)" + install_cached_tarball_component \ "${component_name}" \ - "${jenkins_url}/job/kata-containers-main-ovmf-${ovmf_type}-$(uname -m)/${cached_artifacts_path}" \ - "${component_version}" \ - "$(get_ovmf_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -598,6 +624,77 @@ install_ovmf_sev() { install_ovmf "sev" "edk2-sev.tar.gz" } +install_agent_helper() { + agent_policy="${1:-no}" + + latest_artefact="$(git log -1 --pretty=format:"%h" ${repo_root_dir}/src/agent)" + latest_builder_image="$(get_agent_image_name)" + + install_cached_tarball_component \ + "${build_target}" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && return 0 + + info "build static agent" + DESTDIR="${destdir}" AGENT_POLICY=${agent_policy} "${agent_builder}" +} + +install_agent() { + install_agent_helper +} + +install_tools_helper() { + tool=${1} + + latest_artefact="$(git log -1 --pretty=format:"%h" ${repo_root_dir}/src/tools/${tool})" + latest_builder_image="$(get_tools_image_name)" + + install_cached_tarball_component \ + "${tool}" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && 
return 0 + + + info "build static ${tool}" + ${tools_builder} ${tool} + + tool_binary=${tool} + [ ${tool} = "agent-ctl" ] && tool_binary="kata-agent-ctl" + [ ${tool} = "log-parser-rs" ] && tool_binary="log-parser" + [ ${tool} = "trace-forwarder" ] && tool_binary="kata-trace-forwarder" + binary=$(find ${repo_root_dir}/src/tools/${tool}/ -type f -name ${tool_binary}) + + info "Install static ${tool_binary}" + mkdir -p "${destdir}/opt/kata/bin/" + sudo install -D --owner root --group root --mode 0744 ${binary} "${destdir}/opt/kata/bin/${tool_binary}" +} + +install_agent_ctl() { + install_tools_helper "agent-ctl" +} + +install_kata_ctl() { + install_tools_helper "kata-ctl" +} + +install_log_parser_rs() { + install_tools_helper "log-parser-rs" +} + +install_runk() { + install_tools_helper "runk" +} + +install_trace_forwarder() { + install_tools_helper "trace-forwarder" +} + get_kata_version() { local v v=$(cat "${version_file}") @@ -606,6 +703,10 @@ get_kata_version() { handle_build() { info "DESTDIR ${destdir}" + + latest_artefact="" + latest_builder_image="" + local build_target build_target="$1" @@ -615,32 +716,43 @@ handle_build() { case "${build_target}" in all) + install_agent_ctl install_clh install_firecracker install_image install_initrd install_initrd_mariner install_initrd_sev + install_kata_ctl install_kernel install_kernel_dragonball_experimental install_kernel_tdx_experimental + install_log_parser_rs install_nydus install_ovmf install_ovmf_sev install_qemu install_qemu_snp_experimental install_qemu_tdx_experimental + install_runk install_shimv2 install_tdvf + install_trace_forwarder install_virtiofsd ;; + agent) install_agent ;; + + agent-ctl) install_agent_ctl ;; + cloud-hypervisor) install_clh ;; cloud-hypervisor-glibc) install_clh_glibc ;; firecracker) install_firecracker ;; + kata-ctl) install_kata_ctl ;; + kernel) install_kernel ;; kernel-dragonball-experimental) install_kernel_dragonball_experimental ;; @@ -655,6 +767,8 @@ handle_build() { kernel-sev) install_kernel_sev ;; + log-parser-rs) install_log_parser_rs ;; + nydus) install_nydus ;; ovmf) install_ovmf ;; @@ -676,11 +790,15 @@ handle_build() { rootfs-initrd-mariner) install_initrd_mariner ;; rootfs-initrd-sev) install_initrd_sev ;; + + runk) install_runk ;; shim-v2) install_shimv2 ;; tdvf) install_tdvf ;; + trace-forwarder) install_trace_forwarder ;; + virtiofsd) install_virtiofsd ;; *) @@ -693,6 +811,27 @@ handle_build() { sudo tar cvfJ "${final_tarball_path}" "." fi tar tvf "${final_tarball_path}" + + pushd ${workdir} + echo "${latest_artefact}" > ${build_target}-version + echo "${latest_builder_image}" > ${build_target}-builder-image-version + sha256sum "${final_tarball_name}" > ${build_target}-sha256sum + + if [ "${PUSH_TO_REGISTRY}" = "yes" ]; then + if [ -z "${ARTEFACT_REGISTRY}" ] || + [ -z "${ARTEFACT_REGISTRY_USERNAME}" ] || + [ -z "${ARTEFACT_REGISTRY_PASSWORD}" ] || + [ -z "${TARGET_BRANCH}" ]; then + die "ARTEFACT_REGISTRY, ARTEFACT_REGISTRY_USERNAME, ARTEFACT_REGISTRY_PASSWORD and TARGET_BRANCH must be passed to the script when pushing the artefacts to the registry!" 
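+    # The push below publishes the freshly built tarball together with its version,
+    # builder-image-version and sha256sum files to
+    # ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m),
+    # the same reference that install_cached_tarball_component() later pulls from.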
+ fi + + echo "${ARTEFACT_REGISTRY_PASSWORD}" | oras login "${ARTEFACT_REGISTRY}" -u "${ARTEFACT_REGISTRY_USERNAME}" --password-stdin + + oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version ${build_target}-sha256sum + oras logout "${ARTEFACT_REGISTRY}" + fi + + popd } silent_mode_error_trap() { @@ -711,16 +850,22 @@ main() { local build_targets local silent build_targets=( + agent + agent-ctl cloud-hypervisor firecracker + kata-ctl kernel kernel-experimental + log-parser-rs nydus qemu rootfs-image rootfs-initrd rootfs-initrd-mariner + runk shim-v2 + trace-forwarder virtiofsd ) silent=false diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh index be460ffa7..af746e79c 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh @@ -25,6 +25,8 @@ arch=$(uname -m) [ "$arch" = "x86_64" ] && arch="amd64" IMAGE_TAG="${REGISTRY}:kata-containers-$(git rev-parse HEAD)-${arch}" +sudo chown -R $USER $HOME/.docker + echo "Building the image" docker build --tag ${IMAGE_TAG} . diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh index ff5ed3e4f..9d9ee3c40 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh @@ -37,5 +37,5 @@ pushd ${tarball_content_dir} popd echo "create ${tar_path}" -(cd "${tarball_content_dir}"; tar cvfJ "${tar_path}" .) +(cd "${tarball_content_dir}"; tar cvfJ "${tar_path}" --owner=0 --group=0 .) popd diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index fc26921df..9d60a7ad0 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -44,7 +44,7 @@ function create_runtimeclasses() { if [[ "${CREATE_DEFAULT_RUNTIMECLASS}" == "true" ]]; then echo "Creating the kata runtime class for the default shim (an alias for kata-${default_shim})" cp /opt/kata-artifacts/runtimeclasses/kata-${default_shim}.yaml /tmp/kata.yaml - sed -i -e 's/kata-'${default_shim}'/kata/g' /tmp/kata.yaml + sed -i -e 's/name: kata-'${default_shim}'/name: kata/g' /tmp/kata.yaml kubectl apply -f /tmp/kata.yaml rm -f /tmp/kata.yaml fi @@ -62,7 +62,7 @@ function delete_runtimeclasses() { if [[ "${CREATE_DEFAULT_RUNTIMECLASS}" == "true" ]]; then echo "Deleting the kata runtime class for the default shim (an alias for kata-${default_shim})" cp /opt/kata-artifacts/runtimeclasses/kata-${default_shim}.yaml /tmp/kata.yaml - sed -i -e 's/kata-'${default_shim}'/kata/g' /tmp/kata.yaml + sed -i -e 's/name: kata-'${default_shim}'/name: kata/g' /tmp/kata.yaml kubectl delete -f /tmp/kata.yaml rm -f /tmp/kata.yaml fi @@ -74,7 +74,10 @@ function get_container_runtime() { if [ "$?" 
-ne 0 ]; then die "invalid node name" fi - if echo "$runtime" | grep -qE 'containerd.*-k3s'; then + + if echo "$runtime" | grep -qE "cri-o"; then + echo "cri-o" + elif echo "$runtime" | grep -qE 'containerd.*-k3s'; then if host_systemctl is-active --quiet rke2-agent; then echo "rke2-agent" elif host_systemctl is-active --quiet rke2-server; then @@ -84,6 +87,12 @@ function get_container_runtime() { else echo "k3s" fi + # Note: we assumed you used a conventional k0s setup and k0s will generate a systemd entry k0scontroller.service and k0sworker.service respectively + # and it is impossible to run this script without a kubelet, so this k0s controller must also have worker mode enabled + elif host_systemctl is-active --quiet k0scontroller; then + echo "k0s-controller" + elif host_systemctl is-active --quiet k0sworker; then + echo "k0s-worker" else echo "$runtime" | awk -F '[:]' '{print $1}' fi @@ -136,12 +145,17 @@ function configure_cri_runtime() { crio) configure_crio ;; - containerd | k3s | k3s-agent | rke2-agent | rke2-server) - configure_containerd + containerd | k3s | k3s-agent | rke2-agent | rke2-server | k0s-controller | k0s-worker) + configure_containerd "$1" ;; esac - host_systemctl daemon-reload - host_systemctl restart "$1" + if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then + # do nothing, k0s will automatically load the config on the fly + : + else + host_systemctl daemon-reload + host_systemctl restart "$1" + fi wait_till_node_is_ready } @@ -155,7 +169,7 @@ function backup_shim() { if [ ! -f "${shim_backup}" ]; then mv "${shim_file}" "${shim_backup}" else - rm "${shim_file}" + rm -f "${shim_file}" fi fi } @@ -210,12 +224,12 @@ function cleanup_different_shims_base() { local shim_binary="containerd-shim-kata-${shim}-v2" local shim_file="/usr/local/bin/${shim_binary}" - rm "${shim_file}" || true + rm -f "${shim_file}" restore_shim "${shim_file}" done - rm "${default_shim_file}" || true + rm -f "${default_shim_file}" restore_shim "${default_shim_file}" if [[ "${CREATE_RUNTIMECLASSES}" == "true" ]]; then @@ -274,12 +288,15 @@ EOF function configure_containerd_runtime() { local runtime="kata" local configuration="configuration" - if [ -n "${1-}" ]; then - runtime+="-$1" - configuration+="-$1" + if [ -n "${2-}" ]; then + runtime+="-$2" + configuration+="-$2" fi local pluginid=cri - if grep -q "version = 2\>" $containerd_conf_file; then + + # if we are running k0s auto containerd.toml generation, the base template is by default version 2 + # we can safely assume to reference the older version of cri + if grep -q "version = 2\>" $containerd_conf_file || [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then pluginid=\"io.containerd.grpc.v1.cri\" fi local runtime_table="plugins.${pluginid}.containerd.runtimes.$runtime" @@ -333,16 +350,16 @@ function configure_containerd() { fi # Add default Kata runtime configuration - configure_containerd_runtime + configure_containerd_runtime "$1" for shim in "${shims[@]}"; do - configure_containerd_runtime $shim + configure_containerd_runtime "$1" $shim done } function remove_artifacts() { echo "deleting kata artifacts" - rm -rf /opt/kata/ + rm -rf /opt/kata/* } function cleanup_cri_runtime() { @@ -352,7 +369,7 @@ function cleanup_cri_runtime() { crio) cleanup_crio ;; - containerd | k3s | k3s-agent | rke2-agent | rke2-server) + containerd | k3s | k3s-agent | rke2-agent | rke2-server | k0s-controller | k0s-worker) cleanup_containerd ;; esac @@ -360,9 +377,9 @@ function cleanup_cri_runtime() { } function cleanup_crio() { - rm 
$crio_drop_in_conf_file + rm -f $crio_drop_in_conf_file if [[ "${DEBUG}" == "true" ]]; then - rm $crio_drop_in_conf_file_debug + rm -f $crio_drop_in_conf_file_debug fi } @@ -375,8 +392,14 @@ function cleanup_containerd() { function reset_runtime() { kubectl label node "$NODE_NAME" katacontainers.io/kata-runtime- - host_systemctl daemon-reload - host_systemctl restart "$1" + if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then + # do nothing, k0s will auto restart + : + else + host_systemctl daemon-reload + host_systemctl restart "$1" + fi + if [ "$1" == "crio" ] || [ "$1" == "containerd" ]; then host_systemctl restart kubelet fi @@ -412,6 +435,11 @@ function main() { containerd_conf_file="${containerd_conf_tmpl_file}" containerd_conf_file_backup="${containerd_conf_file}.bak" + elif [ "$runtime" == "k0s-worker" ] || [ "$runtime" == "k0s-controller" ]; then + # From 1.27.1 onwards k0s enables dynamic configuration on containerd CRI runtimes. + # This works by k0s creating a special directory in /etc/k0s/containerd.d/ where user can drop-in partial containerd configuration snippets. + # k0s will automatically pick up these files and adds these in containerd configuration imports list. + containerd_conf_file="/etc/containerd/kata-containers.toml" else # runtime == containerd if [ ! -f "$containerd_conf_file" ] && [ -d $(dirname "$containerd_conf_file") ] && \ @@ -427,7 +455,7 @@ function main() { fi # only install / remove / update if we are dealing with CRIO or containerd - if [[ "$runtime" =~ ^(crio|containerd|k3s|k3s-agent|rke2-agent|rke2-server)$ ]]; then + if [[ "$runtime" =~ ^(crio|containerd|k3s|k3s-agent|rke2-agent|rke2-server|k0s-worker|k0s-controller)$ ]]; then case "$action" in install) diff --git a/tools/packaging/kernel/configs/fragments/whitelist.conf b/tools/packaging/kernel/configs/fragments/whitelist.conf index d6e04444f..a6396a582 100644 --- a/tools/packaging/kernel/configs/fragments/whitelist.conf +++ b/tools/packaging/kernel/configs/fragments/whitelist.conf @@ -18,3 +18,4 @@ CONFIG_ARM64_UAO CONFIG_VFIO_MDEV_DEVICE CONFIG_SPECULATION_MITIGATIONS CONFIG_X86_SGX +CONFIG_VIRTIO_IOMMU diff --git a/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf b/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf index 09a4bf02f..e052e10e6 100644 --- a/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf +++ b/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf @@ -1,3 +1,4 @@ # x86 specific items we need in order to handle vfio_mode=vfio devices CONFIG_INTEL_IOMMU=y CONFIG_IRQ_REMAP=y +CONFIG_VIRTIO_IOMMU=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 194b81caa..9b252fd09 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -112 +113 diff --git a/tools/packaging/release/release-notes.sh b/tools/packaging/release/release-notes.sh index 254aa255b..734028dc0 100755 --- a/tools/packaging/release/release-notes.sh +++ b/tools/packaging/release/release-notes.sh @@ -140,18 +140,22 @@ The majority of the components of the project were built using containers. In o build reproducibility we publish those container images, and when those are used combined with the version of the projects listed as part of the "versions.yaml" file, users can get as close to the environment we used to build the release artefacts. 
+* agent (on all its different flavours): $(get_agent_image_name) * Kernel (on all its different flavours): $(get_kernel_image_name) * OVMF (on all its different flavours): $(get_ovmf_image_name) * QEMU (on all its different flavurs): $(get_qemu_image_name) * shim-v2: $(get_shim_v2_image_name) +* tools: $(get_tools_image_name) * virtiofsd: $(get_virtiofsd_image_name) The users who want to rebuild the tarballs using exactly the same images can simply use the following environment variables: +* \`AGENT_CONTAINER_BUILDER\` * \`KERNEL_CONTAINER_BUILDER\` * \`OVMF_CONTAINER_BUILDER\` * \`QEMU_CONTAINER_BUILDER\` * \`SHIM_V2_CONTAINER_BUILDER\` +* \`TOOLS_CONTAINER_BUILDER\` * \`VIRTIOFSD_CONTAINER_BUILDER\` ## Kata Linux Containers Kernel diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index e7a6fc987..d1e17e20d 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -123,7 +123,7 @@ get_last_modification() { dirty="" [ $(git status --porcelain | grep "${file#${repo_root_dir}/}" | wc -l) -gt 0 ] && dirty="-dirty" - echo "$(git log -1 --pretty=format:"%H" ${file})${dirty}" + echo "$(git log -1 --pretty=format:"%h" ${file})${dirty}" popd &> /dev/null } @@ -218,3 +218,18 @@ get_virtiofsd_image_name() { virtiofsd_script_dir="${repo_root_dir}/tools/packaging/static-build/virtiofsd" echo "${BUILDER_REGISTRY}:virtiofsd-$(get_from_kata_deps "externals.virtiofsd.toolchain")-${libc}-$(get_last_modification ${virtiofsd_script_dir})-$(uname -m)" } + +get_tools_image_name() { + tools_dir="${repo_root_dir}/src/tools" + libs_dir="${repo_root_dir}/src/libs" + agent_dir="${repo_root_dir}/src/agent" + + echo "${BUILDER_REGISTRY}:tools-$(get_last_modification ${tools_dir})-$(get_last_modification ${libs_dir})-$(get_last_modification ${agent_dir})" +} + +get_agent_image_name() { + libs_dir="${repo_root_dir}/src/libs" + agent_dir="${repo_root_dir}/src/agent" + + echo "${BUILDER_REGISTRY}:agent-$(get_last_modification ${libs_dir})-$(get_last_modification ${agent_dir})" +} diff --git a/tools/packaging/static-build/agent/Dockerfile b/tools/packaging/static-build/agent/Dockerfile new file mode 100644 index 000000000..c72104cb5 --- /dev/null +++ b/tools/packaging/static-build/agent/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel +# +# SPDX-License-Identifier: Apache-2.0 + +FROM alpine:3.18 +ARG RUST_TOOLCHAIN + +SHELL ["/bin/ash", "-o", "pipefail", "-c"] +RUN apk --no-cache add \ + bash \ + curl \ + gcc \ + git \ + libcap-ng-static \ + libseccomp-static \ + make \ + musl-dev \ + openssl-dev \ + openssl-libs-static \ + protoc && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} diff --git a/tools/packaging/static-build/agent/build-static-agent.sh b/tools/packaging/static-build/agent/build-static-agent.sh new file mode 100755 index 000000000..126e22d52 --- /dev/null +++ b/tools/packaging/static-build/agent/build-static-agent.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +source "${script_dir}/../../scripts/lib.sh" + +init_env() { + source "$HOME/.cargo/env" + + export LIBC=musl + export LIBSECCOMP_LINK_TYPE=static + export LIBSECCOMP_LIB_PATH=/usr/lib + + # This is needed to workaround + # https://github.com/sfackler/rust-openssl/issues/1624 + export OPENSSL_NO_VENDOR=Y +} + +build_agent_from_source() { 
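+    # Summary of the steps below: pick up the rustup toolchain from ~/.cargo, force a static
+    # musl build with statically linked libseccomp, keep OPENSSL_NO_VENDOR set as a workaround
+    # for the rust-openssl vendoring issue, then build and install the agent from src/agent
+    # into DESTDIR.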
+    echo "build agent from source"
+
+    init_env
+
+    cd src/agent
+    DESTDIR=${DESTDIR} make
+    DESTDIR=${DESTDIR} make install
+}
+
+build_agent_from_source $@
diff --git a/tools/packaging/static-build/agent/build.sh b/tools/packaging/static-build/agent/build.sh
new file mode 100755
index 000000000..5757952d3
--- /dev/null
+++ b/tools/packaging/static-build/agent/build.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2023 Intel
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+readonly agent_builder="${script_dir}/build-static-agent.sh"
+
+source "${script_dir}/../../scripts/lib.sh"
+
+container_image="${AGENT_CONTAINER_BUILDER:-$(get_agent_image_name)}"
+[ "${CROSS_BUILD}" == "true" ] && container_image="${container_image}-cross-build"
+
+sudo docker pull ${container_image} || \
+    (sudo docker $BUILDX build $PLATFORM \
+        --build-arg RUST_TOOLCHAIN="$(get_from_kata_deps "languages.rust.meta.newest-version")" \
+        -t "${container_image}" "${script_dir}" && \
+     # No-op unless PUSH_TO_REGISTRY is exported as "yes"
+     push_to_registry "${container_image}")
+
+sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \
+    --env DESTDIR=${DESTDIR} \
+    -w "${repo_root_dir}" \
+    "${container_image}" \
+    bash -c "${agent_builder}"
diff --git a/tools/packaging/static-build/cache_components_main.sh b/tools/packaging/static-build/cache_components_main.sh
deleted file mode 100755
index dbfa66f63..000000000
--- a/tools/packaging/static-build/cache_components_main.sh
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2022 Intel Corporation
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-
-set -o errexit
-set -o nounset
-set -o pipefail
-
-script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-source "${script_dir}/../scripts/lib.sh"
-
-KERNEL_FLAVOUR="${KERNEL_FLAVOUR:-kernel}" # kernel | kernel-nvidia-gpu | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental | kernel-nvidia-gpu-tdx-experimental | kernel-nvidia-gpu-snp
-OVMF_FLAVOUR="${OVMF_FLAVOUR:-x86_64}" # x86_64 | tdx
-QEMU_FLAVOUR="${QEMU_FLAVOUR:-qemu}" # qemu | qemu-tdx-experimental | qemu-snp-experimental
-ROOTFS_IMAGE_TYPE="${ROOTFS_IMAGE_TYPE:-image}" # image | initrd
-
-cache_clh_artifacts() {
-    local clh_tarball_name="kata-static-cloud-hypervisor.tar.xz"
-    local current_clh_version="$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")"
-    create_cache_asset "${clh_tarball_name}" "${current_clh_version}" ""
-}
-
-cache_firecracker_artifacts() {
-    local fc_tarball_name="kata-static-firecracker.tar.xz"
-    local current_fc_version="$(get_from_kata_deps "assets.hypervisor.firecracker.version")"
-    create_cache_asset "${fc_tarball_name}" "${current_fc_version}" ""
-}
-
-cache_kernel_artifacts() {
-    local kernel_tarball_name="kata-static-${KERNEL_FLAVOUR}.tar.xz"
-    local current_kernel_image="$(get_kernel_image_name)"
-    local current_kernel_kata_config_version="$(cat ${repo_root_dir}/tools/packaging/kernel/kata_config_version)"
-    # Changes to tools/packaging/kernel are covered by the kata_config_version check
-    local kernel_last_commit="$(get_last_modification ${repo_root_dir}/tools/packaging/static-build/kernel)"
-    local kernel_modules_tarball_path="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/kata-static-kernel-sev-modules.tar.xz"
-
-    # The ${vendor}-gpu kernels are based on an already existing entry, and does not require
-    # adding a new entry to the versions.yaml.
-    #
-    # With this in mind, let's just make sure we get the version from correct entry in the
-    # versions.yaml file.
-    case ${KERNEL_FLAVOUR} in
-        *"nvidia-gpu"*)
-            KERNEL_FLAVOUR=${KERNEL_FLAVOUR//"-nvidia-gpu"/}
-            ;;
-        *)
-            ;;
-    esac
-
-    case ${KERNEL_FLAVOUR} in
-        "kernel-sev"|"kernel-snp")
-            # In these cases, like "kernel-foo", it must be set to "kernel.foo" when looking at
-            # the versions.yaml file
-            current_kernel_version="$(get_from_kata_deps "assets.${KERNEL_FLAVOUR/-/.}.version")"
-            ;;
-        *)
-            current_kernel_version="$(get_from_kata_deps "assets.${KERNEL_FLAVOUR}.version")"
-            ;;
-    esac
-
-    local current_component_version="${current_kernel_version}-${current_kernel_kata_config_version}-${kernel_last_commit}"
-    create_cache_asset "${kernel_tarball_name}" "${current_component_version}" "${current_kernel_image}"
-    if [[ "${KERNEL_FLAVOUR}" == "kernel-sev" ]]; then
-        module_dir="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/kernel-sev/builddir/kata-linux-${current_kernel_version#v}-${current_kernel_kata_config_version}/lib/modules/${current_kernel_version#v}"
-        if [ ! -f "${kernel_modules_tarball_path}" ]; then
-            tar cvfJ "${kernel_modules_tarball_path}" "${module_dir}/kernel/drivers/virt/coco/efi_secret/"
-        fi
-        create_cache_asset "kata-static-kernel-sev-modules.tar.xz" "${current_component_version}" "${current_kernel_image}"
-    fi
-}
-
-cache_nydus_artifacts() {
-    local nydus_tarball_name="kata-static-nydus.tar.xz"
-    local current_nydus_version="$(get_from_kata_deps "externals.nydus.version")"
-    create_cache_asset "${nydus_tarball_name}" "${current_nydus_version}" ""
-}
-
-cache_ovmf_artifacts() {
-    local current_ovmf_version="$(get_from_kata_deps "externals.ovmf.${OVMF_FLAVOUR}.version")"
-    case ${OVMF_FLAVOUR} in
-        "tdx")
-            ovmf_tarball_name="kata-static-tdvf.tar.xz"
-            ;;
-        "x86_64")
-            ovmf_tarball_name="kata-static-ovmf.tar.xz"
-            ;;
-        *)
-            ovmf_tarball_name="kata-static-ovmf-${OVMF_FLAVOUR}.tar.xz"
-            ;;
-    esac
-
-    local current_ovmf_image="$(get_ovmf_image_name)"
-    create_cache_asset "${ovmf_tarball_name}" "${current_ovmf_version}" "${current_ovmf_image}"
-}
-
-cache_qemu_artifacts() {
-    local qemu_tarball_name="kata-static-${QEMU_FLAVOUR}.tar.xz"
-    local current_qemu_version=$(get_from_kata_deps "assets.hypervisor.${QEMU_FLAVOUR}.version")
-    [ -z "${current_qemu_version}" ] && current_qemu_version=$(get_from_kata_deps "assets.hypervisor.${QEMU_FLAVOUR}.tag")
-    local qemu_sha=$(calc_qemu_files_sha256sum)
-    local current_qemu_image="$(get_qemu_image_name)"
-    create_cache_asset "${qemu_tarball_name}" "${current_qemu_version}-${qemu_sha}" "${current_qemu_image}"
-}
-
-cache_rootfs_artifacts() {
-    local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")"
-    local guest_image_last_commit="$(get_last_modification "${repo_root_dir}/tools/packaging/guest-image")"
-    local agent_last_commit="$(get_last_modification "${repo_root_dir}/src/agent")"
-    local libs_last_commit="$(get_last_modification "${repo_root_dir}/src/libs")"
-    local gperf_version="$(get_from_kata_deps "externals.gperf.version")"
-    local libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")"
-    local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")"
-    local rootfs_tarball_name="kata-static-rootfs-${ROOTFS_IMAGE_TYPE}.tar.xz"
-    local current_rootfs_version="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${ROOTFS_IMAGE_TYPE}"
-    create_cache_asset "${rootfs_tarball_name}" "${current_rootfs_version}" ""
-}
-
-cache_shim_v2_artifacts() {
-    local shim_v2_tarball_name="kata-static-shim-v2.tar.xz"
-    local shim_v2_last_commit="$(get_last_modification "${repo_root_dir}/src/runtime")"
-    local protocols_last_commit="$(get_last_modification "${repo_root_dir}/src/libs/protocols")"
-    local runtime_rs_last_commit="$(get_last_modification "${repo_root_dir}/src/runtime-rs")"
-    local golang_version="$(get_from_kata_deps "languages.golang.meta.newest-version")"
-    local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")"
-    local current_shim_v2_version="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${golang_version}-${rust_version}"
-    local current_shim_v2_image="$(get_shim_v2_image_name)"
-    create_cache_asset "${shim_v2_tarball_name}" "${current_shim_v2_version}" "${current_shim_v2_image}"
-}
-
-cache_virtiofsd_artifacts() {
-    local virtiofsd_tarball_name="kata-static-virtiofsd.tar.xz"
-    local current_virtiofsd_version="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")"
-    local current_virtiofsd_image="$(get_virtiofsd_image_name)"
-    create_cache_asset "${virtiofsd_tarball_name}" "${current_virtiofsd_version}" "${current_virtiofsd_image}"
-}
-
-create_cache_asset() {
-    local component_name="${1}"
-    local component_version="${2}"
-    local component_image="${3}"
-
-    sudo cp "${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/${component_name}" .
-    sudo chown -R "${USER}:${USER}" .
-    sha256sum "${component_name}" > "sha256sum-${component_name}"
-    cat "sha256sum-${component_name}"
-    echo "${component_version}" > "latest"
-    cat "latest"
-    echo "${component_image}" > "latest_image"
-    cat "latest_image"
-}
-
-help() {
-echo "$(cat << EOF
-Usage: $0 "[options]"
-    Description:
-    Builds the cache of several kata components.
-    Options:
-    -c  Cloud hypervisor cache
-    -F  Firecracker cache
-    -k  Kernel cache
-        * Export KERNEL_FLAVOUR="kernel | kernel-nvidia-gpu | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental | kernel-nvidia-gpu-tdx-experimental | kernel-nvidia-gpu-snp" for a specific build
-        The default KERNEL_FLAVOUR value is "kernel"
-    -n  Nydus cache
-    -q  QEMU cache
-        * Export QEMU_FLAVOUR="qemu | qemu-tdx-experimental | qemu-snp-experimental" for a specific build
-        The default QEMU_FLAVOUR value is "qemu"
-    -r  RootFS cache
-        * Export ROOTFS_IMAGE_TYPE="image|initrd" for one of those two types
-        The default ROOTFS_IMAGE_TYPE value is "image"
-    -s  Shim v2 cache
-    -v  VirtioFS cache
-    -h  Shows help
-EOF
-)"
-}
-
-main() {
-    local cloud_hypervisor_component="${cloud_hypervisor_component:-}"
-    local firecracker_component="${firecracker_component:-}"
-    local kernel_component="${kernel_component:-}"
-    local nydus_component="${nydus_component:-}"
-    local ovmf_component="${ovmf_component:-}"
-    local qemu_component="${qemu_component:-}"
-    local rootfs_component="${rootfs_component:-}"
-    local shim_v2_component="${shim_v2_component:-}"
-    local virtiofsd_component="${virtiofsd_component:-}"
-    local OPTIND
-    while getopts ":cFknoqrsvh:" opt
-    do
-        case "$opt" in
-        c)
-            cloud_hypervisor_component="1"
-            ;;
-        F)
-            firecracker_component="1"
-            ;;
-        k)
-            kernel_component="1"
-            ;;
-        n)
-            nydus_component="1"
-            ;;
-        o)
-            ovmf_component="1"
-            ;;
-        q)
-            qemu_component="1"
-            ;;
-        r)
-            rootfs_component="1"
-            ;;
-        s)
-            shim_v2_component="1"
-            ;;
-        v)
-            virtiofsd_component="1"
-            ;;
-        h)
-            help
-            exit 0;
-            ;;
-        :)
-            echo "Missing argument for -$OPTARG";
-            help
-            exit 1;
-            ;;
-        esac
-    done
-    shift $((OPTIND-1))
-
-    [[ -z "${cloud_hypervisor_component}" ]] && \
-    [[ -z "${firecracker_component}" ]] && \
-    [[ -z "${kernel_component}" ]] && \
-    [[ -z "${nydus_component}" ]] && \
-    [[ -z "${ovmf_component}" ]] && \
-    [[ -z "${qemu_component}" ]] && \
-    [[ -z "${rootfs_component}" ]] && \
-    [[ -z "${shim_v2_component}" ]] && \
-    [[ -z "${virtiofsd_component}" ]] && \
-        help && die "Must choose at least one option"
-
-    mkdir -p "${WORKSPACE}/artifacts"
-    pushd "${WORKSPACE}/artifacts"
-    echo "Artifacts:"
-
-    [ "${cloud_hypervisor_component}" == "1" ] && cache_clh_artifacts
-    [ "${firecracker_component}" == "1" ] && cache_firecracker_artifacts
-    [ "${kernel_component}" == "1" ] && cache_kernel_artifacts
-    [ "${nydus_component}" == "1" ] && cache_nydus_artifacts
-    [ "${ovmf_component}" == "1" ] && cache_ovmf_artifacts
-    [ "${qemu_component}" == "1" ] && cache_qemu_artifacts
-    [ "${rootfs_component}" == "1" ] && cache_rootfs_artifacts
-    [ "${shim_v2_component}" == "1" ] && cache_shim_v2_artifacts
-    [ "${virtiofsd_component}" == "1" ] && cache_virtiofsd_artifacts
-
-    ls -la "${WORKSPACE}/artifacts/"
-    popd
-    sync
-}
-
-main "$@"
diff --git a/tools/packaging/static-build/nydus/build.sh b/tools/packaging/static-build/nydus/build.sh
index 97686642a..908f46af5 100755
--- a/tools/packaging/static-build/nydus/build.sh
+++ b/tools/packaging/static-build/nydus/build.sh
@@ -12,7 +12,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

source "${script_dir}/../../scripts/lib.sh"

-ARCH=${ARCH:-$(arch_to_golang "$(uname -m)")}
+arch="$(uname -m)"

nydus_url="${nydus_url:-}"
nydus_version="${nydus_version:-}"
@@ -25,7 +25,7 @@ info "Get nydus information from runtime versions.yaml"

nydus_tarball_url="${nydus_url}/releases/download"

-file_name="nydus-static-${nydus_version}-linux-${ARCH}.tgz"
+file_name="nydus-static-${nydus_version}-linux-$(arch_to_golang $arch).tgz"
download_url="${nydus_tarball_url}/${nydus_version}/${file_name}"

info "Download nydus version: ${nydus_version} from ${download_url}"
diff --git a/tools/packaging/static-build/tools/Dockerfile b/tools/packaging/static-build/tools/Dockerfile
new file mode 100644
index 000000000..aa468488d
--- /dev/null
+++ b/tools/packaging/static-build/tools/Dockerfile
@@ -0,0 +1,20 @@
+# Copyright (c) 2023 Intel
+#
+# SPDX-License-Identifier: Apache-2.0
+
+FROM alpine:3.18
+ARG GO_TOOLCHAIN
+ARG RUST_TOOLCHAIN
+
+SHELL ["/bin/ash", "-o", "pipefail", "-c"]
+RUN apk --no-cache add \
+    bash \
+    curl \
+    gcc \
+    git \
+    libcap-ng-static \
+    libseccomp-static \
+    make \
+    musl-dev \
+    protoc && \
+    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN}
diff --git a/tools/packaging/static-build/tools/build-static-tools.sh b/tools/packaging/static-build/tools/build-static-tools.sh
new file mode 100755
index 000000000..2004fcf90
--- /dev/null
+++ b/tools/packaging/static-build/tools/build-static-tools.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+source "${script_dir}/../../scripts/lib.sh"
+
+init_env() {
+    source "$HOME/.cargo/env"
+
+    export LIBC=musl
+    export LIBSECCOMP_LINK_TYPE=static
+    export LIBSECCOMP_LIB_PATH=/usr/lib
+
+    extra_rust_flags=" -C link-self-contained=yes"
+}
+
+build_tool_from_source() {
+    tool=${1}
+
+    echo "build ${tool} from source"
+    init_env
+
+    cd src/tools/${tool}
+    make
+}
+
+build_tool_from_source $@
diff --git a/tools/packaging/static-build/tools/build.sh b/tools/packaging/static-build/tools/build.sh
new file mode 100755
index 000000000..a4dd958c4
--- /dev/null
+++ b/tools/packaging/static-build/tools/build.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) 2023 Intel
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+readonly tools_builder="${script_dir}/build-static-tools.sh"
+
+source "${script_dir}/../../scripts/lib.sh"
+
+tool="${1}"
+
+container_image="${TOOLS_CONTAINER_BUILDER:-$(get_tools_image_name)}"
+[ "${CROSS_BUILD}" == "true" ] && container_image="${container_image}-cross-build"
+
+sudo docker pull ${container_image} || \
+    (sudo docker $BUILDX build $PLATFORM \
+        --build-arg RUST_TOOLCHAIN="$(get_from_kata_deps "languages.rust.meta.newest-version")" \
+        -t "${container_image}" "${script_dir}" && \
+     # No-op unless PUSH_TO_REGISTRY is exported as "yes"
+     push_to_registry "${container_image}")
+
+sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \
+    -w "${repo_root_dir}" \
+    "${container_image}" \
+    bash -c "${tools_builder} ${tool}"
diff --git a/versions.yaml b/versions.yaml
index f316be865..2d23f2113 100644
--- a/versions.yaml
+++ b/versions.yaml
@@ -277,12 +277,12 @@ externals:
  nydus:
    description: "Nydus image acceleration service"
    url: "https://github.com/dragonflyoss/image-service"
-    version: "v2.2.1"
+    version: "v2.2.3"

  nydus-snapshotter:
    description: "Snapshotter for Nydus image acceleration service"
    url: "https://github.com/containerd/nydus-snapshotter"
-    version: "v0.3.3"
+    version: "v0.12.0"

  ovmf:
    description: "Firmware, implementation of UEFI for virtual machines."
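(Editorial aside, not part of the patch.) The build and cache scripts above pin component versions by calling `get_from_kata_deps` with dot-separated paths that are resolved against this `versions.yaml`, so bumping an entry here changes what every caller downloads. A minimal sketch, assuming `tools/packaging/scripts/lib.sh` can be sourced standalone from a repository checkout:

```bash
#!/usr/bin/env bash
# Sketch only: show how the bumped versions.yaml entries are consumed
# by the scripts touched in this patch.
set -o errexit

repo_root_dir="$(git rev-parse --show-toplevel)"
source "${repo_root_dir}/tools/packaging/scripts/lib.sh"

# With the change above, these lookups are expected to print the new versions:
get_from_kata_deps "externals.nydus.version"              # v2.2.3
get_from_kata_deps "externals.nydus-snapshotter.version"  # v0.12.0
```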
@@ -379,3 +379,11 @@ plugins:
      available on a Kubernetes host.
    url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin"
    version: "b7f6d3e0679796e907ecca88cfab0e32e326850d"
+
+docker_images:
+  description: "Docker hub images used for testing"
+
+  nginx:
+    description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols"
+    url: "https://hub.docker.com/_/nginx/"
+    version: "1.15-alpine"
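To close, a hedged usage sketch of the new static-build entry points introduced in this patch. It assumes a kata-containers checkout with Docker and sudo available, and that the variables the wrappers dereference under `set -o nounset` (`DESTDIR`, `CROSS_BUILD`, plus `BUILDX`/`PLATFORM` on the local image-build fallback path) are exported; the `AGENT_CONTAINER_BUILDER` tag shown is purely illustrative.

```bash
#!/usr/bin/env bash
# Sketch only: drive the agent and tools builders added by this patch directly.
set -o errexit

export CROSS_BUILD=false      # checked by both build.sh wrappers
export BUILDX="" PLATFORM=""  # only consulted if the builder image must be rebuilt locally
export DESTDIR="${PWD}/build/agent-destdir"

# Use the image derived from get_agent_image_name(), or pin one explicitly via the
# override documented in the README hunk above (the tag below is hypothetical):
# export AGENT_CONTAINER_BUILDER="quay.io/kata-containers/builders:agent-abc1234-def5678"
./tools/packaging/static-build/agent/build.sh

# Build a single Rust tool, e.g. agent-ctl, with the tools builder image:
./tools/packaging/static-build/tools/build.sh agent-ctl
```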