From ea1fdd2cb9374622bd8af1553294b349db13a4f4 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Wed, 2 Aug 2023 12:15:50 -0600 Subject: [PATCH 001/339] metrics: Clean kata components before start a metric test. This PR kills all kata components before start a new metric test. Fixes: #7528 Signed-off-by: David Esparza (cherry picked from commit 5dbe88330f0dfb40ac6e88225c128688a1f9dedc) --- tests/common.bash | 29 +++++++++-------------------- tests/metrics/lib/common.bash | 2 ++ 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index d71b3f43c..224df23f5 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -186,31 +186,20 @@ function clean_env_ctr() if (( count_tasks > 0 )); then die "Can't remove running containers." fi - - kill_kata_components } # Kills running shim and hypervisor components +# by using the kata-component file name. function kill_kata_components() { - local kata_bin_dir="/opt/kata/bin" - local shim_path="${kata_bin_dir}/containerd-shim-kata-v2" - local hypervisor_path="${kata_bin_dir}/qemu-system-x86_64" - local pid_shim_count="$(pgrep -fc ${shim_path} || exit 0)" + local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "cloud-hypervisor" ) - [ ${pid_shim_count} -gt "0" ] && sudo kill -SIGKILL "$(pgrep -f ${shim_path})" > /dev/null 2>&1 - - if [ "${KATA_HYPERVISOR}" = 'clh' ]; then - hypervisor_path="${kata_bin_dir}/cloud-hypervisor" - elif [ "${KATA_HYPERVISOR}" != 'qemu' ]; then - echo "Failed to stop the hypervisor: '${KATA_HYPERVISOR}' as it is not recognized" - return - fi - - local pid_hypervisor_count="$(pgrep -fc ${hypervisor_path} || exit 0)" - - if [ ${pid_hypervisor_count} -gt "0" ]; then - sudo kill -SIGKILL "$(pgrep -f ${hypervisor_path})" > /dev/null 2>&1 - fi + sudo systemctl stop containerd + # Get the filenames of the kata components + # and kill the correspondingt processes + for PID_NAME in ${PID_NAMES} ; do + sudo killall ${PID_NAME} > /dev/null 2>&1 || true + 
done + sudo systemctl start containerd } # Restarts a systemd service while ensuring the start-limit-burst is set to 0. diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index c43019a70..4111d67d4 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -192,6 +192,8 @@ function kill_processes_before_start() CTR_PROCS=$(sudo "${CTR_EXE}" t list -q) [[ -n "${CTR_PROCS}" ]] && clean_env_ctr + kill_kata_components + check_processes } From b2c627aac919c877cd2e94f1c5e1e5c4e4f2407c Mon Sep 17 00:00:00 2001 From: David Esparza Date: Wed, 2 Aug 2023 12:23:09 -0600 Subject: [PATCH 002/339] metrics: Improve naming testing containers in launch times test This commit provides a new way to name the containers used in the launch-times-test in this form: 'kata_launch_times_RANDOM_NUMBER', where RANDOM_NUMBER is in the 1-1000 range. Fixes: #7529 Signed-off-by: David Esparza (cherry picked from commit 1e15369e59faf92b1efcf4b68d099fb4418b73de) --- tests/metrics/time/launch_times.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/time/launch_times.sh b/tests/metrics/time/launch_times.sh index 0458b979e..3d6b98e34 100755 --- a/tests/metrics/time/launch_times.sh +++ b/tests/metrics/time/launch_times.sh @@ -96,6 +96,7 @@ run_workload() { # number of decimal digits after the decimal points # for 'bc' performing math in kernel period estimation L_CALC_SCALE=13 + local CONTAINER_NAME="kata_launch_times_$(( $RANDOM % 1000 + 1))" start_time=$($DATECMD) # Check entropy level of the host # Run the image and command and capture the results into an array... 
declare workload_result - readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime=${CTR_RUNTIME} ${IMAGE} test bash -c "$DATECMD $DMESGCMD") - + readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime ${CTR_RUNTIME} ${IMAGE} ${CONTAINER_NAME} bash -c "$DATECMD $DMESGCMD") end_time=$($DATECMD) # Delay this calculation until after we have run - do not want From aa71d6f9311d3b815b281094ce2e7cfc9b9e3f2f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 2 Aug 2023 16:46:48 +0000 Subject: [PATCH 003/339] metrics: Add network latency test This PR adds network latency test for kata metrics. Fixes #7526 Signed-off-by: Gabriela Cervantes (cherry picked from commit 64fdb98704e68ec91f4683624872f06037c388ed) --- latency_kubernetes/latency-network.sh | 85 +++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100755 latency_kubernetes/latency-network.sh diff --git a/latency_kubernetes/latency-network.sh b/latency_kubernetes/latency-network.sh new file mode 100755 index 000000000..19c84ef14 --- /dev/null +++ b/latency_kubernetes/latency-network.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# +# Copyright (c) 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +latency_file=$(mktemp latencyresults.XXXXXXXXXX) +TEST_NAME="${TEST_NAME:-latency}" + +function remove_tmp_file() { + rm -rf "${latency_file}" +} + +trap remove_tmp_file EXIT + +function main() { + init_env + cmds=("bc" "jq") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + wait_time=20 + sleep_time=2 + + # Create server + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/latency-server.yaml" + + # Get the names of the server pod + export server_pod_name="latency-server" + + # Verify the server pod is working + local cmd="kubectl get pod $server_pod_name -o yaml | grep 'phase: Running'" + 
waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Create client + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/latency-client.yaml" + + # Get the names of the client pod + export client_pod_name="latency-client" + + # Verify the client pod is working + local cmd="kubectl get pod $client_pod_name -o yaml | grep 'phase: Running'" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Get the ip address of the server pod + export server_ip_add=$(kubectl get pod "$server_pod_name" -o jsonpath='{.status.podIP}') + + # Number of packets (sent) + local number="${number:-30}" + + local client_command="ping -c ${number} ${server_ip_add}" + + kubectl exec "$client_pod_name" -- sh -c "$client_command" > "$latency_file" + + metrics_json_init + + local latency=$(cat $latency_file | grep avg | cut -f2 -d'=' | sed 's/[[:blank:]]//g' | cut -f2 -d'/') + + metrics_json_start_array + + local json="$(cat << EOF + { + "latency": { + "Result" : $latency, + "Units" : "ms" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + metrics_json_save + + kubectl delete pod "$client_pod_name" "$server_pod_name" + check_processes +} +main "$@" From 2b60fe0fe087b4ddef33bb1ca6e0f78cec2ab859 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 2 Aug 2023 16:50:51 +0000 Subject: [PATCH 004/339] metrics: Add latency client yaml This PR adds latency client yaml for the kubernetes test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 9bb8451df5e278ac31db7ed00156cb77ed9186c8) --- .../runtimeclass_workloads/latency-client.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 latency_kubernetes/runtimeclass_workloads/latency-client.yaml diff --git a/latency_kubernetes/runtimeclass_workloads/latency-client.yaml b/latency_kubernetes/runtimeclass_workloads/latency-client.yaml new file mode 100644 index 000000000..8e1f3bd0e --- /dev/null +++ b/latency_kubernetes/runtimeclass_workloads/latency-client.yaml @@ -0,0 +1,17 @@ +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: latency-client +spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: client-container + image: quay.io/prometheus/busybox:latest + command: + - sleep + - "180" From db23b95b53e5b7b418814056f29e069464ad681d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 2 Aug 2023 16:52:17 +0000 Subject: [PATCH 005/339] metrics: Add latency server yaml This PR adds latency server yaml for kubernetes test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 3b45060b6169934943212d399299713b5511f28c) --- .../runtimeclass_workloads/latency-server.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 latency_kubernetes/runtimeclass_workloads/latency-server.yaml diff --git a/latency_kubernetes/runtimeclass_workloads/latency-server.yaml b/latency_kubernetes/runtimeclass_workloads/latency-server.yaml new file mode 100644 index 000000000..ce301f653 --- /dev/null +++ b/latency_kubernetes/runtimeclass_workloads/latency-server.yaml @@ -0,0 +1,18 @@ +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: latency-server +spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: server-container + image: quay.io/prometheus/busybox:latest + command: + - sleep + - "180" From b33d4de01366d3cc27ca69e9b4a633d25b3985d2 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 2 Aug 2023 16:56:27 +0000 Subject: [PATCH 006/339] metrics: Add latency test to network README This PR adds latency test to network README for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 66db5b53500b5e75be3f25882cd59273f71d0b9a) --- tests/metrics/network/README.md | 1 + .../metrics/network/latency_kubernetes}/latency-client.yaml | 0 .../metrics/network/latency_kubernetes}/latency-network.sh | 0 .../metrics/network/latency_kubernetes}/latency-server.yaml | 0 4 files changed, 1 insertion(+) rename {latency_kubernetes/runtimeclass_workloads => tests/metrics/network/latency_kubernetes}/latency-client.yaml (100%) rename {latency_kubernetes => tests/metrics/network/latency_kubernetes}/latency-network.sh (100%) rename {latency_kubernetes/runtimeclass_workloads => tests/metrics/network/latency_kubernetes}/latency-server.yaml (100%) diff --git a/tests/metrics/network/README.md b/tests/metrics/network/README.md index c109a6841..05acd4cd6 100644 --- a/tests/metrics/network/README.md +++ b/tests/metrics/network/README.md @@ -10,6 +10,7 @@ bandwidth, jitter, latency and parallel bandwidth. ## Networking tests - `k8s-network-metrics-iperf3.sh` measures bandwidth which is the speed of the data transfer. +- `latency-network.sh` measures network latency. 
## Running the tests diff --git a/latency_kubernetes/runtimeclass_workloads/latency-client.yaml b/tests/metrics/network/latency_kubernetes/latency-client.yaml similarity index 100% rename from latency_kubernetes/runtimeclass_workloads/latency-client.yaml rename to tests/metrics/network/latency_kubernetes/latency-client.yaml diff --git a/latency_kubernetes/latency-network.sh b/tests/metrics/network/latency_kubernetes/latency-network.sh similarity index 100% rename from latency_kubernetes/latency-network.sh rename to tests/metrics/network/latency_kubernetes/latency-network.sh diff --git a/latency_kubernetes/runtimeclass_workloads/latency-server.yaml b/tests/metrics/network/latency_kubernetes/latency-server.yaml similarity index 100% rename from latency_kubernetes/runtimeclass_workloads/latency-server.yaml rename to tests/metrics/network/latency_kubernetes/latency-server.yaml From a75db201676e89f3258579627da8aec9565bffb0 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 3 Aug 2023 16:09:30 +0000 Subject: [PATCH 007/339] gha: Add iperf network metrics This PR adds the iperf network metrics to the github actions for kata metrics. 
Fixes #7535 Signed-off-by: Gabriela Cervantes (cherry picked from commit 5b5caf89081f1dc83eac404817955255f20ae3c3) --- .github/workflows/run-metrics.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index 596a3dea4..180121496 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -52,6 +52,9 @@ jobs: - name: run fio test run: bash tests/metrics/gha-run.sh run-test-fio + - name: run iperf test + run: bash tests/metrics/gha-run.sh run-test-iperf + - name: make metrics tarball ${{ matrix.vmm }} run: bash tests/metrics/gha-run.sh make-tarball-results From d6398ccf9eccdb7134c29fa1f6ad53819997835f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 3 Aug 2023 16:16:21 +0000 Subject: [PATCH 008/339] metrics: Add iperf to gha run script This PR adds iperf to gha run script. Signed-off-by: Gabriela Cervantes (cherry picked from commit 3c319d8d4c506ec2f786498a3a0ece061f1664ae) --- tests/metrics/gha-run.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index dfe31a1cc..ac773eb33 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -93,6 +93,14 @@ function run_test_fio() { bash storage/fio-k8s/fio-test-ci.sh } +function run_test_iperf() { + info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" + # ToDo: remove the exit once the metrics workflow is stable + exit 0 + + bash network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh +} + function main() { action="${1:-}" case "${action}" in @@ -104,6 +112,7 @@ function main() { run-test-blogbench) run_test_blogbench ;; run-test-tensorflow) run_test_tensorflow ;; run-test-fio) run_test_fio ;; + run-test-iperf) run_test_iperf ;; *) >&2 die "Invalid argument" ;; esac } From 759b0fa3858728a6d4e58752560bf4a6afbf162d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 7 Aug 2023 16:46:55 +0000 Subject: [PATCH 
009/339] metrics: General improvements to mobilenet tensorflow test This PR renames the mobilenet tensorflow test to have a more specific tensorflow name mainly because tensorflow has different configurations and we will add more tensorflow tests so we want to distinguish each tensorflow test. Fixes #7571 Signed-off-by: Gabriela Cervantes (cherry picked from commit 863283716dde19d8bb00c78c6e7bff6b1050bd47) --- .../Dockerfile | 0 ... => tensorflow_mobilenet_v1_bfloat16_fp32.sh} | 16 ++++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) rename tests/metrics/machine_learning/{tensorflow_mobilenet_dockerfile => mobilenet_v1_bfloat16_fp32_dockerfile}/Dockerfile (100%) rename tests/metrics/machine_learning/{tensorflow_mobilenet_benchmark.sh => tensorflow_mobilenet_v1_bfloat16_fp32.sh} (93%) diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_dockerfile/Dockerfile b/tests/metrics/machine_learning/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile similarity index 100% rename from tests/metrics/machine_learning/tensorflow_mobilenet_dockerfile/Dockerfile rename to tests/metrics/machine_learning/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh similarity index 93% rename from tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh rename to tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index 63b42c1e7..d6c1a312a 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_benchmark.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -10,12 +10,12 @@ set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../lib/common.bash" -IMAGE="docker.io/library/tensorflowmobilenet:latest" -DOCKERFILE="${SCRIPT_PATH}/tensorflow_mobilenet_dockerfile/Dockerfile" 
+IMAGE="docker.io/library/mobilenet_v1_bfloat16_fp32:latest" +DOCKERFILE="${SCRIPT_PATH}/mobilenet_v1_bfloat16_fp32_dockerfile/Dockerfile" tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) NUM_CONTAINERS="$1" TIMEOUT="$2" -TEST_NAME="tensorflow-intelai" +TEST_NAME="mobilenet_v1_bfloat16_fp32" PAYLOAD_ARGS="tail -f /dev/null" TESTDIR="${TESTDIR:-/testdir}" # Options to control the start of the workload using a trigger-file @@ -62,7 +62,7 @@ EOF chmod +x "${script}" } -function mobilenet_test() { +function mobilenet_v1_bfloat16_fp32_test() { local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16" info "Export environment variables" @@ -70,7 +70,7 @@ function mobilenet_test() { sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_EXPORT_VAR}" done - info "Running Mobilenet Tensorflow test" + info "Running Mobilenet V1 Bfloat16 FP32 Tensorflow test" local pids=() local j=0 for i in "${containers[@]}"; do @@ -91,7 +91,7 @@ function mobilenet_test() { check_file=$(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") retries="30" for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break + [ "${check_file}" = 1 ] && break sleep 1 done done @@ -104,7 +104,7 @@ function mobilenet_test() { local average_mobilenet=$(echo "${mobilenet_results}" | sed 's/.$//' | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) local json="$(cat << EOF { - "Mobilenet": { + "Mobilenet_v1_bfloat16_fp32": { "Result": "${mobilenet_results}", "Average": "${average_mobilenet}", "Units": "images/s" @@ -179,7 +179,7 @@ function main() { INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) ((INITIAL_NUM_PIDS++)) - mobilenet_test + mobilenet_v1_bfloat16_fp32_test metrics_json_save From a48466689053affede9f2495cca09fbc9b86b5ac Mon Sep 17 00:00:00 2001 From: David Esparza Date: Mon, 7 Aug 2023 
16:50:51 -0600 Subject: [PATCH 010/339] metrics: install kata once and run multiple checks This PR changes the metrics workflow in order to just install kata once, and run the checks for multiple hypervisor variations. In this way we save time avoiding installing kata for each hypervisor to be tested. Fixes: #7578 Signed-off-by: David Esparza (cherry picked from commit e6649698620af15e4cb933cf8bf1769e13e62e82) --- .github/workflows/run-metrics.yaml | 24 +++++++++++++++++------- tests/common.bash | 27 +++++++++++++++++++-------- tests/metrics/gha-run.sh | 1 + 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index 180121496..e50f6bd21 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -10,16 +10,11 @@ on: type: string jobs: - run-metrics: - strategy: - fail-fast: true - matrix: - vmm: ['clh', 'qemu'] - max-parallel: 1 + setup-kata: + name: Kata Setup runs-on: metrics env: GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} steps: - uses: actions/checkout@v3 with: @@ -34,6 +29,21 @@ jobs: - name: Install kata run: bash tests/metrics/gha-run.sh install-kata kata-artifacts + run-metrics: + needs: setup-kata + strategy: + fail-fast: true + matrix: + vmm: ['clh', 'qemu'] + max-parallel: 1 + runs-on: metrics + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - name: enabling the hypervisor + run: bash tests/metrics/gha-run.sh enabling-hypervisor + - name: run launch times test run: bash tests/metrics/gha-run.sh run-test-launchtimes diff --git a/tests/common.bash b/tests/common.bash index 224df23f5..295ac8c5c 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -295,18 +295,29 @@ function install_kata() { sudo ln -sf "${b}" "${local_bin_dir}/$(basename $b)" done - if [[ ${KATA_HYPERVISOR} == "dragonball" ]]; then - sudo ln -sf "${katadir}/runtime-rs/bin/containerd-shim-kata-v2" 
"${local_bin_dir}/containerd-shim-kata-${KATA_HYPERVISOR}-v2" - else - sudo ln -sf "${katadir}/bin/containerd-shim-kata-v2" "${local_bin_dir}/containerd-shim-kata-${KATA_HYPERVISOR}-v2" - fi - - sudo ln -sf ${katadir}/share/defaults/kata-containers/configuration-${KATA_HYPERVISOR}.toml ${katadir}/share/defaults/kata-containers/configuration.toml - check_containerd_config_for_kata restart_containerd_service } +# creates a new kata configuration.toml hard link that +# points to the hypervisor passed by KATA_HYPERVISOR env var. +function enabling_hypervisor() { + declare -r KATA_DIR="/opt/kata" + declare -r CONFIG_DIR="${KATA_DIR}/share/defaults/kata-containers" + declare -r SRC_HYPERVISOR_CONFIG="${CONFIG_DIR}/configuration-${KATA_HYPERVISOR}.toml" + declare -r DEST_KATA_CONFIG="${CONFIG_DIR}/configuration.toml" + declare -r CONTAINERD_SHIM_KATA="/usr/local/bin/containerd-shim-kata-${KATA_HYPERVISOR}-v2" + + if [[ ${KATA_HYPERVISOR} == "dragonball" ]]; then + sudo ln -sf "${KATA_DIR}/runtime-rs/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}" + else + sudo ln -sf "${KATA_DIR}/bin/containerd-shim-kata-v2" "${CONTAINERD_SHIM_KATA}" + fi + + sudo ln -sf "${SRC_HYPERVISOR_CONFIG}" "${DEST_KATA_CONFIG}" +} + + function check_containerd_config_for_kata() { # check containerd config declare -r line1="default_runtime_name = \"kata\"" diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index ac773eb33..3f168dafe 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -105,6 +105,7 @@ function main() { action="${1:-}" case "${action}" in install-kata) install_kata && install_checkmetrics ;; + enabling-hypervisor) enabling_hypervisor ;; make-tarball-results) make_tarball_results ;; run-test-launchtimes) run_test_launchtimes ;; run-test-memory-usage) run_test_memory_usage ;; From 8d4f9ef256b125e1a46151746016b05022adbbd3 Mon Sep 17 00:00:00 2001 From: Unmesh Deodhar Date: Tue, 8 Aug 2023 14:20:44 -0500 Subject: [PATCH 011/339] tests: upgrade 
bats version Instead of using package manager to install bats, building this from source. This gives us the updated version of bats which supports functions such as setup_file and teardown_file. We can use these functions into our current tests. Fixes: #7597 Signed-off-by: Unmesh Deodhar (cherry picked from commit aeaec9dae94907636ed179272f40c54e606bf830) --- tests/integration/kubernetes/gha-run.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index d756ccfdc..9f440d150 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -47,8 +47,14 @@ function create_cluster() { } function install_bats() { - sudo apt-get update - sudo apt-get -y install bats + # Installing bats from the lunar repo. + # This installs newer version of the bats which supports setup_file and teardown_file functions. + # These functions are helpful when adding new tests that require one time setup. + + sudo apt install -y software-properties-common + sudo add-apt-repository 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' + sudo apt install -y bats + sudo add-apt-repository --remove 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' } function install_kubectl() { From 6ae591c6188efe917d42a39957af383576995856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 8 Aug 2023 11:16:10 +0200 Subject: [PATCH 012/339] ci: k8s: Add the image used for unencrypted confidential tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add here the image we'll be using for unencrypted confidential tests. Later on, we'll make sure to build and use this image as part of our CI. 
The image can easily be built as a multi-arch image, and has `cpuid` installed in case of `x86_64` build, so it can be used to detect whether we're running on a TEE guest without having to rely on `dmesg | grep ...`. Fixes: #7595 Signed-off-by: Fabiano Fidêncio (cherry picked from commit ab5f603ffa24da7f611b4e31101091578b849de1) --- .../confidential/unencrypted/Dockerfile | 37 +++++++++++++++++++ .../confidential/unencrypted/ssh/unencrypted | 7 ++++ .../unencrypted/ssh/unencrypted.pub | 1 + 3 files changed, 45 insertions(+) create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile new file mode 100644 index 000000000..e4db17939 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile @@ -0,0 +1,37 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# We know that using latest is error prone, we're taking the risk here. 
+# hadolint ignore=DL3007 +FROM alpine:latest + +# We don't need a specific version of those packages +# hadolint ignore=DL3018 +RUN apk add --no-cache curl openssh-server + +# Download and install `cpuid`, which will be used to detect +# whether we're the container is running on a TEE guest +# hadolint ignore=DL3059 +RUN /bin/sh -c \ + 'ARCH=$(uname -m) && \ + [[ "${ARCH}" == "x86_64" ]] && \ + curl -LO https://github.com/klauspost/cpuid/releases/download/v2.2.5/cpuid-Linux_x86_64_2.2.5.tar.gz && \ + tar -xvzf cpuid-Linux_x86_64_2.2.5.tar.gz -C /usr/bin && \ + rm -rf cpuid-Linux_x86_64_2.2.5.tar.gz && \ + rm -f /usr/bin/LICENSE' + +# This is done just to avoid the following error starting sshd +# `sshd: no hostkeys available -- exiting.` +# hadolint ignore=DL3059 +RUN ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -P "" + +# A password needs to be set for login to work. An empty password is +# unproblematic as password-based login to root is not allowed. +# hadolint ignore=DL3059 +RUN passwd -d root + +# Generated with `ssh-keygen -t ed25519 -f unencrypted -P "" -C ""` +COPY ssh/unencrypted.pub /root/.ssh/authorized_keys + +ENTRYPOINT ["/usr/sbin/sshd", "-D"] diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted new file mode 100644 index 000000000..1f75d37d6 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACD5RDTjWd8c793pKpOUGt+/D+Fa7PMVUQtSudt6R8JMYAAAAIh44GnReOBp +0QAAAAtzc2gtZWQyNTUxOQAAACD5RDTjWd8c793pKpOUGt+/D+Fa7PMVUQtSudt6R8JMYA +AAAEDwZtSRH/KNwmm/QCMHcif3iMQpGPOr2d12hcQqMY3KJPlENONZ3xzv3ekqk5Qa378P +4Vrs8xVRC1K523pHwkxgAAAAAAECAwQF +-----END OPENSSH PRIVATE KEY----- diff --git 
a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub new file mode 100644 index 000000000..ce3b9ef60 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ssh/unencrypted.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPlENONZ3xzv3ekqk5Qa378P4Vrs8xVRC1K523pHwkxg From 0625d8dfc1bd5baef45ccd9315621ba17d48b3c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 8 Aug 2023 19:05:21 +0200 Subject: [PATCH 013/339] ci: Add build-and-publish-tee-confidential-unencrypted-image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be done before running TEE tests, and it's a hard dependency for them. Signed-off-by: Fabiano Fidêncio (cherry picked from commit fac8ccf5cd83af683449d49aa6b580183b29b122) --- .github/workflows/ci.yaml | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 52a86b08d..da09da4a4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,6 +30,30 @@ jobs: commit-hash: ${{ inputs.commit-hash }} secrets: inherit + build-and-publish-tee-confidential-unencrypted-image: + runs-on: ubuntu-latest + steps: + - name: Login to Kata Containers ghcr.io + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + + - name: Docker build and push + uses: docker/build-and-push-action@v3 + with: + tags: ghcr.io/kata-containers/test-images:unencrypted-${{ pr-number }} + push: true + context: . 
+ platforms: linux/amd64, linux/s390x + file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile + run-k8s-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-k8s-tests-on-aks.yaml @@ -42,7 +66,7 @@ jobs: secrets: inherit run-k8s-tests-on-sev: - needs: publish-kata-deploy-payload-amd64 + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-k8s-tests-on-sev.yaml with: registry: ghcr.io @@ -51,7 +75,7 @@ jobs: commit-hash: ${{ inputs.commit-hash }} run-k8s-tests-on-snp: - needs: publish-kata-deploy-payload-amd64 + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-k8s-tests-on-snp.yaml with: registry: ghcr.io @@ -60,7 +84,7 @@ jobs: commit-hash: ${{ inputs.commit-hash }} run-k8s-tests-on-tdx: - needs: publish-kata-deploy-payload-amd64 + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-k8s-tests-on-tdx.yaml with: registry: ghcr.io From 218d83bd3fbe171c04187afe36876a22d727bf87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 31 Jul 2023 11:42:00 +0200 Subject: [PATCH 014/339] tests: k8s: Ensure the runtime classes are properly created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With these 2 simple checks we can ensure that we do not regress on the behaviour of allowing the runtime classes / default runtime class to be created by the kata-deploy payload. 
Fixes: #7491 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 034d7aab876ccb6e2709973b359df59c11dc9d67) --- ...eploy-ensure-runtimec-classes-created.bats | 28 +++++++++++++++++++ .../kubernetes/run_kubernetes_tests.sh | 1 + 2 files changed, 29 insertions(+) create mode 100644 tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats diff --git a/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats b/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats new file mode 100644 index 000000000..bd1d5a151 --- /dev/null +++ b/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats @@ -0,0 +1,28 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + # We expect 2 runtime classes because: + # * `kata` is the default runtimeclass created, basically an alias for `kata-${KATA_HYPERVISOR}`. + # * `kata-${KATA_HYPERVISOR}` is the other one + # * As part of the tests we're only deploying the specific runtimeclass that will be used, instead of all of them. 
+ expected_runtime_classes=2 +} + +@test "Test runtimeclasses are being properly created" { + # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy + current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) + [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] + [[ $(kubectl get runtimeclasses | grep -q "${KATA_HYPERVISOR}") -eq 0 ]] +} + +teardown() { + kubectl get runtimeclasses +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index f8b635d22..e46cbc9fe 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -18,6 +18,7 @@ if [ -n "${K8S_TEST_UNION:-}" ]; then K8S_TEST_UNION=($K8S_TEST_UNION) else K8S_TEST_UNION=( \ + "kata-deploy-ensure-runtimec-classes-created.bats" \ "k8s-attach-handlers.bats" \ "k8s-caps.bats" \ "k8s-configmap.bats" \ From 3a21c485bf1e0a40201b295d6fd4056c63562efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 9 Aug 2023 17:41:07 +0200 Subject: [PATCH 015/339] ci: {{ pr-number }} should be {{ inputs.pr-number }} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the joys to rely on the `pull_request_target` is to only be able to catch those after those are merged. 
Fixes: #7595 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 54f6a78500684c941eae7f5e0422d51a5caeadcf) --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index da09da4a4..a5133d8b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -48,7 +48,7 @@ jobs: - name: Docker build and push uses: docker/build-and-push-action@v3 with: - tags: ghcr.io/kata-containers/test-images:unencrypted-${{ pr-number }} + tags: ghcr.io/kata-containers/test-images:unencrypted-${{ inputs.pr-number }} push: true context: . platforms: linux/amd64, linux/s390x From a5a3e4124ff67e20ca8e4428891fa7d272fb3bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 9 Aug 2023 17:45:42 +0200 Subject: [PATCH 016/339] ci: k8s: tees: Ensure PR_NUMBER is exported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now this is not being used, but it'll as the image generated for the confidential tests have that as part of their tag. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 43fe5d1b905b7400c42e723a516a38d34eb17d20) --- .github/workflows/ci.yaml | 3 +++ .github/workflows/run-k8s-tests-on-sev.yaml | 4 ++++ .github/workflows/run-k8s-tests-on-snp.yaml | 4 ++++ .github/workflows/run-k8s-tests-on-tdx.yaml | 4 ++++ 4 files changed, 15 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a5133d8b1..1df5a9717 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -73,6 +73,7 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} run-k8s-tests-on-snp: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] @@ -82,6 +83,7 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} run-k8s-tests-on-tdx: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] @@ -91,6 +93,7 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} run-metrics-tests: needs: build-kata-static-tarball-amd64 diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml index a48425e1f..bb4c5d0bd 100644 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -11,6 +11,9 @@ on: tag: required: true type: string + pr-number: + required: true + type: string commit-hash: required: false type: string @@ -27,6 +30,7 @@ jobs: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} KUBECONFIG: /home/kata/.kube/config USING_NFD: "false" diff --git 
a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml index 7196a9a1b..fde2131a7 100644 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -11,6 +11,9 @@ on: tag: required: true type: string + pr-number: + required: true + type: string commit-hash: required: false type: string @@ -27,6 +30,7 @@ jobs: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} KUBECONFIG: /home/kata/.kube/config USING_NFD: "false" diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml index a3899177c..3cf2e9b10 100644 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -11,6 +11,9 @@ on: tag: required: true type: string + pr-number: + required: true + type: string commit-hash: required: false type: string @@ -27,6 +30,7 @@ jobs: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} USING_NFD: "true" steps: From 6c921ce3db8fb7211bef2a0f8e6b77c45ab81eff Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 9 Aug 2023 16:08:48 +0000 Subject: [PATCH 017/339] metrics: Add network nginx benchmark This PR adds the network nginx benchmark for kata metrics. 
Fixes #7605 Signed-off-by: Gabriela Cervantes (cherry picked from commit f8a5255cf7cc54413510e5ada1508fcdaa5855ec) --- .../network/nginx_kubernetes/nginx-network.sh | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 tests/metrics/network/nginx_kubernetes/nginx-network.sh diff --git a/tests/metrics/network/nginx_kubernetes/nginx-network.sh b/tests/metrics/network/nginx_kubernetes/nginx-network.sh new file mode 100755 index 000000000..1fe37e4eb --- /dev/null +++ b/tests/metrics/network/nginx_kubernetes/nginx-network.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +nginx_file=$(mktemp nginxresults.XXXXXXXXXX) +TEST_NAME="${TEST_NAME:-nginx}" + +function remove_tmp_file() { + rm -rf "${nginx_file}" +} + +trap remove_tmp_file EXIT + +function main() { + init_env + cmds=("bc" "jq" "ab") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + wait_time=20 + sleep_time=2 + timeout="20s" + + deployment="nginx-deployment" + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/nginx-networking.yaml" + kubectl wait --for=condition=Available --timeout="${timeout}" deployment/"${deployment}" + kubectl expose deployment/"${deployment}" + ip=$(kubectl get service/nginx-deployment -o jsonpath='{.spec.clusterIP}') + + ab -n 100000 -c 100 http://"${ip}":80/ > "${nginx_file}" + metrics_json_init + rps=$(cat "${nginx_file}" | grep "Requests" | awk '{print $4}') + echo "Requests per second: ${rps}" + + metrics_json_start_array + + local json="$(cat << EOF + { + "requests": { + "Result" : ${rps}, + "Units": "rps" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + metrics_json_save + + nginx_cleanup +} + +function nginx_cleanup() { + kubectl delete deployment "${deployment}" + kubectl delete service 
"${deployment}" + check_processes +} + +main "$@" From 1971fe49865fb9bbe476263abb9a54be9d30ba79 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 9 Aug 2023 16:14:04 +0000 Subject: [PATCH 018/339] metrics: Add nginx kubernetes yaml This PR adds the nginx kubernetes yaml. Signed-off-by: Gabriela Cervantes (cherry picked from commit 498f7c0549785e1906104c7c3f431e0e414e65df) --- .../nginx-networking.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml diff --git a/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml b/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml new file mode 100644 index 000000000..1475f13b7 --- /dev/null +++ b/tests/metrics/network/nginx_kubernetes/runtimeclass_workloads/nginx-networking.yaml @@ -0,0 +1,25 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment +spec: + selector: + matchLabels: + app: nginx + replicas: 2 + template: + metadata: + labels: + app: nginx + spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + containers: + - name: nginx + image: nginx:1.14.2 + ports: + - containerPort: 80 From cecb30dbb2347b837260a3e4f81a395fe28208b5 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 9 Aug 2023 16:17:46 +0000 Subject: [PATCH 019/339] metrics: Add nginx documentation to network README This PR adds nginx documentation to network README for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit d1a62962216826832f7965c4ad656dfe1b09a48e) --- tests/metrics/network/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/metrics/network/README.md b/tests/metrics/network/README.md index 05acd4cd6..971aff6ae 100644 --- a/tests/metrics/network/README.md +++ b/tests/metrics/network/README.md @@ -11,6 +11,7 @@ bandwidth, jitter, latency and parallel bandwidth. - `k8s-network-metrics-iperf3.sh` measures bandwidth which is the speed of the data transfer. - `latency-network.sh` measures network latency. +- `nginx-network.sh` is a benchmark of the lightweight Nginx HTTPS web-server and measures the HTTP requests over a fixed period of time with a configurable number of concurrent clients/connections. ## Running the tests From babbd4186c9406c9f00af0fe70710975615a763d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 9 Aug 2023 18:34:30 +0200 Subject: [PATCH 020/339] ci: create-confidential-image: Add dependent actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following the example on https://github.com/docker/build-push-action, it's clear that the actions to "Set up QEMU" and "Set up Docker Buildx" are missing. Let's add them, and also take the advantage to bump the build-push-action to its v4, which, by the way, had a typo on its name (build-and-push-action does **NOT** exist, build-push-action does). 
Fixes: #7595 Signed-off-by: Fabiano Fidêncio (cherry picked from commit a2d731ad26df296362d2777502c9cddab97acd90) --- .github/workflows/ci.yaml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1df5a9717..f2088e2fd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,6 +33,17 @@ jobs: build-and-publish-tee-confidential-unencrypted-image: runs-on: ubuntu-latest steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Login to Kata Containers ghcr.io uses: docker/login-action@v2 with: @@ -40,13 +51,8 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout code - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - - name: Docker build and push - uses: docker/build-and-push-action@v3 + uses: docker/build-push-action@v4 with: tags: ghcr.io/kata-containers/test-images:unencrypted-${{ inputs.pr-number }} push: true From dcc35781f737dabdf24d0304d2580ae214cad02d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 9 Aug 2023 20:29:25 +0200 Subject: [PATCH 021/339] ci: unencrypted-image: Don't fail to build on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's make sure that we don't fail in case we're building non x86_64. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit eb463b38ec027ca80e86ffdcd75d6fddf7403053) --- .../runtimeclass_workloads/confidential/unencrypted/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile index e4db17939..d093c7fe8 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile +++ b/tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile @@ -19,7 +19,7 @@ RUN /bin/sh -c \ curl -LO https://github.com/klauspost/cpuid/releases/download/v2.2.5/cpuid-Linux_x86_64_2.2.5.tar.gz && \ tar -xvzf cpuid-Linux_x86_64_2.2.5.tar.gz -C /usr/bin && \ rm -rf cpuid-Linux_x86_64_2.2.5.tar.gz && \ - rm -f /usr/bin/LICENSE' + rm -f /usr/bin/LICENSE' || true # This is done just to avoid the following error starting sshd # `sshd: no hostkeys available -- exiting.` From b8b4ca10e96479be9d00c5fc5ecab4a6da4d516b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 9 Aug 2023 20:31:26 +0200 Subject: [PATCH 022/339] ci: unencrypted-image: Fix build context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build context should be the folder where the Dockerfile is present, otherwise the files copied into the image won't be found. Fixes: #7595 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 03d1fa67b1c63087279a2a3e90846cd327581eb0) --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f2088e2fd..da8fae148 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -56,7 +56,7 @@ jobs: with: tags: ghcr.io/kata-containers/test-images:unencrypted-${{ inputs.pr-number }} push: true - context: . 
+ context: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/ platforms: linux/amd64, linux/s390x file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile From a341c2f3249a5334a2a2d5e0ca9cb2ded45d08ad Mon Sep 17 00:00:00 2001 From: David Esparza Date: Wed, 9 Aug 2023 14:19:21 -0600 Subject: [PATCH 023/339] metrics: compute tensorflow statistics This PR computes average results for TF bench. Additionally, it improves the data parsing from all running containers. Fixes: #7603 Signed-off-by: David Esparza (cherry picked from commit 473b0d3a31a1def0c3c7fbc5fcb603a4916e1f03) --- tests/metrics/machine_learning/tensorflow.sh | 149 ++++++++++--------- 1 file changed, 77 insertions(+), 72 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh index fc6c1f8c9..3473b451c 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow.sh @@ -31,11 +31,16 @@ CMD_RESNET="$dst_dir/$resnet_start_script" CMD_ALEXNET="$dst_dir/$alexnet_start_script" timeout=600 INITIAL_NUM_PIDS=1 -CMD_FILE="cat alexnet_results | grep 'total images' | wc -l" -RESNET_CMD_FILE="cat resnet_results | grep 'total images' | wc -l" +ALEXNET_FILE="alexnet_results" +ALEXNET_CHECK_FILE_CMD="cat /${ALEXNET_FILE} | grep 'total images' | wc -l" +RESNET_FILE="resnet_results" +RESNET_CHECK_FILE_CMD="cat /${RESNET_FILE} | grep 'total images' | wc -l" +MAX_RETRIES=300 function remove_tmp_file() { rm -rf "${resnet_tensorflow_file}" "${alexnet_tensorflow_file}" + rm -rf "${src_dir}" + clean_env_ctr } trap remove_tmp_file EXIT @@ -59,7 +64,8 @@ function create_resnet_start_script() { cat <>"${script}" #!/bin/bash -python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > resnet_results +pushd "benchmarks/scripts/tf_cnn_benchmarks" +python tf_cnn_benchmarks.py 
-data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > "/${RESNET_FILE}" EOF chmod +x "${script}" } @@ -70,78 +76,82 @@ function create_alexnet_start_script() { cat <>"${script}" #!/bin/bash -python benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > alexnet_results +pushd "benchmarks/scripts/tf_cnn_benchmarks" +python tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > "/${ALEXNET_FILE}" EOF chmod +x "${script}" } +function launch_workload() { + WORKLOAD=${1} + [[ -z ${WORKLOAD} ]] && die "Container workload is missing" + + local pids=() + local j=0 + for i in "${containers[@]}"; do + $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${WORKLOAD}")& + pids["${j}"]=$! + ((j++)) + done + + # wait for all pids + for pid in ${pids[*]}; do + wait "${pid}" + done +} + +function collect_results() { + WORKLOAD=${1} + [[ -z ${WORKLOAD} ]] && die "Container workload is missing" + + local tasks_running=("${containers[@]}") + local retries=${MAX_RETRIES} + + while [ "${#tasks_running[@]}" -gt 0 ] && [ "${retries}" -gt 0 ]; do + for i in "${!tasks_running[@]}"; do + check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh -c "${WORKLOAD}") + + # if the current task is done, remove the corresponding container from the active list + [ "${check_file}" -eq "1" ] && unset 'tasks_running[i]' + done + ((retries--)) + sleep 3 + echo -n "." 
+ done + echo -e "\n" +} + function tensorflow_test() { - info "Copy Resnet Tensorflow test" - local pids=() - local j=0 + # Resnet section + info "Running TF-Resnet test" + launch_workload "${CMD_RESNET}" + collect_results "${RESNET_CHECK_FILE_CMD}" + + # Alexnet section + info "Running TF-Alexnet test" + launch_workload "${CMD_ALEXNET}" + collect_results "${ALEXNET_CHECK_FILE_CMD}" + + info "Tensorflow workload completed" + # Retrieving results for i in "${containers[@]}"; do - $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESNET}")& - pids["${j}"]=$! - ((j++)) + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat /${RESNET_FILE}" >> "${resnet_tensorflow_file}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat /${ALEXNET_FILE}" >> "${alexnet_tensorflow_file}" done - # wait for all pids - for pid in ${pids[*]}; do - wait "${pid}" - done + # Parsing resnet results + local resnet_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') + local res_sum="$(sed -e 's/,/\n/g' <<< ${resnet_results} | awk 'BEGIN {total=0} {total += $1} END {print total}')" + local num_elements="$(awk '{print NF}' FS=',' <<<${resnet_results})" + local average_resnet="$(echo "scale=2 ; ${res_sum} / ${num_elements}" | bc)" - info "All containers are running the workload..." - - for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${RESNET_CMD_FILE}") - retries="300" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break - sleep 1 - done - done - - info "Copy Alexnet Tensorflow test" - local pids=() - local j=0 - for i in "${containers[@]}"; do - $(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_ALEXNET}")& - pids["${j}"]=$! 
- ((j++)) - done - - # wait for all pids - for pid in ${pids[*]}; do - wait "${pid}" - done - - for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") - retries="300" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" -eq "1" ] && break - sleep 1 - done - done - - for i in "${containers[@]}"; do - sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat resnet_results" >> "${resnet_tensorflow_file}" - done - - local res_results=$(cat "${resnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') - local resnet_results=$(printf "%.0f\n" "${res_results}") - local res_average=$(echo "${resnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l) - local average_resnet=$(printf "%.0f\n" "${res_average}") - - for i in "${containers[@]}"; do - sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "cat alexnet_results" >> "${alexnet_tensorflow_file}" - done - - local alex_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') - local alexnet_results=$(printf "%.0f\n" "${alex_results}") - local alex_average=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/${NUM_CONTAINERS}/g" | bc -l) - local average_alexnet=$(printf "%.0f\n" "${alex_average}") + # Parsing alexnet results + local alexnet_results=$(cat "${alexnet_tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//') + local alex_sum="$(sed -e 's/,/\n/g' <<< ${alexnet_results} | awk 'BEGIN {total=0} {total += $1} END {print total}')" + num_elements="$(awk '{print NF}' FS=',' <<< ${alexnet_results})" + local average_alexnet="$(echo " scale=2 ; ${alex_sum} / ${num_elements}" | bc)" + # writing json results file local json="$(cat << EOF { "resnet": { @@ -174,7 +184,7 @@ function 
check_containers_are_up() { function main() { # Verify enough arguments - if [ $# != 2 ]; then + if [ "$#" -lt 2 ]; then echo >&2 "error: Not enough arguments [$@]" help exit 1 @@ -224,13 +234,8 @@ function main() { # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) ((INITIAL_NUM_PIDS++)) - tensorflow_test - metrics_json_save - - rm -rf "${src_dir}" - - clean_env_ctr } + main "$@" From 6b6a6ee724ed2de952c5ab6fdb7e09cf9e5f2f74 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:27:11 +0000 Subject: [PATCH 024/339] metrics: Add common functions to the common script This PR adds the collect results function to the common metrics script. Fixes #7617 Signed-off-by: Gabriela Cervantes (cherry picked from commit 9879709aae781440dc2a4624fe0a501208e6aee7) --- tests/metrics/lib/common.bash | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index 4111d67d4..14dabda38 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -361,3 +361,25 @@ function wait_ksm_settle() done info "Timed out after ${1}s waiting for KSM to settle" } + +function collect_results() { + WORKLOAD=${1} + [[ -z ${WORKLOAD} ]] && die "Container workload is missing" + + local tasks_running=("${containers[@]}") + local retries=100 + + while [ "${#tasks_running[@]}" -gt 0 ] && [ "${retries}" -gt 0 ]; do + for i in "${!tasks_running[@]}"; do + check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh > + + # if the current task is done, remove the corresponding container from the active list + [ "${check_file}" -eq "1" ] && unset 'tasks_running[i]' + done + ((retries--)) + sleep 3 + echo -n "." 
+ done + echo -e "\n" +} + From f29f8114704d9818e617677360ebaeb0cfa38b46 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:31:23 +0000 Subject: [PATCH 025/339] metrics: Remove collect results function definition This PR removes the collect results function from tensorflow script as it is going to be referenced in the common metrics script. Signed-off-by: Gabriela Cervantes (cherry picked from commit 286de046af2ad7397a71fdd5cbce4da993b837fd) --- tests/metrics/machine_learning/tensorflow.sh | 21 -------------------- 1 file changed, 21 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh index 3473b451c..7f1131927 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow.sh @@ -100,27 +100,6 @@ function launch_workload() { done } -function collect_results() { - WORKLOAD=${1} - [[ -z ${WORKLOAD} ]] && die "Container workload is missing" - - local tasks_running=("${containers[@]}") - local retries=${MAX_RETRIES} - - while [ "${#tasks_running[@]}" -gt 0 ] && [ "${retries}" -gt 0 ]; do - for i in "${!tasks_running[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh -c "${WORKLOAD}") - - # if the current task is done, remove the corresponding container from the active list - [ "${check_file}" -eq "1" ] && unset 'tasks_running[i]' - done - ((retries--)) - sleep 3 - echo -n "." - done - echo -e "\n" -} - function tensorflow_test() { # Resnet section info "Running TF-Resnet test" From 1b68145b6aacd671d175aa4a9335b2bc54ddffa3 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:34:30 +0000 Subject: [PATCH 026/339] metrics: Use collect_results function in tensorflow mobilenet test This PR uses the collect results function defined in common for the tensorflow mobilenet test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit d3e57cf4548a72fcdb3130122b6900728eebdbb4) --- .../tensorflow_mobilenet_v1_bfloat16_fp32.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index d6c1a312a..e48f75a5a 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -87,14 +87,7 @@ function mobilenet_v1_bfloat16_fp32_test() { touch "${host_trigger_file}" info "All containers are running the workload..." - for i in "${containers[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") - retries="30" - for j in $(seq 1 "${retries}"); do - [ "${check_file}" = 1 ] && break - sleep 1 - done - done + collect_results "${CMD_FILE}" for i in "${containers[@]}"; do sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" From 7fffa7f9ce0eb67c37b15f4664646e0d0a75f046 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:39:24 +0000 Subject: [PATCH 027/339] metrics: Add check containers are up in common script This PR adds check containers are up in common script for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 1c84680d8c97339b8f18f701d00ad4efcdfe8650) --- tests/metrics/lib/common.bash | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index 14dabda38..56da11417 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -383,3 +383,17 @@ function collect_results() { echo -e "\n" } +function check_containers_are_up() { + local NUM_CONTAINERS="$1" + [[ -z ${NUM_CONTAINERS} ]] && die "Number of containers is missing" + + local TIMEOUT=60 + local containers_launched=0 + for i in $(seq "${TIMEOUT}") ; do + info "Verify that the containers are running" + containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" + [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break + sleep 1 + [ "${i}" == "${TIMEOUT}" ] && return 1 + done +} From 73843b786d2b1bb5f775c15b07b9ae3494681740 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:42:09 +0000 Subject: [PATCH 028/339] metrics: Use check containers are up in tensorflow script This PR uses the check containers are up from the common script in the tensorflow script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 9d57a1fab4505be931500b8348765a600b7d0630) --- tests/metrics/machine_learning/tensorflow.sh | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh index 7f1131927..5a2d7cb1e 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow.sh @@ -150,17 +150,6 @@ EOF metrics_json_end_array "Results" } -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function main() { # Verify enough arguments if [ "$#" -lt 2 ]; then @@ -195,11 +184,11 @@ function main() { metrics_json_start_array # Check that the requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running local timeout_launch="10" - check_containers_are_up & pid=$! + check_containers_are_up "${NUM_CONTAINERS}" & pid=$! (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! if wait "${pid}" 2>/dev/null; then From fc893927454aa6b0b541cba93f286a26cf8a9f12 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 17:44:36 +0000 Subject: [PATCH 029/339] metrics: Add check containers are up in tensorflow mobilenet script This PR adds the check containers are up in the common script in the tensorflow mobilenet script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 918c783084d7b775789f8a725892dd88feffccf0) --- tests/metrics/lib/common.bash | 10 +++++----- .../tensorflow_mobilenet_v1_bfloat16_fp32.sh | 15 ++------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index 56da11417..a715ea035 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -363,18 +363,18 @@ function wait_ksm_settle() } function collect_results() { - WORKLOAD=${1} - [[ -z ${WORKLOAD} ]] && die "Container workload is missing" + local WORKLOAD="$1" + [[ -z "${WORKLOAD}" ]] && die "Container workload is missing" local tasks_running=("${containers[@]}") local retries=100 while [ "${#tasks_running[@]}" -gt 0 ] && [ "${retries}" -gt 0 ]; do for i in "${!tasks_running[@]}"; do - check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh > + check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${tasks_running[i]}" sh -c "${WORKLOAD}") # if the current task is done, remove the corresponding container from the active list - [ "${check_file}" -eq "1" ] && unset 'tasks_running[i]' + [ "${check_file}" = 1 ] && unset 'tasks_running[i]' done ((retries--)) sleep 3 @@ -385,7 +385,7 @@ function collect_results() { function check_containers_are_up() { local NUM_CONTAINERS="$1" - [[ -z ${NUM_CONTAINERS} ]] && die "Number of containers is missing" + [[ -z "${NUM_CONTAINERS}" ]] && die "Number of containers is missing" local TIMEOUT=60 local containers_launched=0 diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index e48f75a5a..aaacc86a4 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -109,17 +109,6 @@ EOF metrics_json_end_array "Results" 
} -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function main() { # Verify enough arguments if [ $# != 2 ]; then @@ -153,11 +142,11 @@ function main() { metrics_json_start_array # Check that the requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running local timeout_launch="10" - check_containers_are_up & pid=$! + check_containers_are_up "${NUM_CONTAINERS}" & pid=$! (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! if wait "${pid}" 2>/dev/null; then From b07c19eb5f91503daa9bbf9db385843aa1cae16e Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 20:12:22 +0000 Subject: [PATCH 030/339] metrics: Add check containers are running function This PR adds the check containers are running function the common metrics script. Signed-off-by: Gabriela Cervantes (cherry picked from commit 833cf7a684658fcb20ddcf5e80bcb75aa92d03c5) --- tests/metrics/lib/common.bash | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index a715ea035..327379a51 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -397,3 +397,21 @@ function check_containers_are_up() { [ "${i}" == "${TIMEOUT}" ] && return 1 done } + +function check_containers_are_running() { + local NUM_CONTAINERS="$1" + [[ -z "${NUM_CONTAINERS}" ]] && die "Number of containers is missing" + + # Check that the requested number of containers are running + local timeout_launch="10" + check_containers_are_up "${NUM_CONTAINERS}" & pid=$! 
+ (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! + + if wait "${pid}" 2>/dev/null; then + pkill -HUP -P "${pid_tout}" + wait "${pid_tout}" + else + warn "Time out exceeded" + return 1 + fi +} From 3502bb4b203e936fd52990d7a19b104fd2624c0d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 20:13:37 +0000 Subject: [PATCH 031/339] metrics: Remove unused variable in tensorflow script This PR removes an unused variable in tensorflow script. Signed-off-by: Gabriela Cervantes (cherry picked from commit f700f9b0ba02e9dba90091d1c18f0eebef08ed98) --- tests/metrics/machine_learning/tensorflow.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh index 5a2d7cb1e..803ee882e 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow.sh @@ -35,7 +35,6 @@ ALEXNET_FILE="alexnet_results" ALEXNET_CHECK_FILE_CMD="cat /${ALEXNET_FILE} | grep 'total images' | wc -l" RESNET_FILE="resnet_results" RESNET_CHECK_FILE_CMD="cat /${RESNET_FILE} | grep 'total images' | wc -l" -MAX_RETRIES=300 function remove_tmp_file() { rm -rf "${resnet_tensorflow_file}" "${alexnet_tensorflow_file}" From 1b30aa818e29da0b78b70f69ba45bc1b064e3303 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 20:15:18 +0000 Subject: [PATCH 032/339] metrics: Add check containers are up in tensorflow script This PR adds the check containers are up function from common in tensorflow script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 36337ee146de060e31b948ee339985381930857b) --- tests/metrics/machine_learning/tensorflow.sh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow.sh index 803ee882e..7b33f9654 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow.sh @@ -186,17 +186,7 @@ function main() { check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running - local timeout_launch="10" - check_containers_are_up "${NUM_CONTAINERS}" & pid=$! - (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! - - if wait "${pid}" 2>/dev/null; then - pkill -HUP -P "${pid_tout}" - wait "${pid_tout}" - else - warn "Time out exceeded" - return 1 - fi + check_containers_are_running "${NUM_CONTAINERS}" # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) From b600659df21fa50089f88a88ce59af856b5d51f5 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 10 Aug 2023 20:17:20 +0000 Subject: [PATCH 033/339] metrics: Add check containers are running in tensorflow mobilenet This PR adds check containers are running in tensorflow mobilenet that is being defined in common script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit fdcd52ff78ff1e043a6b774d89534e2a1c5f01ed) --- .../tensorflow_mobilenet_v1_bfloat16_fp32.sh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index aaacc86a4..e451216fa 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -145,17 +145,7 @@ function main() { check_containers_are_up "${NUM_CONTAINERS}" # Check that the requested number of containers are running - local timeout_launch="10" - check_containers_are_up "${NUM_CONTAINERS}" & pid=$! - (sleep "${timeout_launch}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! - - if wait "${pid}" 2>/dev/null; then - pkill -HUP -P "${pid_tout}" - wait "${pid_tout}" - else - warn "Time out exceeded" - return 1 - fi + check_containers_are_running "${NUM_CONTAINERS}" # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) From 058b3044553b2b207c850f9b798218606e7646ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 25 Jul 2023 20:37:54 +0200 Subject: [PATCH 034/339] gha: static-checks: Move to the Azure instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GHA runners are not exactly powerful, which makes the static-checks take way too long (almost an hour). Let's give a try and move those to the same size of Azure instances used as part of our CI, and probably have this time reduced. 
Fixes: #7446 Signed-off-by: Fabiano Fidêncio (cherry picked from commit c52d090522f58fc6e09649cbb3cf1dfa9c2fc8c8) --- .github/workflows/static-checks.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 23f11aab3..76ea53de6 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -13,7 +13,7 @@ concurrency: name: Static checks jobs: static-checks: - runs-on: ubuntu-20.04 + runs-on: garm-ubuntu-2004 strategy: matrix: cmd: @@ -36,6 +36,10 @@ jobs: with: fetch-depth: 0 path: ./src/github.com/${{ github.repository }} + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends build-essential haveged - name: Install Go uses: actions/setup-go@v3 with: @@ -82,4 +86,6 @@ jobs: - name: Run check if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | + export PATH=$PATH:"$HOME/.cargo/bin" + export XDG_RUNTIME_DIR=$(mktemp -d /tmp/kata-tests-$USER.XXX | tee >(xargs chmod 0700)) cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} From 671ad98451f2c767e41b8eef53a7ff71c4cf41b5 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 16:50:00 +0000 Subject: [PATCH 035/339] metrics: Add Cassandra Kubernetes benchmark for kata metrics This PR adds Cassandra Kubernetes benchmark for kata metrics tests. 
Fixes #7625 Signed-off-by: Gabriela Cervantes (cherry picked from commit 349b89969ab75b1a834549644054ae2552d8340d) --- .../disk/cassandra_kubernetes/cassandra.sh | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100755 tests/metrics/disk/cassandra_kubernetes/cassandra.sh diff --git a/tests/metrics/disk/cassandra_kubernetes/cassandra.sh b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh new file mode 100755 index 000000000..99beae779 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +test_repo="${test_repo:-github.com/kata-containers/tests}" +TEST_NAME="${TEST_NAME:-cassandra}" +cassandra_file=$(mktemp cassandraresults.XXXXXXXXXX) +cassandra_read_file=$(mktemp cassandrareadresults.XXXXXXXXXX) + +function remove_tmp_file() { + rm -rf "${cassandra_file}" "${cassandra_read_file}" +} + +trap remove_tmp_file EXIT + +function cassandra_write_test() { + cassandra_start + export pod_name="cassandra-0" + export write_cmd="/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema keyspace="keyspace1" -pop seq=1..1000000 -node cassandra" + number_of_retries="50" + for _ in $(seq 1 "$number_of_retries"); do + if kubectl exec -i cassandra-0 -- sh -c 'nodetool status' | grep Up; then + ok="1" + break; + fi + sleep 1 + done + # This is needed to wait that cassandra is up + sleep 30 + kubectl exec -i cassandra-0 -- sh -c "$write_cmd" > "${cassandra_file}" + write_op_rate=$(cat "${cassandra_file}" | grep -e "Op rate" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_mean=$(cat "${cassandra_file}" | grep -e "Latency mean" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_95th=$(cat "${cassandra_file}" | 
grep -e "Latency 95th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_99th=$(cat "${cassandra_file}" | grep -e "Latency 99th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + write_latency_median=$(cat "${cassandra_file}" | grep -e "Latency median" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + + export read_cmd="/usr/local/apache-cassandra-3.11.2/tools/bin/cassandra-stress read n=200000 -rate threads=50" + kubectl exec -i cassandra-0 -- sh -c "$read_cmd" > "${cassandra_read_file}" + read_op_rate=$(cat "${cassandra_read_file}" | grep -e "Op rate" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_mean=$(cat "${cassandra_read_file}" | grep -e "Latency mean" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_95th=$(cat "${cassandra_read_file}" | grep -e "Latency 95th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_99th=$(cat "${cassandra_read_file}" | grep -e "Latency 99th percentile" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + read_latency_median=$(cat "${cassandra_read_file}" | grep -e "Latency median" | cut -d':' -f2 | sed -e 's/^[ \t]*//' | cut -d ' ' -f1) + + metrics_json_init + # Save configuration + metrics_json_start_array + + local json="$(cat << EOF + { + "Write Op rate": { + "Result" : "$write_op_rate", + "Units" : "op/s" + }, + "Write Latency Mean": { + "Result" : "$write_latency_mean", + "Units" : "ms" + }, + "Write Latency 95th percentile": { + "Result" : "$write_latency_95th", + "Units" : "ms" + }, + "Write Latency 99th percentile": { + "Result" : "$write_latency_99th", + "Units" : "ms" + }, + "Write Latency Median" : { + "Result" : "$write_latency_median", + "Units" : "ms" + }, + "Read Op rate": { + "Result" : "$read_op_rate", + "Units" : "op/s" + }, + "Read Latency Mean": { + "Result" : "$read_latency_mean", + "Units" : "ms" + }, + "Read Latency 95th percentile": { + "Result" : 
"$read_latency_95th", + "Units" : "ms" + }, + "Read Latency 99th percentile": { + "Result" : "$read_latency_99th", + "Units" : "ms" + }, + "Read Latency Median" : { + "Result" : "$read_latency_median", + "Units" : "ms" + } + } +EOF +)" + + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + + metrics_json_save + cassandra_cleanup +} + +function cassandra_start() { + cmds=("bc" "jq") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + export service_name="cassandra" + export app_name="cassandra" + + wait_time=20 + sleep_time=2 + + vol_capacity="3Gi" + volume_name="block-loop-pv" + volume_claim="block-loop-pvc" + + # Create Loop Device + export tmp_disk_image=$(mktemp --tmpdir disk.XXXXXX.img) + truncate "$tmp_disk_image" --size "3GB" + export loop_dev=$(sudo losetup -f) + sudo losetup "$loop_dev" "$tmp_disk_image" + + # Create Storage Class + kubectl create -f "${SCRIPT_PATH}/volume/block-local-storage.yaml" + + # Create Persistent Volume + export tmp_pv_yaml=$(mktemp --tmpdir block_persistent_vol.XXXXX.yaml) + sed -e "s|LOOP_DEVICE|${loop_dev}|" "${SCRIPT_PATH}/volume/block-loop-pv.yaml" > "$tmp_pv_yaml" + sed -i "s|HOSTNAME|$(hostname | awk '{print tolower($0)}')|" "$tmp_pv_yaml" + sed -i "s|CAPACITY|${vol_capacity}|" "$tmp_pv_yaml" + + kubectl create -f "$tmp_pv_yaml" + cmd="kubectl get pv/${volume_name} | grep Available" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Create Persistent Volume Claim + export tmp_pvc_yaml=$(mktemp --tmpdir block_persistent_vol.XXXXX.yaml) + sed -e "s|CAPACITY|${vol_capacity}|" "${SCRIPT_PATH}/volume/block-loop-pvc.yaml" > "$tmp_pvc_yaml" + kubectl create -f "$tmp_pvc_yaml" + + # Create service + kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/cassandra-service.yaml" + + # Check service + kubectl get svc | grep "$service_name" + + # Create workload using volume + ctr_dev_path="/dev/xda" + export tmp_pod_yaml=$(mktemp --tmpdir pod-pv.XXXXX.yaml) + sed -e 
"s|DEVICE_PATH|${ctr_dev_path}|" "${SCRIPT_PATH}/runtimeclass_workloads/cassandra-statefulset.yaml" > "$tmp_pod_yaml" + kubectl create -f "$tmp_pod_yaml" + cmd="kubectl rollout status --watch --timeout=120s statefulset/$app_name" + waitForProcess "$wait_time" "$sleep_time" "$cmd" + + # Verify persistent volume claim is bound + kubectl get pvc | grep "Bound" + + # Check pods are running + cmd="kubectl get pods -o jsonpath='{.items[*].status.phase}' | grep Running" + waitForProcess "$wait_time" "$sleep_time" "$cmd" +} + +function cassandra_cleanup() { + kubectl patch pvc block-loop-pvc -p '{"metadata":{"finalizers":null}}' + kubectl delete pvc block-loop-pvc --force + kubectl patch pv block-loop-pv -p '{"metadata":{"finalizers":null}}' + kubectl delete pv block-loop-pv --force + kubectl delete svc "$service_name" + kubectl delete pod -l app="$app_name" + kubectl delete storageclass block-local-storage + kubectl delete statefulsets "$app_name" + + # Delete temporary yaml files + rm -f "$tmp_pv_yaml" + rm -f "$tmp_pvc_yaml" + rm -f "$tmp_pod_yaml" + + # Remove image and loop device + sudo losetup -d "$loop_dev" + rm -f "$tmp_disk_image" + + check_processes +} + +function main() { + init_env + cassandra_write_test +} + +main "$@" From 1b126eb4ceb38a3d9142f0b0cbd5d78a21c275f6 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 16:54:20 +0000 Subject: [PATCH 036/339] metrics: Add block loop pvc for cassandra test This PR adds the block loop pvc for cassandra test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 9890271594198db69e6dd4a1a56a4a526f2a3067) --- .../cassandra_kubernetes/volume/block-local-storage.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml new file mode 100644 index 000000000..5f0209408 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/block-local-storage.yaml @@ -0,0 +1,6 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: block-local-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer From 105a556a308bdbf8a89525db09661d125a64fe8f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 16:55:40 +0000 Subject: [PATCH 037/339] metrics: Add block loop pv yaml for cassandra test This PR adds the block loop pv yaml for cassandra test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit e3d511946ff1f0039710f4387e2fe15887abe37c) --- .../volume/block-loop-pv.yaml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml new file mode 100644 index 000000000..b657a059c --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pv.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: block-loop-pv +spec: + capacity: + storage: CAPACITY + volumeMode: Block + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: block-local-storage + local: + path: LOOP_DEVICE + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - HOSTNAME From 1292b51092bc972c3494b391a57269f460d979b9 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 16:57:09 +0000 Subject: [PATCH 038/339] metrics: Add block loop pvc yaml for cassandra This PR adds block loop pvc yaml for cassandra test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 2297a0d1c504526950f7ad72b3f9c80d3b39715a) --- .../cassandra_kubernetes/volume/block-loop-pvc.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml new file mode 100644 index 000000000..e796af206 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/block-loop-pvc.yaml @@ -0,0 +1,12 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: block-loop-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: block-local-storage + volumeMode: Block + resources: + requests: + storage: CAPACITY From d5a14449fca7459f64e8947adda731390bc740ac Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 17:00:16 +0000 Subject: [PATCH 039/339] metrics: Add cassandra service yaml This PR adds the cassandra service yaml for the benchmark. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit c1dcc1396f68120c12827df2ced466ed84f51eb2) --- .../cassandra-service.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml b/tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml new file mode 100644 index 000000000..26e43d09c --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml @@ -0,0 +1,17 @@ +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Service +metadata: + labels: + app: cassandra + name: cassandra +spec: + clusterIP: None + ports: + - port: 9042 + selector: + app: cassandra From 47f32c4983b1ab46cf2044f4b6eb465371d9008f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 11 Aug 2023 17:04:33 +0000 Subject: [PATCH 040/339] metrics: Add cassandra statefulset yaml This PR adds cassandra statefulset yaml for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 5d0f0d43c72ad6741ce6525d4a6235ef2087b984) --- .../disk/cassandra_kubernetes/cassandra.sh | 1 - .../cassandra-service.yaml | 0 .../cassandra-statefulset.yaml | 56 +++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) rename tests/metrics/disk/cassandra_kubernetes/{volume => }/runtimeclass_workloads/cassandra-service.yaml (100%) create mode 100644 tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/cassandra.sh b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh index 99beae779..b6c636548 100755 --- a/tests/metrics/disk/cassandra_kubernetes/cassandra.sh +++ b/tests/metrics/disk/cassandra_kubernetes/cassandra.sh @@ -9,7 +9,6 @@ set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../../lib/common.bash" -test_repo="${test_repo:-github.com/kata-containers/tests}" TEST_NAME="${TEST_NAME:-cassandra}" cassandra_file=$(mktemp cassandraresults.XXXXXXXXXX) cassandra_read_file=$(mktemp cassandrareadresults.XXXXXXXXXX) diff --git a/tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-service.yaml similarity index 100% rename from tests/metrics/disk/cassandra_kubernetes/volume/runtimeclass_workloads/cassandra-service.yaml rename to tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-service.yaml diff --git a/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml new file mode 100644 index 000000000..fdc10df51 --- /dev/null +++ b/tests/metrics/disk/cassandra_kubernetes/runtimeclass_workloads/cassandra-statefulset.yaml @@ -0,0 +1,56 @@ +# +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 
+# +apiVersion: "apps/v1" +kind: StatefulSet +metadata: + name: cassandra +spec: + serviceName: cassandra + replicas: 1 + selector: + matchLabels: + app: cassandra + template: + metadata: + labels: + app: cassandra + spec: + runtimeClassName: kata + containers: + - name: cassandra + image: gcr.io/google-samples/cassandra:v13 + imagePullPolicy: Always + ports: + - containerPort: 7000 + name: intra-node + - containerPort: 7001 + name: tls-intra-node + - containerPort: 7199 + name: jmx + - containerPort: 9042 + name: cql + env: + - name: CASSANDRA_SEEDS + value: cassandra-0.cassandra.default.svc.cluster.local + - name: MAX_HEAP_SIZE + value: 256M + - name: HEAP_NEWSIZE + value: 100M + - name: CASSANDRA_CLUSTER_NAME + value: "Cassandra" + - name: CASSANDRA_DC + value: "DC1" + - name: CASSANDRA_RACK + value: "Rack1" + - name: CASSANDRA_ENDPOINT_SNITCH + value: GossipingPropertyFileSnitch + volumeDevices: + - devicePath: DEVICE_PATH + name: my-volume + volumes: + - name: my-volume + persistentVolumeClaim: + claimName: block-loop-pvc From 4e8c512346ee194e5b471cf957b260c3c95f4fc9 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Fri, 11 Aug 2023 12:25:57 -0600 Subject: [PATCH 041/339] metrics: fix the loop used to stop kata components #7629 This PR fixed the loop that stops the kata-shim and the hypervisors used in metrics checks. Fixes: #7628 Signed-off-by: David Esparza (cherry picked from commit 767434d50a4137b92b5b161fa662c8ea5db25dd6) --- tests/common.bash | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index 295ac8c5c..bdb63c77c 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -189,17 +189,16 @@ function clean_env_ctr() } # Kills running shim and hypervisor components -# by using the kata-component file name. 
function kill_kata_components() { + local TIMEOUT="30s" local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "cloud-hypervisor" ) sudo systemctl stop containerd - # Get the filenames of the kata components - # and kill the correspondingt processes - for PID_NAME in ${PID_NAMES} ; do - sudo killall ${PID_NAME} > /dev/null 2>&1 || true + # iterate over the list of kata components and stop them + for PID_NAME in "${PID_NAMES[@]}"; do + [[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall "${PID_NAME}" > /dev/null 2>&1 || true done - sudo systemctl start containerd + sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd } # Restarts a systemd service while ensuring the start-limit-burst is set to 0. From c19cebfa801ef4c9baff9cceb0d18a7bbbef2cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 16:55:33 +0200 Subject: [PATCH 042/339] tests: Add gha-run-k8s-common.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's split a good portion of `tests/integration/kuberentes/gha-run.sh` out, and put them in a place where they can be used to the soon-to-come kata-deploy specific tests. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit af1b46bbf2ee8ea161eca1b37f905fe221c6b211) --- tests/gha-run-k8s-common.sh | 79 +++++++++++++++++++++++++ tests/integration/kubernetes/gha-run.sh | 69 +-------------------- 2 files changed, 80 insertions(+), 68 deletions(-) create mode 100644 tests/gha-run-k8s-common.sh diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh new file mode 100644 index 000000000..cb727a7b7 --- /dev/null +++ b/tests/gha-run-k8s-common.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${tests_dir}/common.bash" + +AZ_RG="${AZ_RG:-kataCI}" + +function _print_cluster_name() { + short_sha="$(git rev-parse --short=12 HEAD)" + echo "${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" +} + +function install_azure_cli() { + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + # The aks-preview extension is required while the Mariner Kata host is in preview. + az extension add --name aks-preview +} + +function login_azure() { + az login \ + --service-principal \ + -u "${AZ_APPID}" \ + -p "${AZ_PASSWORD}" \ + --tenant "${AZ_TENANT_ID}" +} + +function create_cluster() { + # First, ensure that the cluster didn't fail to get cleaned up from a previous run. + delete_cluster || true + + az aks create \ + -g "${AZ_RG}" \ + -n "$(_print_cluster_name)" \ + -s "Standard_D4s_v5" \ + --node-count 1 \ + --generate-ssh-keys \ + $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") +} + +function install_bats() { + # Installing bats from the lunar repo. + # This installs newer version of the bats which supports setup_file and teardown_file functions. + # These functions are helpful when adding new tests that require one time setup. 
+ + sudo apt install -y software-properties-common + sudo add-apt-repository 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' + sudo apt install -y bats + sudo add-apt-repository --remove 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' +} + +function install_kubectl() { + sudo az aks install-cli +} + +function get_cluster_credentials() { + az aks get-credentials \ + -g "${AZ_RG}" \ + -n "$(_print_cluster_name)" +} + +function delete_cluster() { + az aks delete \ + -g "${AZ_RG}" \ + -n "$(_print_cluster_name)" \ + --yes +} + +function get_nodes_and_pods_info() { + kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true + kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true +} diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 9f440d150..5456ad3fc 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -9,64 +9,9 @@ set -o nounset set -o pipefail kubernetes_dir="$(dirname "$(readlink -f "$0")")" -source "${kubernetes_dir}/../../common.bash" +source "${kubernetes_dir}/../../gha-run-k8s-common.sh" tools_dir="${repo_root_dir}/tools" -AZ_RG="${AZ_RG:-kataCI}" - -function _print_cluster_name() { - short_sha="$(git rev-parse --short=12 HEAD)" - echo "${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" -} - -function install_azure_cli() { - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - # The aks-preview extension is required while the Mariner Kata host is in preview. - az extension add --name aks-preview -} - -function login_azure() { - az login \ - --service-principal \ - -u "${AZ_APPID}" \ - -p "${AZ_PASSWORD}" \ - --tenant "${AZ_TENANT_ID}" -} - -function create_cluster() { - # First, ensure that the cluster didn't fail to get cleaned up from a previous run. 
- delete_cluster || true - - az aks create \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ - -s "Standard_D4s_v5" \ - --node-count 1 \ - --generate-ssh-keys \ - $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") -} - -function install_bats() { - # Installing bats from the lunar repo. - # This installs newer version of the bats which supports setup_file and teardown_file functions. - # These functions are helpful when adding new tests that require one time setup. - - sudo apt install -y software-properties-common - sudo add-apt-repository 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' - sudo apt install -y bats - sudo add-apt-repository --remove 'deb http://archive.ubuntu.com/ubuntu/ lunar universe' -} - -function install_kubectl() { - sudo az aks install-cli -} - -function get_cluster_credentials() { - az aks get-credentials \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" -} - function deploy_kata() { platform="${1}" ensure_yq @@ -171,18 +116,6 @@ function cleanup() { kubectl delete -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" } -function delete_cluster() { - az aks delete \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ - --yes -} - -function get_nodes_and_pods_info() { - kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true - kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true -} - function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" From e812c437fecbae11f82fafeca05462bc583151f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 16:59:41 +0200 Subject: [PATCH 043/339] tests: kata-deploy: Add functional/kata-deploy/gha-run.sh placeholder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now this file does nothing, as it's not even called by any GHA. 
However, it'll be populated later on as part of a different series, where we'll have kata-deploy specific tests running here. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 831e73ff918430985adf51c9dc9cbfce66505965) --- tests/functional/kata-deploy/gha-run.sh | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100755 tests/functional/kata-deploy/gha-run.sh diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh new file mode 100755 index 000000000..ef362f40e --- /dev/null +++ b/tests/functional/kata-deploy/gha-run.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023 Microsoft Corporation +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +kata_deploy_dir="$(dirname "$(readlink -f "$0")")" +source "$kata_deploy_dir}/../../gha-run-k8s-common.sh" +tools_dir="${repo_root_dir}/tools" + +function run_tests() { + return 0 +} + +function main() { + export KATA_HOST_OS="${KATA_HOST_OS:-}" + + action="${1:-}" + + case "${action}" in + install-azure-cli) install_azure_cli ;; + login-azure) login_azure ;; + create-cluster) create_cluster ;; + install-bats) install_bats ;; + install-kubectl) install_kubectl ;; + get-cluster-credentials) get_cluster_credentials ;; + run-tests) run_tests ;; + delete-cluster) cleanup "aks" ;; + *) >&2 echo "Invalid argument"; exit 2 ;; + esac +} + +main "$@" From f5e14ef28309588b84820b1bb69d046c1b28af89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 17:05:13 +0200 Subject: [PATCH 044/339] tests: kata-deploy: Add placeholder for kata-deploy-tests-on-aks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will not be tested as part of the PR, thanks to the `pull_request_target` event, but we want it to be added so we can build atop of that in a coming up series. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit d9ee17aaecb40843b1fd64469da6375ade153787) --- .../run-kata-deploy-tests-on-aks.yaml | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/run-kata-deploy-tests-on-aks.yaml diff --git a/.github/workflows/run-kata-deploy-tests-on-aks.yaml b/.github/workflows/run-kata-deploy-tests-on-aks.yaml new file mode 100644 index 000000000..951e6e932 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml @@ -0,0 +1,77 @@ +name: CI | Run kata-deploy tests on AKS +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + host_os: + - ubuntu + vmm: + - clh + - dragonball + - qemu + include: + - host_os: cbl-mariner + vmm: clh + runs-on: ubuntu-latest + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + GH_PR_NUMBER: ${{ inputs.pr-number }} + KATA_HOST_OS: ${{ matrix.host_os }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + USING_NFD: "false" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + + - name: Download Azure CLI + run: bash tests/functional/kata-deploy/gha-run.sh install-azure-cli + + - name: Log into the Azure account + run: bash tests/functional/kata-deploy/gha-run.sh login-azure + env: + AZ_APPID: ${{ secrets.AZ_APPID }} + AZ_PASSWORD: ${{ secrets.AZ_PASSWORD }} + AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} + + - name: Create AKS cluster + timeout-minutes: 10 + run: bash tests/functional/kata-deploy/gha-run.sh create-cluster + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Install `kubectl` + run: bash tests/functional/kata-deploy/gha-run.sh install-kubectl 
+ + - name: Download credentials for the Kubernetes CLI to use them + run: bash tests/functional/kata-deploy/gha-run.sh get-cluster-credentials + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests + + - name: Delete AKS cluster + if: always() + run: bash tests/functional/kata-deploy/gha-run.sh delete-cluster From 4bb8fcc0c01459ec3c6a9c74e09da5d201ddbc2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 17:06:04 +0200 Subject: [PATCH 045/339] tests: kata-deploy: Add placeholder for kata-deploy-tests-on-tdx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will not be tested as part of the PR, thanks to the `pull_request_target` event, but we want it to be added so we can build atop of that in a coming up series. Fixes: #7642 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e55fa93db984fd925730db97da4d9d5930455934) --- .../run-kata-deploy-tests-on-tdx.yaml | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/run-kata-deploy-tests-on-tdx.yaml diff --git a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml new file mode 100644 index 000000000..f4029b6e9 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml @@ -0,0 +1,42 @@ +name: CI | Run kata-deploy tests on TDX +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + USING_NFD: "true" + steps: + - 
uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests From 738d808cace4b5050570087525b5f5407b4e3055 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 14 Aug 2023 20:36:19 +0000 Subject: [PATCH 046/339] metrics: Rename tensorflow scripts This PR renames the tensorflow scripts to include the data format that is being used as we will have multiple tests with different data and model formats for tensorflow so this will help us to distinguish them. Fixes #7645 Signed-off-by: Gabriela Cervantes (cherry picked from commit 18a7fd8e4e4107109750bf6c460928b4e94f7cf8) --- .../{tensorflow.sh => tensorflow_nhwc.sh} | 10 +++++----- .../Dockerfile | 0 2 files changed, 5 insertions(+), 5 deletions(-) rename tests/metrics/machine_learning/{tensorflow.sh => tensorflow_nhwc.sh} (96%) rename tests/metrics/machine_learning/{tensorflow_dockerfile => tensorflow_nhwc_dockerfile}/Dockerfile (100%) diff --git a/tests/metrics/machine_learning/tensorflow.sh b/tests/metrics/machine_learning/tensorflow_nhwc.sh similarity index 96% rename from tests/metrics/machine_learning/tensorflow.sh rename to tests/metrics/machine_learning/tensorflow_nhwc.sh index 7b33f9654..cb5f9ff28 100755 --- a/tests/metrics/machine_learning/tensorflow.sh +++ b/tests/metrics/machine_learning/tensorflow_nhwc.sh @@ -10,15 +10,15 @@ set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../lib/common.bash" -IMAGE="docker.io/library/tensorflow:latest" -DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile" +IMAGE="docker.io/library/tensorflow_nhwc:latest" +DOCKERFILE="${SCRIPT_PATH}/tensorflow_nhwc_dockerfile/Dockerfile" BATCH_SIZE="100" NUM_BATCHES="100" resnet_tensorflow_file=$(mktemp resnettensorflowresults.XXXXXXXXXX) alexnet_tensorflow_file=$(mktemp alexnettensorflowresults.XXXXXXXXXX) NUM_CONTAINERS="$1" TIMEOUT="$2" -TEST_NAME="tensorflow" +TEST_NAME="tensorflow_nhwc" 
PAYLOAD_ARGS="tail -f /dev/null" # Options to control the start of the workload using a trigger-file dst_dir="/host" @@ -99,7 +99,7 @@ function launch_workload() { done } -function tensorflow_test() { +function tensorflow_nhwc_test() { # Resnet section info "Running TF-Resnet test" launch_workload "${CMD_RESNET}" @@ -191,7 +191,7 @@ function main() { # Get the initial number of pids in a single container before the workload starts INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) ((INITIAL_NUM_PIDS++)) - tensorflow_test + tensorflow_nhwc_test metrics_json_save } diff --git a/tests/metrics/machine_learning/tensorflow_dockerfile/Dockerfile b/tests/metrics/machine_learning/tensorflow_nhwc_dockerfile/Dockerfile similarity index 100% rename from tests/metrics/machine_learning/tensorflow_dockerfile/Dockerfile rename to tests/metrics/machine_learning/tensorflow_nhwc_dockerfile/Dockerfile From 3c7597f4ba389a3dad67d0356e3f2f7ed54753b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 23:01:22 +0200 Subject: [PATCH 047/339] gha: cri-containerd: Ensure RUNTIME takes KATA_HYPERVISOR into account MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Short commit log says it all. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 9e898701f5835285c34a922cbb23a6d0fcf41b9b) --- tests/integration/cri-containerd/integration-tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 0e4df5578..76bc94366 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -21,9 +21,9 @@ export PATH="$PATH:/usr/local/sbin" export PATH="$PATH:/usr/local/go/bin" # Runtime to be used for testing -RUNTIME=${RUNTIME:-containerd-shim-kata-v2} -FACTORY_TEST=${FACTORY_TEST:-""} KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" +RUNTIME=${RUNTIME:-containerd-shim-kata-${KATA_HYPERVISOR}-v2} +FACTORY_TEST=${FACTORY_TEST:-""} USE_DEVMAPPER="${USE_DEVMAPPER:-false}" ARCH=$(uname -m) From 2fea5a5f8b046c27a085c59fdcb5d1ad37a23fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 13:44:24 +0200 Subject: [PATCH 048/339] gha: cri-containerd: Group containerd logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves readability in case of failures by a lot. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 6c8b2ffa603c2dbc300797c88b75c4fb45db856a) --- tests/integration/cri-containerd/integration-tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 76bc94366..26fa9eae9 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -130,10 +130,11 @@ trap cleanup EXIT function err_report() { local log_file="${REPORT_DIR}/containerd.log" if [ -f "$log_file" ]; then - echo "ERROR: containerd log :" + echo "::group::ERROR: containerd log :" echo "-------------------------------------" cat "${log_file}" echo "-------------------------------------" + echo "::endgroup::" fi } From 585d5fba03be0d81146396a888fb52d92d54a874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 13:45:07 +0200 Subject: [PATCH 049/339] gha: cri-containerd: Print kata logs in case of error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need this to fully understand what are the issues we're facing. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ae0930824aa29bdc09b794c49bf9a27cf2b25643) --- tests/integration/cri-containerd/integration-tests.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 26fa9eae9..4726a6621 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -136,6 +136,11 @@ function err_report() { echo "-------------------------------------" echo "::endgroup::" fi + echo "::group::ERROR: Kata Containers logs : " + echo "-------------------------------------" + sudo journalctl -xe -t kata + echo "-------------------------------------" + echo "::endgroup::" } From 10058f718ae64a9eb19aa76b02af02d676e28c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 15:15:05 +0200 Subject: [PATCH 050/339] gha: cri-containerd: Show pod before deleting it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It'll help us to debug failures with the pod stop / pod delete. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit b29782984a0f064f82f1865a221e5c6ad49f24d3) --- tests/integration/cri-containerd/integration-tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index 4726a6621..d9919bb68 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -203,6 +203,8 @@ EOF } function testContainerStop() { + info "show pod $podid" + sudo crictl pods --id $podid info "stop pod $podid" sudo crictl stopp $podid info "remove pod $podid" From 20be3d93d5389a302536186cdee502b31bf9fa53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 15 Aug 2023 13:04:06 +0200 Subject: [PATCH 051/339] gha: cri-containerd: Add timeout to the crictl calls on testContainerStop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of the runners, we're hitting a timeout that I cannot reproduce, at all, when allocating the same instance and running the tests manually. The default timeout to connect to the server is 2s when using `crictl`. Let's increase this to 20s. It's fairly important to mention that in the first tests I used a timeout of 10s, and that helped but we still hit issues every now and then. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 84dd02e0f9baf942181d2808fc3f31c1077a0ebd) --- tests/integration/cri-containerd/integration-tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/cri-containerd/integration-tests.sh b/tests/integration/cri-containerd/integration-tests.sh index d9919bb68..33f212856 100755 --- a/tests/integration/cri-containerd/integration-tests.sh +++ b/tests/integration/cri-containerd/integration-tests.sh @@ -204,11 +204,11 @@ EOF function testContainerStop() { info "show pod $podid" - sudo crictl pods --id $podid + sudo crictl --timeout=20s pods --id $podid info "stop pod $podid" - sudo crictl stopp $podid + sudo crictl --timeout=20s stopp $podid info "remove pod $podid" - sudo crictl rmp $podid + sudo crictl --timeout=20s rmp $podid sudo cp "$default_containerd_config_backup" "$default_containerd_config" restart_containerd_service From 7474e50ae2d96ba9798f5593da495caa16c7d4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 14 Aug 2023 09:49:04 +0200 Subject: [PATCH 052/339] gha: cri-containerd: Enable tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the cri-containerd tests have been fully migrated to GHA, let's make sure we get them running. 
Fixes: #6543 Signed-off-by: Fabiano Fidêncio (cherry picked from commit b3592ab25cc72af3dbc783a8def3ca0d04e1b972) --- tests/integration/cri-containerd/gha-run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/cri-containerd/gha-run.sh b/tests/integration/cri-containerd/gha-run.sh index c36c96605..542964423 100755 --- a/tests/integration/cri-containerd/gha-run.sh +++ b/tests/integration/cri-containerd/gha-run.sh @@ -59,7 +59,8 @@ function install_dependencies() { function run() { info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" - return 0 + enabling_hypervisor + bash -c ${cri_containerd_dir}/integration-tests.sh } function main() { From 5d85cac1d697f7964299e471a49b67380d292cc0 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 15 Aug 2023 17:30:22 +0000 Subject: [PATCH 053/339] metrics: Add Tensorflow ResNet50 int8 benchmark This PR adds the Tensorflow ResNet50 int8 script for kata metrics. Fixes #7652 Signed-off-by: Gabriela Cervantes (cherry picked from commit 6d971ba8df26a8b537ae5d38bbead8d9c1d1fd21) --- .../tensorflow_resnet50_int8.sh | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100755 tests/metrics/machine_learning/tensorflow_resnet50_int8.sh diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh new file mode 100755 index 000000000..177a62c98 --- /dev/null +++ b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="docker.io/library/resnet50int8:latest" +DOCKERFILE="${SCRIPT_PATH}/resnet50_int8/Dockerfile" +tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) +NUM_CONTAINERS="$1" +TIMEOUT="$2" +TEST_NAME="tensorflow-resnet50int8" 
+PAYLOAD_ARGS="tail -f /dev/null" +TESTDIR="${TESTDIR:-/testdir}" +# Options to control the start of the workload using a trigger-file +dst_dir="/host" +src_dir=$(mktemp --tmpdir -d tensorflowresnet50int8.XXXXXXXXXX) +MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro" +start_script="resnet50_start.sh" +# CMD points to the script that starts the workload +# export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX +CMD="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16 && $dst_dir/$start_script" +guest_trigger_file="$dst_dir/$trigger_file" +host_trigger_file="$src_dir/$trigger_file" +INITIAL_NUM_PIDS=1 +CMD_FILE="cat results | grep 'Throughput' | wc -l" +CMD_RESULTS="cat results | grep 'Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" + +function remove_tmp_file() { + rm -rf "${tensorflow_file}" +} + +trap remove_tmp_file EXIT + +function help() { +cat << EOF +Usage: $0 + Description: + This script launches n number of containers + to run the ResNet50 int8 using a Tensorflow + container. + Options: + : Number of containers to run. + : Timeout to launch the containers. +EOF +} + +function create_start_script() { + local script="${src_dir}/${start_script}" + rm -rf "${script}" + +cat <>"${script}" +#!/bin/bash +python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision int8 --mode inference --in-graph /resnet50_int8_pretrained_model.pb --batch-size 116 --num-intra-threads 16 >> results +EOF + chmod +x "${script}" +} + +function resnet50_int8_test() { + info "Running ResNet50 Int8 Tensorflow test" + local pids=() + local j=0 + for i in "${containers[@]}"; do + $(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD}")& + pids["${j}"]=$! + ((j++)) + done + + # wait for all pids + for pid in ${pids[*]}; do + wait "${pid}" + done + + touch "${host_trigger_file}" + info "All containers are running the workload..." 
+ collect_results "${CMD_FILE}" + + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" + done + + local resnet50_int8_results=$(cat "${tensorflow_file}" | sed 's/.$//') + local average_resnet50_int8=$(echo "${resnet50_int8_results}" | sed 's/.$//'| sed "s/,/+/g;s/.*/(&)\/2/g" | bc -l) + + local json="$(cat << EOF + { + "ResNet50Int8": { + "Result": "${resnet50_int8_results}", + "Average": "${average_resnet50_int8}", + "Units": "images/s" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + # Check tools/commands dependencies + cmds=("awk" "docker" "bc") + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + + init_env + create_start_script + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "$not_started_count remaining containers" + done + + metrics_json_init + metrics_json_start_array + + # Check that the requested number of containers are running + check_containers_are_up "${NUM_CONTAINERS}" + + # Check that the requested number of containers are running + check_containers_are_running "${NUM_CONTAINERS}" + + # Get the initial number of pids in a single container before the workload starts + INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) + ((INITIAL_NUM_PIDS++)) + + resnet50_int8_test + + metrics_json_save + + sudo rm -rf "${src_dir}" + + clean_env_ctr +} 
+main "$@" From 6e5a5b82499c716e2d5f869490e40e203cddec88 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 15 Aug 2023 17:36:48 +0000 Subject: [PATCH 054/339] metrics: Add Dockerfile for ResNet50 int8 This PR adds the dockerfile for ResNet50 int8 benchmark. Signed-off-by: Gabriela Cervantes (cherry picked from commit 24baededc041dc306155ce29344ef3bc68c988f0) --- .../resnet50_int8_dockerfile/Dockerfile | 21 +++++++++++++++++++ .../tensorflow_resnet50_int8.sh | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile diff --git a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile new file mode 100644 index 000000000..9782d72e4 --- /dev/null +++ b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Usage: FROM [image name] +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget nano curl build-essential git && \ + apt-get install -y python3.8 python3-pip && \ + pip install intel-tensorflow-avx512==2.8.0 && \ + pip install protobuf==3.20.* && \ + wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_int8_pretrained_model.pb && \ + git clone https://github.com/IntelAI/models.git + +CMD ["/bin/bash"] diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh index 177a62c98..19c3e1192 100755 --- a/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh +++ b/tests/metrics/machine_learning/tensorflow_resnet50_int8.sh @@ -11,7 +11,7 @@ SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../lib/common.bash" 
IMAGE="docker.io/library/resnet50int8:latest" -DOCKERFILE="${SCRIPT_PATH}/resnet50_int8/Dockerfile" +DOCKERFILE="${SCRIPT_PATH}/resnet50_int8_dockerfile/Dockerfile" tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) NUM_CONTAINERS="$1" TIMEOUT="$2" @@ -22,7 +22,7 @@ TESTDIR="${TESTDIR:-/testdir}" dst_dir="/host" src_dir=$(mktemp --tmpdir -d tensorflowresnet50int8.XXXXXXXXXX) MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro" -start_script="resnet50_start.sh" +start_script="resnet50int8_start.sh" # CMD points to the script that starts the workload # export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX CMD="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16 && $dst_dir/$start_script" From a845e94139e777ec6ad4929be632f6401afcc506 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 15 Aug 2023 17:46:44 +0000 Subject: [PATCH 055/339] docs: Add Tensorflow Resnet50 documentation This PR adds the Tensorflow Resnet50 documentation. Signed-off-by: Gabriela Cervantes (cherry picked from commit 1a1b20776066bd96801332952e11785d0471cff3) --- tests/metrics/machine_learning/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/machine_learning/README.md b/tests/metrics/machine_learning/README.md index f55adba20..1872e9150 100644 --- a/tests/metrics/machine_learning/README.md +++ b/tests/metrics/machine_learning/README.md @@ -47,3 +47,16 @@ $ cd metrics/machine_learning $ ./tensorflow_mobilenet_benchmark.sh 25 60 ``` +# Kata Containers TensorFlow `ResNet50` Metrics + +`ResNet50` is an image classification model pre-trained on the `ImageNet` dataset. +Kata Containers provides a test for running `ResNet50` inference using Intel optimized +`TensorFlow`. 
+ +## Running the `TensorFlow` `ResNet50` test +Individual test can be run by hand, for example: + +``` +$ cd metrics/machine_learning +$ ./tensorflow_resnet50_int8.sh 25 60 +``` From bd29413721a6c88202c9dc663c69aa68c1e2e691 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 15 Aug 2023 17:48:27 +0000 Subject: [PATCH 056/339] docs: Fix TensorFlow word across the document This PR fixes the TensorFlow word across the document to have uniformity across all the document. Signed-off-by: Gabriela Cervantes (cherry picked from commit bade6a5c3b7b0f5acfece10787675882bb2c7ada) --- tests/metrics/machine_learning/README.md | 10 +++++----- .../resnet50_int8_dockerfile/Dockerfile | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/metrics/machine_learning/README.md b/tests/metrics/machine_learning/README.md index 1872e9150..41cc139dc 100644 --- a/tests/metrics/machine_learning/README.md +++ b/tests/metrics/machine_learning/README.md @@ -1,4 +1,4 @@ -# Kata Containers Tensorflow Metrics +# Kata Containers TensorFlow Metrics Kata Containers provides a series of performance tests using the TensorFlow reference benchmarks (tf_cnn_benchmarks). @@ -30,16 +30,16 @@ Individual tests can be run by hand, for example: $ cd metrics/machine_learning $ ./tensorflow.sh 40 100 ``` -# Kata Containers Tensorflow `MobileNet` Metrics +# Kata Containers TensorFlow `MobileNet` Metrics `MobileNets` are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as Inception, are used. `MobileNets` can be run efficiently on mobile devices with `Tensorflow` Lite. -Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `Tensorflow`. +Kata Containers provides a test for running `MobileNet V1` inference using Intel-Optimized `TensorFlow`. 
-## Running the `Tensorflow` `MobileNet` test +## Running the `TensorFlow` `MobileNet` test Individual test can be run by hand, for example: ``` @@ -50,7 +50,7 @@ $ ./tensorflow_mobilenet_benchmark.sh 25 60 # Kata Containers TensorFlow `ResNet50` Metrics `ResNet50` is an image classification model pre-trained on the `ImageNet` dataset. -Kata Containers provides a test for running `ResNet50` inference using Intel optimized +Kata Containers provides a test for running `ResNet50` inference using Intel-Optimized `TensorFlow`. ## Running the `TensorFlow` `ResNet50` test diff --git a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile index 9782d72e4..a033475ee 100644 --- a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile +++ b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile @@ -13,9 +13,9 @@ LABEL DOCKERFILE_VERSION="1.0" RUN apt-get update && \ apt-get install -y --no-install-recommends wget nano curl build-essential git && \ apt-get install -y python3.8 python3-pip && \ - pip install intel-tensorflow-avx512==2.8.0 && \ - pip install protobuf==3.20.* && \ - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_int8_pretrained_model.pb && \ + pip install --no-cache-dir intel-tensorflow-avx512==2.8.0 && \ + pip install --no-cache-dir protobuf==3.20.* && \ + wget -q https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_int8_pretrained_model.pb && \ git clone https://github.com/IntelAI/models.git CMD ["/bin/bash"] From 145450544db76471cdfe1fe706b5246e937aa46b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 15 Aug 2023 18:16:13 +0200 Subject: [PATCH 057/339] gha: ci: Start running kata-deploy tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add the tests as part of the ci.yaml, so they an be triggered as part of each 
PR. For this PR those tests won't be triggered, courtesy to the `pull_request_target` event we rely on. Signed-off-by: Fabiano Fidêncio (cherry picked from commit d19a75e80c616cfdd1f6f6a3c62775d9b9355994) --- .github/workflows/ci.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index da8fae148..1e0cac69a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,6 +60,27 @@ jobs: platforms: linux/amd64, linux/s390x file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile + run-kata-deploy-tests-on-aks: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-aks.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + secrets: inherit + + run-kata-deploy-tests-on-tdx: + needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] + uses: ./.github/workflows/run-kata-deploy-tests-on-tdx.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + run-k8s-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-k8s-tests-on-aks.yaml From 870d8004a0e6340646465d4f710fe2830dc52fcf Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 16 Aug 2023 15:25:39 +0000 Subject: [PATCH 058/339] metrics: Fix MobileNet help me description This PR fixes MobileNet help me description in the tensorflow script. 
Fixes #7661 Signed-off-by: Gabriela Cervantes (cherry picked from commit 2a491e9b1f6b1dcc64859973fb718354aade3fc8) --- .../machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index e451216fa..f1327f59c 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -43,7 +43,7 @@ cat << EOF Usage: $0 Description: This script launches n number of containers - to run the tf cnn benchmarks using a Tensorflow + to run the MobileNet model using a Tensorflow container. Options: : Number of containers to run. From 915bace795b23c0f9b16b3a2bd565c1fe2fefcbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Wed, 16 Aug 2023 11:01:08 -0700 Subject: [PATCH 059/339] kata-deploy: Properly create default runtime class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default `kata` runtime class would get created with the `kata` handler instead of `kata-$KATA_HYPERVISOR`. This made Kata use the wrong hypervisor and broke CI. 
Fixes: #7663 Signed-off-by: Aurélien Bombo (cherry picked from commit 339569b69c4a67e0ab37c13501f6693023a7dc85) --- .../kata-deploy-ensure-runtimec-classes-created.bats | 12 +++++++++++- tools/packaging/kata-deploy/scripts/kata-deploy.sh | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats b/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats index bd1d5a151..b2ec3c330 100644 --- a/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats +++ b/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats @@ -14,13 +14,23 @@ setup() { # * `kata-${KATA_HYPERVISOR}` is the other one # * As part of the tests we're only deploying the specific runtimeclass that will be used, instead of all of them. expected_runtime_classes=2 + + # We expect both runtime classes to have the same handler: kata-${KATA_HYPERVISOR} + expected_handlers_re=( \ + "kata\s+kata-${KATA_HYPERVISOR}" \ + "kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \ + ) } @test "Test runtimeclasses are being properly created" { # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] - [[ $(kubectl get runtimeclasses | grep -q "${KATA_HYPERVISOR}") -eq 0 ]] + + for handler_re in ${expected_handlers_re[@]} + do + [[ $(kubectl get runtimeclass | grep -E "${handler_re}") ]] + done } teardown() { diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index fc26921df..5f7d25331 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -44,7 +44,7 @@ function create_runtimeclasses() { if [[ 
"${CREATE_DEFAULT_RUNTIMECLASS}" == "true" ]]; then echo "Creating the kata runtime class for the default shim (an alias for kata-${default_shim})" cp /opt/kata-artifacts/runtimeclasses/kata-${default_shim}.yaml /tmp/kata.yaml - sed -i -e 's/kata-'${default_shim}'/kata/g' /tmp/kata.yaml + sed -i -e 's/name: kata-'${default_shim}'/name: kata/g' /tmp/kata.yaml kubectl apply -f /tmp/kata.yaml rm -f /tmp/kata.yaml fi @@ -62,7 +62,7 @@ function delete_runtimeclasses() { if [[ "${CREATE_DEFAULT_RUNTIMECLASS}" == "true" ]]; then echo "Deleting the kata runtime class for the default shim (an alias for kata-${default_shim})" cp /opt/kata-artifacts/runtimeclasses/kata-${default_shim}.yaml /tmp/kata.yaml - sed -i -e 's/kata-'${default_shim}'/kata/g' /tmp/kata.yaml + sed -i -e 's/name: kata-'${default_shim}'/name: kata/g' /tmp/kata.yaml kubectl delete -f /tmp/kata.yaml rm -f /tmp/kata.yaml fi From 810507e8a303626c5e79ead517c09fd512b87dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Wed, 16 Aug 2023 13:29:54 -0700 Subject: [PATCH 060/339] tests: k8s: Call ensure_yq() in setup.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It wasn't the `common.bash` import in `run_kubernetes_tests.sh` causing the yq error so let's try this instead. 
Reference: https://github.com/kata-containers/kata-containers/actions/runs/5674941359/job/15379797568#step:10:341 Signed-off-by: Aurélien Bombo (cherry picked from commit f4dd15286345cbc7d1454b8677ececbed136fd05) --- tests/common.bash | 1 + tests/integration/kubernetes/setup.sh | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index bdb63c77c..5e5f833ef 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -338,6 +338,7 @@ function ensure_yq() { export GOPATH export PATH="${GOPATH}/bin:${PATH}" INSTALL_IN_GOPATH=true "${repo_root_dir}/ci/install_yq.sh" + hash -d yq # yq is preinstalled on GHA Ubuntu 22.04 runners so we clear Bash's PATH cache. } # dependency: What we want to get the version from the versions.yaml file diff --git a/tests/integration/kubernetes/setup.sh b/tests/integration/kubernetes/setup.sh index fb68e47cb..45e652f32 100755 --- a/tests/integration/kubernetes/setup.sh +++ b/tests/integration/kubernetes/setup.sh @@ -22,18 +22,20 @@ set_runtime_class() { set_kernel_path() { if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then mariner_kernel_path="/usr/share/cloud-hypervisor/vmlinux.bin" - find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -exec yq write -i {} 'metadata.annotations[io.katacontainers.config.hypervisor.kernel]' "${mariner_kernel_path}" \; + # Not using find -exec as that still returns 0 on failure. 
+ find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -print0 | xargs -0 -I% yq write -i % 'metadata.annotations[io.katacontainers.config.hypervisor.kernel]' "${mariner_kernel_path}" fi } set_initrd_path() { if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then initrd_path="/opt/kata/share/kata-containers/kata-containers-initrd-mariner.img" - find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -exec yq write -i {} 'metadata.annotations[io.katacontainers.config.hypervisor.initrd]' "${initrd_path}" \; + find ${kubernetes_dir}/runtimeclass_workloads_work/*.yaml -print0 | xargs -0 -I% yq write -i % 'metadata.annotations[io.katacontainers.config.hypervisor.initrd]' "${initrd_path}" fi } main() { + ensure_yq reset_workloads_work_dir set_runtime_class set_kernel_path From d7130f48b03280e61ec02dc2b8c9fcc9a80ed4ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 15 Aug 2023 16:21:12 +0200 Subject: [PATCH 061/339] gha: k8s: Stop running kata-deploy tests as part of the k8s suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a follow-up series, we'll add a whole suite for the kata-deploy tests. With this in mind, let's already get rid of this one and avoid more kata-deploy tests to land here. 
Fixes: #7642 Signed-off-by: Fabiano Fidêncio (cherry picked from commit cfc29c11a3077ddbfff18f064cbe5797fa03f7b8) --- ...eploy-ensure-runtimec-classes-created.bats | 38 ------------------- .../kubernetes/run_kubernetes_tests.sh | 1 - 2 files changed, 39 deletions(-) delete mode 100644 tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats diff --git a/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats b/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats deleted file mode 100644 index b2ec3c330..000000000 --- a/tests/integration/kubernetes/kata-deploy-ensure-runtimec-classes-created.bats +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bats -# -# Copyright (c) 2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -load "${BATS_TEST_DIRNAME}/../../common.bash" -load "${BATS_TEST_DIRNAME}/tests_common.sh" - -setup() { - # We expect 2 runtime classes because: - # * `kata` is the default runtimeclass created, basically an alias for `kata-${KATA_HYPERVISOR}`. - # * `kata-${KATA_HYPERVISOR}` is the other one - # * As part of the tests we're only deploying the specific runtimeclass that will be used, instead of all of them. 
- expected_runtime_classes=2 - - # We expect both runtime classes to have the same handler: kata-${KATA_HYPERVISOR} - expected_handlers_re=( \ - "kata\s+kata-${KATA_HYPERVISOR}" \ - "kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \ - ) -} - -@test "Test runtimeclasses are being properly created" { - # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy - current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) - [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] - - for handler_re in ${expected_handlers_re[@]} - do - [[ $(kubectl get runtimeclass | grep -E "${handler_re}") ]] - done -} - -teardown() { - kubectl get runtimeclasses -} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index e46cbc9fe..f8b635d22 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -18,7 +18,6 @@ if [ -n "${K8S_TEST_UNION:-}" ]; then K8S_TEST_UNION=($K8S_TEST_UNION) else K8S_TEST_UNION=( \ - "kata-deploy-ensure-runtimec-classes-created.bats" \ "k8s-attach-handlers.bats" \ "k8s-caps.bats" \ "k8s-configmap.bats" \ From 74edbaac967134b52a1d632af0bd93cdab1c5bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 15 Aug 2023 16:24:59 +0200 Subject: [PATCH 062/339] gha: kata-deploy: Add run-kata-deploy-tests.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will have the same function as run-k8s-tests.sh has, but for kata-deploy. Right now it doesn't have any tests, and the command to actually run the tests is commented out, but right now this is just a placeholder that will be populated sooner than later. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ce6adecd0a28a18e112c62608db73975352a99f5) --- .../kata-deploy/run-kata-deploy-tests.sh | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/functional/kata-deploy/run-kata-deploy-tests.sh diff --git a/tests/functional/kata-deploy/run-kata-deploy-tests.sh b/tests/functional/kata-deploy/run-kata-deploy-tests.sh new file mode 100644 index 000000000..f66206869 --- /dev/null +++ b/tests/functional/kata-deploy/run-kata-deploy-tests.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -e + +kata_deploy_dir=$(dirname "$(readlink -f "$0")") +source "${kata_deploy_dir}/../../common.bash" + +if [ -n "${KATA_DEPLOY_TEST_UNION:-}" ]; then + KATA_DEPLOY_TEST_UNION=($KATA_DEPLOY_TEST_UNION) +else + KATA_DEPLOY_TEST_UNION=( \ + ) +fi + +info "Run tests" +for KATA_DEPLOY_TEST_ENTRY in ${KATA_DEPLOY_TEST_UNION[@]} +do + #bats "${KATA_DEPLOY_TEST_ENTRY}" +done From cdfcd9aba8d6ccb5aa0a1232bfb3e78b6d0271a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 17 Aug 2023 10:15:34 +0200 Subject: [PATCH 063/339] tests: common: Don't fail if yq is not part of the cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This may happen on external runners. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 790bd3548d30aa205efd171736ecdcc9b7aacb1a) --- tests/common.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common.bash b/tests/common.bash index 5e5f833ef..08b82601b 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -338,7 +338,7 @@ function ensure_yq() { export GOPATH export PATH="${GOPATH}/bin:${PATH}" INSTALL_IN_GOPATH=true "${repo_root_dir}/ci/install_yq.sh" - hash -d yq # yq is preinstalled on GHA Ubuntu 22.04 runners so we clear Bash's PATH cache.
+ hash -d yq 2> /dev/null || true # yq is preinstalled on GHA Ubuntu 22.04 runners so we clear Bash's PATH cache. } # dependency: What we want to get the version from the versions.yaml file From b38624e2b3afead488287cdc6f4ecbf41e99e221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 17 Aug 2023 10:39:15 +0200 Subject: [PATCH 064/339] tests: common: Ensure test_type is used as part of the cluster's name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By doing this we can make sure there won't be any clash on the cluster name created for either the k8s or the kata-deploy tests. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 285e616b5eb80bfc602fd7ab7e71fbd4323aef94) --- tests/functional/kata-deploy/gha-run.sh | 6 +++--- tests/gha-run-k8s-common.sh | 18 +++++++++++++----- tests/integration/kubernetes/gha-run.sh | 3 ++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh index ef362f40e..0e3f2057a 100755 --- a/tests/functional/kata-deploy/gha-run.sh +++ b/tests/functional/kata-deploy/gha-run.sh @@ -25,12 +25,12 @@ function main() { case "${action}" in install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; - create-cluster) create_cluster ;; + create-cluster) create_cluster "kata-deploy" ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; - get-cluster-credentials) get_cluster_credentials ;; + get-cluster-credentials) get_cluster_credentials "kata-deploy" ;; run-tests) run_tests ;; - delete-cluster) cleanup "aks" ;; + delete-cluster) cleanup "aks" "kata-deploy" ;; *) >&2 echo "Invalid argument"; exit 2 ;; esac } diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index cb727a7b7..bf0d202e5 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -14,8 +14,10 @@ source "${tests_dir}/common.bash" AZ_RG="${AZ_RG:-kataCI}" function 
_print_cluster_name() { + test_type="${1:-k8s}" + short_sha="$(git rev-parse --short=12 HEAD)" - echo "${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" + echo "${test_type}-${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" } function install_azure_cli() { @@ -33,12 +35,14 @@ function login_azure() { } function create_cluster() { + test_type="${1:-k8s}" + # First, ensure that the cluster didn't fail to get cleaned up from a previous run. - delete_cluster || true + delete_cluster "${test_type}" || true az aks create \ -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ + -n "$(_print_cluster_name ${test_type})" \ -s "Standard_D4s_v5" \ --node-count 1 \ --generate-ssh-keys \ @@ -61,15 +65,19 @@ function install_kubectl() { } function get_cluster_credentials() { + test_type="${1:-k8s}" + az aks get-credentials \ -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" + -n "$(_print_cluster_name ${test_type})" } function delete_cluster() { + test_type="${1:-k8s}" + az aks delete \ -g "${AZ_RG}" \ - -n "$(_print_cluster_name)" \ + -n "$(_print_cluster_name ${test_type})" \ --yes } diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 5456ad3fc..ee0743db0 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -78,13 +78,14 @@ function run_tests() { function cleanup() { platform="${1}" + test_type="${2:-k8s}" ensure_yq echo "Gather information about the nodes and pods before cleaning up the node" get_nodes_and_pods_info if [ "${platform}" = "aks" ]; then - delete_cluster + delete_cluster ${test_type} return fi From 0b4fb826de3316b0c37204264a242f234794804b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 17 Aug 2023 16:04:18 +0000 Subject: [PATCH 065/339] metrics: Remove unused variable in tensorflow mobilenet script This PR removes unused variable in tensorflow mobilenet script. 
Fixes #7679 Signed-off-by: Gabriela Cervantes (cherry picked from commit 8616c050ae3cc2a1408d36ad728d468f60b20f0c) --- .../machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh index f1327f59c..63a37154f 100755 --- a/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh +++ b/tests/metrics/machine_learning/tensorflow_mobilenet_v1_bfloat16_fp32.sh @@ -27,7 +27,6 @@ start_script="mobilenet_start.sh" CMD="$dst_dir/$start_script" guest_trigger_file="$dst_dir/$trigger_file" host_trigger_file="$src_dir/$trigger_file" -timeout=600 INITIAL_NUM_PIDS=1 CMD_FILE="cat results | grep 'Average Throughput' | wc -l" CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" From 2c5db14a1ad44a6986b0186b37c809be2d79c55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 15 Aug 2023 16:30:23 +0200 Subject: [PATCH 066/339] gha: kata-deploy: Add the first kata-deploy test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This test, at least for now, only checks whether the runtimeclasses have been properly created. This is just a migration from a test we had as part of the k8s suite. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4ffc2c86f3e32f518d87f465ee98c789b1ffb273) --- tests/functional/kata-deploy/gha-run.sh | 13 +- tests/functional/kata-deploy/kata-deploy.bats | 119 ++++++++++++++++++ .../kata-deploy/run-kata-deploy-tests.sh | 3 +- 3 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 tests/functional/kata-deploy/kata-deploy.bats diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh index 0e3f2057a..211990482 100755 --- a/tests/functional/kata-deploy/gha-run.sh +++ b/tests/functional/kata-deploy/gha-run.sh @@ -10,16 +10,23 @@ set -o nounset set -o pipefail kata_deploy_dir="$(dirname "$(readlink -f "$0")")" -source "$kata_deploy_dir}/../../gha-run-k8s-common.sh" -tools_dir="${repo_root_dir}/tools" +source "${kata_deploy_dir}/../../gha-run-k8s-common.sh" function run_tests() { - return 0 + pushd "${kata_deploy_dir}" + bash run-kata-deploy-tests.sh + popd } function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" + platform="aks" + if [ "${KATA_HYPERVISOR}" = "qemu-tdx" ]; then + platform="tdx" + fi + export platform + action="${1:-}" case "${action}" in diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats new file mode 100644 index 000000000..531c3eb64 --- /dev/null +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -0,0 +1,119 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" + +setup() { + repo_root_dir="${BATS_TEST_DIRNAME}/../../../" + ensure_yq + + # Cleanup any runtimeclass already present in the cluster + # apart from the default one that comes from AKS + for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'`; do + kubectl delete runtimeclass $rc; + done + + # We expect 2 runtime classes because: + # * `kata` is the default runtimeclass 
created, basically an alias for `kata-${KATA_HYPERVISOR}`. + # * `kata-${KATA_HYPERVISOR}` is the other one + # * As part of the tests we're only deploying the specific runtimeclass that will be used, instead of all of them. + expected_runtime_classes=2 + + # We expect both runtime classes to have the same handler: kata-${KATA_HYPERVISOR} + expected_handlers_re=( \ + "kata\s+kata-${KATA_HYPERVISOR}" \ + "kata-${KATA_HYPERVISOR}\s+kata-${KATA_HYPERVISOR}" \ + ) + + # Set the latest image, the one generated as part of the PR, to be used as part of the tests + sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + + # Enable debug for Kata Containers + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[1].value' --tag '!!str' "true" + # Create the runtime class only for the shim that's being tested + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[2].value' "${KATA_HYPERVISOR}" + # Set the tested hypervisor as the default `kata` shim + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[3].value' "${KATA_HYPERVISOR}" + # Let the `kata-deploy` script take care of the runtime class creation / removal + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[4].value' --tag '!!str' "true" + # Let the `kata-deploy` create the default `kata` runtime class + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[5].value' --tag '!!str' "true" + + if [ "${KATA_HOST_OS}" = "cbl-mariner" ]; then + yq write -i 
"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[+].name' "HOST_OS" + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[-1].value' "${KATA_HOST_OS}" + fi + + echo "::group::Final kata-deploy.yaml that is used in the test" + cat "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image" + echo "::endgroup::" + + kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" + if [ "${platform}" = "tdx" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s" + else + kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" + fi + kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + + # Give some time for the pod to finish what's doing and have the + # runtimeclasses properly created + sleep 30s +} + +@test "Test runtimeclasses are being properly created" { + # We filter `kata-mshv-vm-isolation` out as that's present on AKS clusters, but that's not coming from kata-deploy + current_runtime_classes=$(kubectl get runtimeclasses | grep -v "kata-mshv-vm-isolation" | grep "kata" | wc -l) + [[ ${current_runtime_classes} -eq ${expected_runtime_classes} ]] + + for handler_re in ${expected_handlers_re[@]} + do + kubectl get runtimeclass | grep -E "${handler_re}" + done +} + +teardown() { + kubectl get runtimeclasses -o name | grep -v "kata-mshv-vm-isolation" + + if [ "${platform}" = "tdx" ]; then + deploy_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s"" + cleanup_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k3s"" + 
else + deploy_spec="-f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"" + cleanup_spec="-f "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"" + fi + + kubectl delete ${deploy_spec} + kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod + + # Let the `kata-deploy` script take care of the runtime class creation / removal + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[4].value' --tag '!!str' "true" + # Create the runtime class only for the shim that's being tested + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[2].value' "${KATA_HYPERVISOR}" + # Set the tested hypervisor as the default `kata` shim + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" 'spec.template.spec.containers[0].env[3].value' "${KATA_HYPERVISOR}" + # Let the `kata-deploy` create the default `kata` runtime class + yq write -i "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" 'spec.template.spec.containers[0].env[5].value' --tag '!!str' "true" + + sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" + cat "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image" + + kubectl apply ${cleanup_spec} + sleep 30s + + kubectl delete ${cleanup_spec} + kubectl delete -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" + + # Cleanup any runtime class that was left behind in the cluster, in + # case of a test 
failure, apart from the default one that comes from + # AKS + for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'`; do + kubectl delete runtimeclass $rc; + done +} diff --git a/tests/functional/kata-deploy/run-kata-deploy-tests.sh b/tests/functional/kata-deploy/run-kata-deploy-tests.sh index f66206869..859bebf2e 100644 --- a/tests/functional/kata-deploy/run-kata-deploy-tests.sh +++ b/tests/functional/kata-deploy/run-kata-deploy-tests.sh @@ -14,11 +14,12 @@ if [ -n "${KATA_DEPLOY_TEST_UNION:-}" ]; then KATA_DEPLOY_TEST_UNION=($KATA_DEPLOY_TEST_UNION) else KATA_DEPLOY_TEST_UNION=( \ + "kata-deploy.bats" \ ) fi info "Run tests" for KATA_DEPLOY_TEST_ENTRY in ${KATA_DEPLOY_TEST_UNION[@]} do - #bats "${KATA_DEPLOY_TEST_ENTRY}" + bats "${KATA_DEPLOY_TEST_ENTRY}" done From 3e2a383b7d04fa102a35ad63f88e0af1950726bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 17 Aug 2023 10:52:18 +0200 Subject: [PATCH 067/339] gha: kata-deploy: Do the runtime class cleanup as part of the cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of doing this as part of the test itself, let's ensure it's done before running the tests and during the tests cleanup. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2d896ad12fe39c71d5cbb9cb7be9ec2d8e816b03) --- tests/functional/kata-deploy/gha-run.sh | 23 +++++++++++++++++++ tests/functional/kata-deploy/kata-deploy.bats | 13 ----------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh index 211990482..8e1c9a1d0 100755 --- a/tests/functional/kata-deploy/gha-run.sh +++ b/tests/functional/kata-deploy/gha-run.sh @@ -13,11 +13,34 @@ kata_deploy_dir="$(dirname "$(readlink -f "$0")")" source "${kata_deploy_dir}/../../gha-run-k8s-common.sh" function run_tests() { + cleanup_runtimeclasses || true + pushd "${kata_deploy_dir}" bash run-kata-deploy-tests.sh popd } +function cleanup_runtimeclasses() { + # Cleanup any runtime class that was left behind in the cluster, in + # case of a test failure, apart from the default one that comes from + # AKS + for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'` + do + kubectl delete runtimeclass $rc; + done +} + +function cleanup() { + platform="${1}" + test_type="${2:-k8s}" + + cleanup_runtimeclasses || true + + if [ "${platform}" = "aks" ]; then + delete_cluster ${test_type} + fi +} + function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats index 531c3eb64..84faf8fe1 100644 --- a/tests/functional/kata-deploy/kata-deploy.bats +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -11,12 +11,6 @@ setup() { repo_root_dir="${BATS_TEST_DIRNAME}/../../../" ensure_yq - # Cleanup any runtimeclass already present in the cluster - # apart from the default one that comes from AKS - for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'`; do - kubectl delete runtimeclass $rc; - done - # We expect 2 runtime classes because: # 
* `kata` is the default runtimeclass created, basically an alias for `kata-${KATA_HYPERVISOR}`. # * `kata-${KATA_HYPERVISOR}` is the other one @@ -109,11 +103,4 @@ teardown() { kubectl delete ${cleanup_spec} kubectl delete -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" - - # Cleanup any runtime class that was left behind in the cluster, in - # case of a test failure, apart from the default one that comes from - # AKS - for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'`; do - kubectl delete runtimeclass $rc; - done } From 7d0a3dbf24da2a36abdee6be0fb081b569c9b41d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 17 Aug 2023 19:50:56 +0000 Subject: [PATCH 068/339] metrics: Fix check results for tensorflow benchmark This PR fixes the check results for tensorflow benchmark now that we change the name of the test. Fixes #7684 Signed-off-by: Gabriela Cervantes (cherry picked from commit e8a511934355c7a0014c8fe714d14c35cf12f1f2) --- .../ci_worker/checkmetrics-json-clh-kata-metric8.toml | 8 ++++---- .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 3e71b7e74..98bb17752 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -73,26 +73,26 @@ minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow resnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result" +checkvar = 
".\"tensorflow_nhwc\".Results | .[] | .resnet.Result" checktype = "mean" midval = 3566.0 minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow alexnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .alexnet.Result" checktype = "mean" midval = 98.0 minpercent = 20.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index d615db0b3..98b7bce04 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -73,26 +73,26 @@ minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow resnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .resnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .resnet.Result" checktype = "mean" midval = 3546.0 minpercent = 20.0 maxpercent = 20.0 [[metric]] -name = "tensorflow" +name = "tensorflow_nhwc" type = "json" description = "tensorflow alexnet model" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"tensorflow\".Results | .[] | .alexnet.Result" +checkvar = ".\"tensorflow_nhwc\".Results | .[] | .alexnet.Result" checktype = "mean" midval = 98.0 minpercent = 20.0 From 3d3882a06a262a75812161648b79a32642c08eb1 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 17 Aug 2023 20:17:48 +0000 Subject: [PATCH 069/339] metrics: Update tensorflow name in gha 
run script This PR updates the tensorflow name in the gha run script. Signed-off-by: Gabriela Cervantes (cherry picked from commit 85c02828e11394725e866cf3547cef352b6d48c4) --- tests/metrics/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 3f168dafe..576e8273e 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -80,7 +80,7 @@ function run_test_blogbench() { function run_test_tensorflow() { info "Running TensorFlow test using ${KATA_HYPERVISOR} hypervisor" - bash tests/metrics/machine_learning/tensorflow.sh 1 20 + bash tests/metrics/machine_learning/tensorflow_nhwc.sh 1 20 check_metrics } From d46c300608a5d225726c66be5a659c9d5f9b2a67 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Wed, 16 Aug 2023 16:30:24 -0600 Subject: [PATCH 070/339] metrics: Enable kata runtime in K8s for FIO test. This PR configures the corresponding kata runtime in K8s based on the tested hypervisor. This PR also enables the FIO metrics test in the kata metrics-ci.
Fixes: #7665 Signed-off-by: David Esparza (cherry picked from commit fb571f8be9bf20d373c17476850e87bd35358b27) --- tests/metrics/gha-run.sh | 4 +--- tests/metrics/storage/fio-k8s/Makefile | 7 +++++-- .../storage/fio-k8s/cmd/fiotest/Makefile | 7 +++++-- .../configs/example-config/kata-clh.yaml | 17 +++++++++++++++++ .../configs/example-config/kata-qemu.yaml | 17 +++++++++++++++++ tests/metrics/storage/fio-k8s/fio-test-ci.sh | 11 ++++++----- 6 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml create mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 576e8273e..46b91769e 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -87,10 +87,8 @@ function run_test_tensorflow() { function run_test_fio() { info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" - # ToDo: remove the exit once the metrics workflow is stable - exit 0 - bash storage/fio-k8s/fio-test-ci.sh + bash tests/metrics/storage/fio-k8s/fio-test-ci.sh } function run_test_iperf() { diff --git a/tests/metrics/storage/fio-k8s/Makefile b/tests/metrics/storage/fio-k8s/Makefile index ba96203ba..8bdcf91f6 100644 --- a/tests/metrics/storage/fio-k8s/Makefile +++ b/tests/metrics/storage/fio-k8s/Makefile @@ -24,5 +24,8 @@ test: build run: build make -C $(MKFILE_DIR)/scripts/dax-compare-test/ run -test-ci: build - make -C $(MKFILE_DIR)/cmd/fiotest/ runci +test-qemu: build + make -C $(MKFILE_DIR)/cmd/fiotest/ run-qemu + +test-clh: build + make -C $(MKFILE_DIR)/cmd/fiotest/ run-clh diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile b/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile index 4c8a27c69..48ea3c91f 100644 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile +++ b/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile @@ -20,5 +20,8 @@ gomod: go mod edit 
-replace=github.com/kata-containers/kata-containers/tests/metrics/env=../../pkg/env go mod tidy -runci: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata $(MKFILE_DIR)/../../configs/example-config/ +run-qemu: build + $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata --container-runtime kata-qemu $(MKFILE_DIR)/../../configs/example-config/ + +run-clh: build + $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata --container-runtime kata-clh $(MKFILE_DIR)/../../configs/example-config/ diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml new file mode 100644 index 000000000..0744fe013 --- /dev/null +++ b/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml @@ -0,0 +1,17 @@ +## Copyright (c) 2021 Intel Corporation +# +## SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: iometrics +spec: + runtimeClassName: kata-clh + containers: + - args: + - sleep + - infinity + image: ubuntu:latest + name: iometrics + imagePullPolicy: Always diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml new file mode 100644 index 000000000..8d7cb3319 --- /dev/null +++ b/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml @@ -0,0 +1,17 @@ +## Copyright (c) 2021 Intel Corporation +# +## SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: iometrics +spec: + runtimeClassName: kata-qemu + containers: + - args: + - sleep + - infinity + image: ubuntu:latest + name: iometrics + imagePullPolicy: Always diff --git a/tests/metrics/storage/fio-k8s/fio-test-ci.sh b/tests/metrics/storage/fio-k8s/fio-test-ci.sh index a05ca9bf0..3f187e395 100755 --- a/tests/metrics/storage/fio-k8s/fio-test-ci.sh +++ 
b/tests/metrics/storage/fio-k8s/fio-test-ci.sh @@ -18,11 +18,12 @@ function main() { check_processes init_env - export KUBECONFIG="$HOME/.kube/config" - pushd "${FIO_PATH}" - echo "INFO: Running K8S FIO test" - make test-ci + [ -z "${KATA_HYPERVISOR}" ] && die "Hypervisor ID is missing." + [ "${KATA_HYPERVISOR}" != "qemu" ] && [ "${KATA_HYPERVISOR}" != "clh" ] && die "Hypervisor not recognized: ${KATA_HYPERVISOR}" + + echo "INFO: Running K8S FIO test using ${KATA_HYPERVISOR} hypervisor" + make "test-${KATA_HYPERVISOR}" popd test_result_file="${FIO_PATH}/cmd/fiotest/test-results/kata/randrw-sync.job/output.json" @@ -30,7 +31,7 @@ function main() { metrics_json_init local read_io=$(cat $test_result_file | grep io_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') local read_bw=$(cat $test_result_file | grep bw_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_90_percentile=$(cat $test_result_file | grep 90.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') + local read_90_percentile=$(cat $test_result_file | grep 90.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') local read_95_percentile=$(cat $test_result_file | grep 95.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') local write_io=$(cat $test_result_file | grep io_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') local write_bw=$(cat $test_result_file | grep bw_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') From 508f1bba15e28a05a1abbda79f74d9362a4a34b6 Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Mon, 21 Aug 2023 02:09:35 +0000 Subject: [PATCH 071/339] gha: capture additional kata-deploy output 10 lines can be insufficient for diagnostics. 
Fixes: #7707 Signed-off-by: Dan Mihai (cherry picked from commit 400eb8874322ac2dc20c0e2809e0ed704f256e5d) --- tests/integration/kubernetes/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index ee0743db0..1dda036b1 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -54,7 +54,7 @@ function deploy_kata() { fi echo "::group::kata-deploy logs" - kubectl -n kube-system logs -l name=kata-deploy + kubectl -n kube-system logs --tail=100 -l name=kata-deploy echo "::endgroup::" echo "::group::Runtime classes" From abcb225ce326aa09bdc4acb97f6ef28020f45ca1 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 21 Aug 2023 16:03:43 +0000 Subject: [PATCH 072/339] metrics: Use function from metrics common in pytorch script This PR uses a common function into the pytorch script. Fixes #7709 Signed-off-by: Gabriela Cervantes (cherry picked from commit 39bc3488f5756f589f4ad924a9b7013312086e0a) --- tests/metrics/machine_learning/pytorch.sh | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/metrics/machine_learning/pytorch.sh b/tests/metrics/machine_learning/pytorch.sh index 9958c1690..c07cdf3ab 100755 --- a/tests/metrics/machine_learning/pytorch.sh +++ b/tests/metrics/machine_learning/pytorch.sh @@ -29,17 +29,6 @@ function remove_tmp_file() { trap remove_tmp_file EXIT -function check_containers_are_up() { - local containers_launched=0 - for i in $(seq "${TIMEOUT}") ; do - info "Verify that the containers are running" - containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" - [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break - sleep 1 - [ "${i}" == "${TIMEOUT}" ] && return 1 - done -} - function equation_of_state_pytorch_test() { info "Running Equation of State Pytorch test" for i in "${containers[@]}"; do @@ -146,7 +135,7 @@ function main() { # Check that the 
requested number of containers are running - check_containers_are_up + check_containers_are_up "${NUM_CONTAINERS}" equation_of_state_pytorch_test From ad2036927f96a587bcf856771285ec37a47314e8 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 21 Aug 2023 21:06:04 +0000 Subject: [PATCH 073/339] metrics: Fix FIO path This PR fixes the FIO path for the FIO files. Fixes #7711 Signed-off-by: Gabriela Cervantes (cherry picked from commit eee2ee6eebedd1ad1fad696f54551d202c283541) --- tests/metrics/storage/fio-k8s/fio-test-ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/storage/fio-k8s/fio-test-ci.sh b/tests/metrics/storage/fio-k8s/fio-test-ci.sh index 3f187e395..0576647f0 100755 --- a/tests/metrics/storage/fio-k8s/fio-test-ci.sh +++ b/tests/metrics/storage/fio-k8s/fio-test-ci.sh @@ -9,7 +9,7 @@ set -e # General env SCRIPT_PATH=$(dirname "$(readlink -f "$0")") source "${SCRIPT_PATH}/../../lib/common.bash" -FIO_PATH="${GOPATH}/src/github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s" +FIO_PATH="${GOPATH}/tests/metrics/storage/fio-k8s" TEST_NAME="${TEST_NAME:-fio}" function main() { From 6ea1d3bffd47fb0b070aefababd1f8f9fbc91ebc Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 22 Aug 2023 16:20:31 +0000 Subject: [PATCH 074/339] metrics: Add disk link to README This PR adds disk link to README documentation for kata metrics. Fixes #7721 Signed-off-by: Gabriela Cervantes (cherry picked from commit 8afd158cef8f864493c08afa41080520a70698c2) --- tests/metrics/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 05ccf3891..6afa8a35b 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -78,6 +78,8 @@ For further details see the [storage tests documentation](storage). Test relating to measure reading and writing against clusters. +For further details see the [disk tests documentation](disk). 
+ ### Machine Learning Tests relating with TensorFlow and Pytorch implementations of several popular From 4f2c9372c3daa18633cf2d305e3d91a19a64c720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 23 Aug 2023 16:49:26 +0200 Subject: [PATCH 075/339] kata-deploy: Avoid failing on content removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can simply use `rm -f` all over the place and avoid the container returning any error. Fixes: #7733 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 5cba38c1750f24fc946cfe72f5f1fc971b58d90b) --- tools/packaging/kata-deploy/scripts/kata-deploy.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index 5f7d25331..cd56beeb4 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -155,7 +155,7 @@ function backup_shim() { if [ ! 
-f "${shim_backup}" ]; then mv "${shim_file}" "${shim_backup}" else - rm "${shim_file}" + rm -f "${shim_file}" fi fi } @@ -210,12 +210,12 @@ function cleanup_different_shims_base() { local shim_binary="containerd-shim-kata-${shim}-v2" local shim_file="/usr/local/bin/${shim_binary}" - rm "${shim_file}" || true + rm -f "${shim_file}" restore_shim "${shim_file}" done - rm "${default_shim_file}" || true + rm -f "${default_shim_file}" restore_shim "${default_shim_file}" if [[ "${CREATE_RUNTIMECLASSES}" == "true" ]]; then @@ -360,9 +360,9 @@ function cleanup_cri_runtime() { } function cleanup_crio() { - rm $crio_drop_in_conf_file + rm -f $crio_drop_in_conf_file if [[ "${DEBUG}" == "true" ]]; then - rm $crio_drop_in_conf_file_debug + rm -f $crio_drop_in_conf_file_debug fi } From 9acbf2faf7888a4903a37e21785701a0f7f301e8 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 23 Aug 2023 16:16:57 +0000 Subject: [PATCH 076/339] metrics: Add TensorFlow ResNet50 FP32 benchmark This PR adds TensorFlow ResNet50 FP32 benchmark for kata metrics. 
Fixes #7735 Signed-off-by: Gabriela Cervantes (cherry picked from commit 4b7d72c4a8c35bb04bcba2edd148f6ca93c33e14) --- .../tensorflow_resnet50_fp32.sh | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100755 tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh diff --git a/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh new file mode 100755 index 000000000..10abbd381 --- /dev/null +++ b/tests/metrics/machine_learning/tensorflow_resnet50_fp32.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="docker.io/library/resnet50_fp32:latest" +DOCKERFILE="${SCRIPT_PATH}/resnet50_fp32_dockerfile/Dockerfile" +tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX) +NUM_CONTAINERS="$1" +TIMEOUT="$2" +TEST_NAME="resnet50_fp32" +PAYLOAD_ARGS="tail -f /dev/null" +TESTDIR="${TESTDIR:-/testdir}" +# Options to control the start of the workload using a trigger-file +dst_dir="/host" +src_dir=$(mktemp --tmpdir -d tensorflowresnet.XXXXXXXXXX) +MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro" +start_script="resnet50_fp32_start.sh" +# CMD points to the script that starts the workload +CMD="$dst_dir/$start_script" +guest_trigger_file="$dst_dir/$trigger_file" +host_trigger_file="$src_dir/$trigger_file" +INITIAL_NUM_PIDS=1 +CMD_FILE="cat results | grep 'Average Throughput' | wc -l" +CMD_RESULTS="cat results | grep 'Average Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','" + +function remove_tmp_file() { + rm -rf "${tensorflow_file}" +} + +trap remove_tmp_file EXIT + +function help() { +cat << EOF +Usage: $0 + Description: + This script launches n number of containers + to run the ResNet50 fp32 model using a Tensorflow + container. 
+ Options: + : Number of containers to run. + : Timeout to launch the containers. +EOF +} + +function create_start_script() { + local script="${src_dir}/${start_script}" + rm -rf "${script}" + +cat <>"${script}" +#!/bin/bash +python3.8 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision fp32 --mode inference --in-graph /resnet50_fp32_pretrained_model.pb --batch-size 1 --num-intra-threads 16 >> results +EOF + chmod +x "${script}" +} + +function resnet50_fp32_test() { + local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16" + + info "Export environment variables" + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_EXPORT_VAR}" + done + + info "Running ResNet50 FP32 Tensorflow test" + local pids=() + local j=0 + for i in "${containers[@]}"; do + $(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD}")& + pids["${j}"]=$! + ((j++)) + done + + # wait for all pids + for pid in ${pids[*]}; do + wait "${pid}" + done + + touch "${host_trigger_file}" + info "All containers are running the workload..." 
+ + collect_results "${CMD_FILE}" + + for i in "${containers[@]}"; do + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}" + done + + local resnet50_fp32_results=$(cat "${tensorflow_file}" | sed 's/.$//') + local average_resnet50_fp32=$(echo "${resnet50_fp32_results}" | sed 's/.$//' | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) + local json="$(cat << EOF + { + "ResNet50_fp32": { + "Result": "${resnet50_fp32_results}", + "Average": "${average_resnet50_fp32}", + "Units": "images/s" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + # Check tools/commands dependencies + cmds=("awk" "docker" "bc") + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + + init_env + create_start_script + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "${not_started_count} remaining containers" + done + + metrics_json_init + metrics_json_start_array + + # Check that the requested number of containers are running + check_containers_are_up "${NUM_CONTAINERS}" + + # Check that the requested number of containers are running + check_containers_are_running "${NUM_CONTAINERS}" + + # Get the initial number of pids in a single container before the workload starts + INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | grep pids.current | xargs | cut -d ' ' -f 2) + ((INITIAL_NUM_PIDS++)) + + resnet50_fp32_test + + metrics_json_save + + sudo rm -rf "${src_dir}" + + 
clean_env_ctr +} +main "$@" From 7421737229744ebe43ed9087f24766824c333752 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 23 Aug 2023 16:24:58 +0000 Subject: [PATCH 077/339] metrics: Add TensorFlow ResNet50 fp32 Dockerfile This PR adds the TensorFlow ResNet50 fp32 Dockerfile for kata metrics. Signed-off-by: Gabriela Cervantes (cherry picked from commit 959ca49447d5df1f65dc068ccfd790d6d18da2ea) --- .../resnet50_fp32_dockerfile/Dockerfile | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile diff --git a/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile new file mode 100644 index 000000000..a0e796456 --- /dev/null +++ b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Usage: FROM [image name] +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget nano curl build-essential git && \ + apt-get install -y python3.8 python3-pip && \ + pip install --no-cache-dir intel-tensorflow-avx512==2.8.0 && \ + pip install --no-cache-dir protobuf==3.20.* && \ + wget -q https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50_fp32_pretrained_model.pb && \ + git clone https://github.com/IntelAI/models.git + +CMD ["/bin/bash"] From 394d146b8949baad761bc9e3adb687e84af227d1 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 23 Aug 2023 18:57:29 +0200 Subject: [PATCH 078/339] local-build: Remove GID before creating group docker install now creates a group with gid 999 which happens to match what we need to get docker-in-docker to work. Remove the group first as we don't need it. 
Fixes: #7726 Signed-off-by: Jeremi Piotrowski (cherry picked from commit 3b881fbc0edbe3c0979bff0d6f12bbce7215e3bb) --- tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 1338f482a..569a4ffdb 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -28,7 +28,7 @@ ARG GID=1000 # gid of the docker group on the host, required for running docker in docker builds. ARG HOST_DOCKER_GID -RUN if [ ${IMG_USER} != "root" ]; then groupadd --gid=${GID} ${IMG_USER};fi +RUN if [ ${IMG_USER} != "root" ]; then sed -i -e "/:${GID}:/d" /etc/group; groupadd --gid=${GID} ${IMG_USER};fi RUN if [ ${IMG_USER} != "root" ]; then adduser ${IMG_USER} --uid=${UID} --gid=${GID};fi RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then groupadd --gid=${HOST_DOCKER_GID} docker_on_host;fi RUN if [ ${IMG_USER} != "root" ] && [ ! -z ${HOST_DOCKER_GID} ]; then usermod -a -G docker_on_host ${IMG_USER};fi From e99a13d26cca79c589db7c848143ccd1ad1686c4 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 23 Aug 2023 16:38:48 +0200 Subject: [PATCH 079/339] gha: vfio: Run on Ubuntu 23.04 runner The vfio test requires nested-nested virtualization: L0 Azure host -> L1 Ubuntu VM -> L2 Fedora VM -> L3 Kata This hits a kernel bug on v5.15 but works quite nicely on the v6.2 kernel included in Ubuntu 23.04. We can switch back to Ubuntu 22.04 when they roll out v6.2. 
Fixes: #6555 Signed-off-by: Jeremi Piotrowski (cherry picked from commit 936e8091a7d3a3a76c6b392a3d930b80456ddba9) --- .github/workflows/run-vfio-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml index ba34d2088..6a42a1b4e 100644 --- a/.github/workflows/run-vfio-tests.yaml +++ b/.github/workflows/run-vfio-tests.yaml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204 + runs-on: garm-ubuntu-2304 env: GOPATH: ${{ github.workspace }} KATA_HYPERVISOR: ${{ matrix.vmm }} From 5b9a69433d49c983ed139c757d3925df8adabaa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Aug 2023 13:57:36 +0200 Subject: [PATCH 080/339] kata-deploy: Don't try to remove /opt/kata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The directory is a host path mount and cannot be removed from within the container. What we actually want to remove is whatever is inside that directory. 
This may raise errors like: ``` rm: cannot remove '/opt/kata/': Device or resource busy ``` Fixes: #7746 Signed-off-by: Fabiano Fidêncio (cherry picked from commit d8f3ce6497babb235543331964c9fb66b8cdcaf7) --- tools/packaging/kata-deploy/scripts/kata-deploy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index cd56beeb4..e01a8a9a6 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -342,7 +342,7 @@ function configure_containerd() { function remove_artifacts() { echo "deleting kata artifacts" - rm -rf /opt/kata/ + rm -rf /opt/kata/* } function cleanup_cri_runtime() { From 3e07c89d3923a9c0fd968c5d51b6f67a748eed3d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 24 Aug 2023 15:54:27 +0000 Subject: [PATCH 081/339] metrics: Remove unused variable in tensorflow nhwc script This PR removes unused variable in tensorflow nhwc script. 
Fixes #7750 Signed-off-by: Gabriela Cervantes (cherry picked from commit 32a778b6da92d31c29a03bc63afd3294ffc0a881) --- tests/metrics/machine_learning/tensorflow_nhwc.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/metrics/machine_learning/tensorflow_nhwc.sh b/tests/metrics/machine_learning/tensorflow_nhwc.sh index cb5f9ff28..328c055d4 100755 --- a/tests/metrics/machine_learning/tensorflow_nhwc.sh +++ b/tests/metrics/machine_learning/tensorflow_nhwc.sh @@ -29,7 +29,6 @@ alexnet_start_script="alexnet_start.sh" resnet_start_script="resnet_start.sh" CMD_RESNET="$dst_dir/$resnet_start_script" CMD_ALEXNET="$dst_dir/$alexnet_start_script" -timeout=600 INITIAL_NUM_PIDS=1 ALEXNET_FILE="alexnet_results" ALEXNET_CHECK_FILE_CMD="cat /${ALEXNET_FILE} | grep 'total images' | wc -l" From e8c24fa0b92de3eb99cb284bf58098b35023b9ae Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Fri, 25 Aug 2023 02:16:35 +0000 Subject: [PATCH 082/339] tests: delete k8s deployment at the test's end At the end of k8s-kill-all-process-in-container.bats, delete the deployment it created. Fixes: #7752 Signed-off-by: Dan Mihai (cherry picked from commit 6a974679f21499eac5eabb3ffa083384257b149c) --- tests/integration/kubernetes/k8s-pod-quota.bats | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/kubernetes/k8s-pod-quota.bats b/tests/integration/kubernetes/k8s-pod-quota.bats index d9a527725..20ad06274 100644 --- a/tests/integration/kubernetes/k8s-pod-quota.bats +++ b/tests/integration/kubernetes/k8s-pod-quota.bats @@ -31,5 +31,6 @@ setup() { } teardown() { + kubectl delete -f "${pod_config_dir}/pod-quota-deployment.yaml" kubectl delete -f "${pod_config_dir}/resource-quota.yaml" } From 17d22cae34a08b414c2d5c17f19b741285d31fce Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Fri, 25 Aug 2023 02:09:55 +0000 Subject: [PATCH 083/339] tests: use unique test name k8s-pid-ns.bats was already using the test name from k8s-kill-all-process-in-container.bats - probably a copy/paste bug. 
Fixes: #7753 Signed-off-by: Dan Mihai (cherry picked from commit 183f51d6f632fc83d06a9e42b849e0d548b8e6aa) --- .../kubernetes/k8s-kill-all-process-in-container.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats b/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats index 5081b8d7d..b40429245 100644 --- a/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats +++ b/tests/integration/kubernetes/k8s-kill-all-process-in-container.bats @@ -15,7 +15,7 @@ setup() { get_pod_config_dir } -@test "Check PID namespaces" { +@test "Kill all processes in container" { # Create the pod kubectl create -f "${pod_config_dir}/initcontainer-shareprocesspid.yaml" From 6a34bae03da8ef525ca4bbc5f0fc6880ef2a835e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 25 Aug 2023 10:00:17 +0200 Subject: [PATCH 084/339] gha: Avoid "fail-fast" in tests that are known to be flaky MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we'll have to re-run all the tests due to a flaky behaviour in one of the parts. 
Fixes: #7757 Signed-off-by: Fabiano Fidêncio (cherry picked from commit fb49d5d7ce85d9fba1f6d4cf45b1d4781fe82a2c) --- .github/workflows/run-cri-containerd-tests.yaml | 5 ++++- .github/workflows/run-metrics.yaml | 5 ++++- .github/workflows/run-nydus-tests.yaml | 5 ++++- .github/workflows/static-checks.yaml | 4 ++++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml index 4b439733b..19e8687ea 100644 --- a/.github/workflows/run-cri-containerd-tests.yaml +++ b/.github/workflows/run-cri-containerd-tests.yaml @@ -12,7 +12,10 @@ on: jobs: run-cri-containerd: strategy: - fail-fast: true + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false matrix: containerd_version: ['lts', 'active'] vmm: ['clh', 'qemu'] diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index e50f6bd21..d2e5ab64e 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -32,7 +32,10 @@ jobs: run-metrics: needs: setup-kata strategy: - fail-fast: true + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false matrix: vmm: ['clh', 'qemu'] max-parallel: 1 diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml index 647582c08..10ff1cc9a 100644 --- a/.github/workflows/run-nydus-tests.yaml +++ b/.github/workflows/run-nydus-tests.yaml @@ -12,7 +12,10 @@ on: jobs: run-nydus: strategy: - fail-fast: true + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. 
+ fail-fast: false matrix: containerd_version: ['lts', 'active'] vmm: ['clh', 'qemu', 'dragonball'] diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 76ea53de6..cb113bfb0 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -15,6 +15,10 @@ jobs: static-checks: runs-on: garm-ubuntu-2004 strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false matrix: cmd: - "make vendor" From 1b7c7901d9627b7ea34fbbb2b7e9be22ca7713e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Aug 2023 13:07:42 +0200 Subject: [PATCH 085/339] local-build: Remove $HOME/.docker/buildx/activity/default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file can be removed between builds without causing any issue, and leaving it around has been causing us some headache due to: ``` ERROR: open /home/runner/.docker/buildx/activity/default: permission denied ``` Signed-off-by: Fabiano Fidêncio (cherry picked from commit 3818bf3311a5d70704a3773941536f9a59a3a0e4) --- .../local-build/kata-deploy-binaries-in-docker.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index ebcc80c81..5c336cc8b 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -32,6 +32,11 @@ TARGET_ARCH=${TARGET_ARCH:-$(uname -m)} TARGET_OS=${TARGET_OS:-linux} TARGET_ARCH=${TARGET_ARCH:-$ARCH} +# We've seen issues related to the /home/runner/.docker/buildx/activity/default file +# constantly being with the wrong permissions. +# Let's just remove the file before we build. 
+rm -f $HOME/.docker/buildx/activity/default + [ "${CROSS_BUILD}" == "true" ] && BUILDX="buildx" && PLATFORM="--platform=${TARGET_OS}/${TARGET_ARCH}" if [ "${CROSS_BUILD}" == "true" ]; then # check if the current docker support docker buildx From 019849071e2371f5da187fb9f69cad352306af5d Mon Sep 17 00:00:00 2001 From: Unmesh Deodhar Date: Tue, 27 Jun 2023 14:05:04 -0500 Subject: [PATCH 086/339] tests: Add confidential test for SEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a test case for the launch of unencrypted confidential container, verifying that we are running inside a TEE. Right now the test only works with SEV, but it'll be expanded in the coming commits, as part of this very same series. Fixes: #7184 Signed-Off-By: Unmesh Deodhar Signed-off-by: Fabiano Fidêncio (cherry picked from commit c3b9d4945e0319342747a8d28bfb49995363f4ae) --- .../kubernetes/confidential_common.sh | 32 ++++++++++++ .../kubernetes/k8s-confidential.bats | 49 +++++++++++++++++++ .../kubernetes/run_kubernetes_tests.sh | 1 + .../pod-confidential-unencrypted.yaml | 33 +++++++++++++ 4 files changed, 115 insertions(+) create mode 100644 tests/integration/kubernetes/confidential_common.sh create mode 100644 tests/integration/kubernetes/k8s-confidential.bats create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh new file mode 100644 index 000000000..4484e65be --- /dev/null +++ b/tests/integration/kubernetes/confidential_common.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Copyright 2022-2023 Advanced Micro Devices, Inc. 
+# Copyright 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +source "${BATS_TEST_DIRNAME}/tests_common.sh" + +function setup_unencrypted_confidential_pod() { + get_pod_config_dir + + export SSH_KEY_FILE="${pod_config_dir}/confidential/unencrypted/ssh/unencrypted" + + if [ -n "${PR_NUMBER}" ]; then + # Use correct address in pod yaml + sed -i "s/-nightly/-${PR_NUMBER}/" "${pod_config_dir}/pod-confidential-unencrypted.yaml" + fi + + # Set permissions on private key file + sudo chmod 600 "${SSH_KEY_FILE}" +} + +# This function relies on `KATA_HYPERVISOR` being an environment variable +# and returns the remote command to be executed to that specific hypervisor +# in order to identify whether the workload is running on a TEE environment +function get_remote_command_per_hypervisor() { + declare -A REMOTE_COMMAND_PER_HYPERVISOR + REMOTE_COMMAND_PER_HYPERVISOR[qemu-sev]="dmesg | grep \"Memory Encryption Features active:.*\(SEV$\|SEV \)\"" + + echo "${REMOTE_COMMAND_PER_HYPERVISOR[${KATA_HYPERVISOR}]}" +} diff --git a/tests/integration/kubernetes/k8s-confidential.bats b/tests/integration/kubernetes/k8s-confidential.bats new file mode 100644 index 000000000..1a56fe34d --- /dev/null +++ b/tests/integration/kubernetes/k8s-confidential.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats +# Copyright 2022-2023 Advanced Micro Devices, Inc. +# Copyright 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/confidential_common.sh" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + SUPPORTED_HYPERVISORS=("qemu-sev") + + # This check must be done with "${KATA_HYPERVISOR}" to avoid + # having substrings, like qemu, being matched with qemu-$something. + [[ " ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]] || skip "Test not supported for ${KATA_HYPERVISOR}." 
+ + get_pod_config_dir + setup_unencrypted_confidential_pod +} + +@test "Test unencrypted confidential container launch success and verify that we are running in a secure enclave." { + # Start the service/deployment/pod + kubectl apply -f "${pod_config_dir}/pod-confidential-unencrypted.yaml" + + # Retrieve pod name, wait for it to come up, retrieve pod ip + pod_name=$(kubectl get pod -o wide | grep "confidential-unencrypted" | awk '{print $1;}') + + # Check pod creation + kubectl wait --for=condition=Ready --timeout=$timeout pod "${pod_name}" + + pod_ip=$(kubectl get pod -o wide | grep "confidential-unencrypted" | awk '{print $6;}') + + # Run the remote command + coco_enabled=$(ssh -i ${SSH_KEY_FILE} -o "StrictHostKeyChecking no" -o "PasswordAuthentication=no" root@${pod_ip} /bin/sh -c "$(get_remote_command_per_hypervisor)" || true) + + if [ -z "$coco_enabled" ]; then + >&2 echo -e "Confidential compute is expected but not enabled." + return 1 + fi +} + +teardown() { + [[ " ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]] || skip "Test not supported for ${KATA_HYPERVISOR}." 
+ + kubectl describe "pod/${pod_name}" || true + kubectl delete -f "${pod_config_dir}/pod-confidential-unencrypted.yaml" || true +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index f8b635d22..3d963c6c3 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -18,6 +18,7 @@ if [ -n "${K8S_TEST_UNION:-}" ]; then K8S_TEST_UNION=($K8S_TEST_UNION) else K8S_TEST_UNION=( \ + "k8s-confidential.bats" \ "k8s-attach-handlers.bats" \ "k8s-caps.bats" \ "k8s-configmap.bats" \ diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml new file mode 100644 index 000000000..591d86de8 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-confidential-unencrypted.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +kind: Service +apiVersion: v1 +metadata: + name: "confidential-unencrypted" +spec: + selector: + app: "confidential-unencrypted" + ports: + - port: 22 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: "confidential-unencrypted" +spec: + selector: + matchLabels: + app: "confidential-unencrypted" + template: + metadata: + labels: + app: "confidential-unencrypted" + spec: + runtimeClassName: kata + containers: + - name: "confidential-unencrypted" + image: ghcr.io/kata-containers/test-images:unencrypted-nightly + imagePullPolicy: Always + From 4976629aee811465c1046b993b8cc1c4ce84d2e4 Mon Sep 17 00:00:00 2001 From: Unmesh Deodhar Date: Thu, 27 Jul 2023 09:55:15 -0500 Subject: [PATCH 087/339] tests: Expand confidential test to support SNP Let's expand the confidential test to also support SNP. 
Fixes: #7184 Signed-off-by: Unmesh Deodhar (cherry picked from commit e31f099be1796df21ea0a0a24c83489e3f1dc5d2) --- tests/integration/kubernetes/confidential_common.sh | 1 + tests/integration/kubernetes/k8s-confidential.bats | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh index 4484e65be..3260fac52 100644 --- a/tests/integration/kubernetes/confidential_common.sh +++ b/tests/integration/kubernetes/confidential_common.sh @@ -27,6 +27,7 @@ function setup_unencrypted_confidential_pod() { function get_remote_command_per_hypervisor() { declare -A REMOTE_COMMAND_PER_HYPERVISOR REMOTE_COMMAND_PER_HYPERVISOR[qemu-sev]="dmesg | grep \"Memory Encryption Features active:.*\(SEV$\|SEV \)\"" + REMOTE_COMMAND_PER_HYPERVISOR[qemu-snp]="dmesg | grep \"Memory Encryption Features active:.*SEV-SNP\"" echo "${REMOTE_COMMAND_PER_HYPERVISOR[${KATA_HYPERVISOR}]}" } diff --git a/tests/integration/kubernetes/k8s-confidential.bats b/tests/integration/kubernetes/k8s-confidential.bats index 1a56fe34d..915cc6531 100644 --- a/tests/integration/kubernetes/k8s-confidential.bats +++ b/tests/integration/kubernetes/k8s-confidential.bats @@ -10,7 +10,7 @@ load "${BATS_TEST_DIRNAME}/confidential_common.sh" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { - SUPPORTED_HYPERVISORS=("qemu-sev") + SUPPORTED_HYPERVISORS=("qemu-sev" "qemu-snp") # This check must be done with "${KATA_HYPERVISOR}" to avoid # having substrings, like qemu, being matched with qemu-$something. From 2684b267f7c99601fc8ec3913930c5d57cf2dfa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Aug 2023 21:37:20 +0200 Subject: [PATCH 088/339] tests: Expand confidential test to support TDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's expand the confidential test to also support TDX. 
The main difference on the test, though, is that we're not grepping for a string in the `dmesg` output, but rather relying on `cpuid` to detect a TDX guest. Fixes: #7184 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e286e842c1a0025d95b526978b56f7ee2a608a93) --- tests/integration/kubernetes/confidential_common.sh | 1 + tests/integration/kubernetes/k8s-confidential.bats | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh index 3260fac52..0e049cadd 100644 --- a/tests/integration/kubernetes/confidential_common.sh +++ b/tests/integration/kubernetes/confidential_common.sh @@ -28,6 +28,7 @@ function get_remote_command_per_hypervisor() { declare -A REMOTE_COMMAND_PER_HYPERVISOR REMOTE_COMMAND_PER_HYPERVISOR[qemu-sev]="dmesg | grep \"Memory Encryption Features active:.*\(SEV$\|SEV \)\"" REMOTE_COMMAND_PER_HYPERVISOR[qemu-snp]="dmesg | grep \"Memory Encryption Features active:.*SEV-SNP\"" + REMOTE_COMMAND_PER_HYPERVISOR[qemu-tdx]="cpuid | grep TDX_GUEST" echo "${REMOTE_COMMAND_PER_HYPERVISOR[${KATA_HYPERVISOR}]}" } diff --git a/tests/integration/kubernetes/k8s-confidential.bats b/tests/integration/kubernetes/k8s-confidential.bats index 915cc6531..c78c7d518 100644 --- a/tests/integration/kubernetes/k8s-confidential.bats +++ b/tests/integration/kubernetes/k8s-confidential.bats @@ -10,7 +10,7 @@ load "${BATS_TEST_DIRNAME}/confidential_common.sh" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { - SUPPORTED_HYPERVISORS=("qemu-sev" "qemu-snp") + SUPPORTED_HYPERVISORS=("qemu-sev" "qemu-snp" "qemu-tdx") # This check must be done with "${KATA_HYPERVISOR}" to avoid # having substrings, like qemu, being matched with qemu-$something. 
From 03c206f87f0fe54a4bb5196efb562ca8b0aacf15 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:25:14 +0000 Subject: [PATCH 089/339] metrics: Add metrics report script This PR adds metrics report script for kata metrics. Fixes #7782 Signed-off-by: Gabriela Cervantes (cherry picked from commit 69781fc027ec86371b77c92de3d58253ba396d13) --- tests/metrics/report/makereport.sh | 97 ++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 tests/metrics/report/makereport.sh diff --git a/tests/metrics/report/makereport.sh b/tests/metrics/report/makereport.sh new file mode 100755 index 000000000..3926d7567 --- /dev/null +++ b/tests/metrics/report/makereport.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Take the data found in subdirectories of the metrics 'results' directory, +# and turn them into a PDF report. Use a Dockerfile containing all the tooling +# and scripts we need to do that. + +set -e + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="${IMAGE:-metrics-report}" +DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile" + +HOST_INPUT_DIR="${SCRIPT_PATH}/../results" +R_ENV_FILE="${HOST_INPUT_DIR}/Env.R" +HOST_OUTPUT_DIR="${SCRIPT_PATH}/output" + +GUEST_INPUT_DIR="/inputdir/" +GUEST_OUTPUT_DIR="/outputdir/" + +# If in debugging mode, we also map in the scripts dir so you can +# dynamically edit and re-load them at the R prompt +HOST_SCRIPT_DIR="${SCRIPT_PATH}/report_dockerfile" +GUEST_SCRIPT_DIR="/scripts/" + +setup() { + echo "Checking subdirectories" + check_subdir="$(ls -dx ${HOST_INPUT_DIR}/*/ 2> /dev/null | wc -l)" + if [ $check_subdir -eq 0 ]; then + die "No subdirs in [${HOST_INPUT_DIR}] to read results from." 
+ fi + + echo "Checking Dockerfile" + check_dockerfiles_images "$IMAGE" "$DOCKERFILE" + + mkdir -p "$HOST_OUTPUT_DIR" && true + + echo "inputdir=\"${GUEST_INPUT_DIR}\"" > ${R_ENV_FILE} + echo "outputdir=\"${GUEST_OUTPUT_DIR}\"" >> ${R_ENV_FILE} + + # A bit of a hack to get an R syntax'd list of dirs to process + # Also, need it as not host-side dir path - so short relative names + resultdirs="$(cd ${HOST_INPUT_DIR}; ls -dx */)" + resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g') + echo "resultdirs=c(" >> ${R_ENV_FILE} + echo " \"${resultdirslist}\"" >> ${R_ENV_FILE} + echo ")" >> ${R_ENV_FILE} +} + +run() { + docker run -ti --rm -v ${HOST_INPUT_DIR}:${GUEST_INPUT_DIR} -v ${HOST_OUTPUT_DIR}:${GUEST_OUTPUT_DIR} ${extra_volumes} ${IMAGE} ${extra_command} + ls -la ${HOST_OUTPUT_DIR}/* +} + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script generates a metrics report document + from the results directory one level up in the + directory tree (../results). + Options: + -d, Run in debug (interactive) mode + -h, Print this help +EOF +) + echo "$usage" +} + +main() { + + local OPTIND + while getopts "d" opt;do + case ${opt} in + d) + # In debug mode, run a shell instead of the default report generation + extra_command="bash" + extra_volumes="-v ${HOST_SCRIPT_DIR}:${GUEST_SCRIPT_DIR}" + ;; + ?) + # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) + + setup + run +} + +main "$@" From 9f8e194e6fd9c803c0c395195b2f2bfa1a3e01bb Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:28:32 +0000 Subject: [PATCH 090/339] metrics: Add report dockerfile This PR adds the report dockerfile for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 08812074d12d6980193b4e167784fecc58ac6b8c) --- .../report/report_dockerfile/Dockerfile | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/Dockerfile diff --git a/tests/metrics/report/report_dockerfile/Dockerfile b/tests/metrics/report/report_dockerfile/Dockerfile new file mode 100644 index 000000000..74c502652 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/Dockerfile @@ -0,0 +1,44 @@ +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with the components needed to generate a +# metrics report. That includes: +# - R +# - The R 'tidyverse' +# - pandoc +# - The report generation R files and helper scripts + +# Start with the base rocker tidyverse. +# We would have used the 'verse' base, that already has some of the docs processing +# installed, but I could not figure out how to add in the extra bits we needed to +# the lite tex version is uses. +# Here we specify a tag for base image instead of using latest to let it free from +# the risk from the update of latest base image. +FROM rocker/tidyverse:3.6.0 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.2" + +# Without this some of the package installs stop to try and ask questions... +ENV DEBIAN_FRONTEND=noninteractive + +# Install the extra doc processing parts we need for our Rmarkdown PDF flow. +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + texlive-latex-base \ + texlive-fonts-recommended \ + latex-xcolor && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists + +# Install the extra R packages we need. +RUN install2.r --error --deps TRUE \ + gridExtra \ + ggpubr + +# Pull in our actual worker scripts +COPY . 
/scripts + +# By default generate the report +CMD ["/scripts/genreport.sh"] From 80625ed5736eae1c64ddb1fb5a2ae9b8c9c7e555 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:45:22 +0000 Subject: [PATCH 091/339] metrics: Add metrics report R files This PR adds the metrics report R files. Signed-off-by: Gabriela Cervantes (cherry picked from commit fce2487971a27f217f613859e38d00aca95e4dda) --- .../report/report_dockerfile/dut-details.R | 122 ++++++++ .../report/report_dockerfile/fio-reads.R | 269 ++++++++++++++++++ .../report/report_dockerfile/fio-writes.R | 260 +++++++++++++++++ .../report_dockerfile/footprint-density.R | 111 ++++++++ .../report/report_dockerfile/lifecycle-time.R | 157 ++++++++++ .../report/report_dockerfile/mem-in-cont.R | 142 +++++++++ .../report_dockerfile/memory-footprint.R | 121 ++++++++ .../report/report_dockerfile/network-cpu.R | 132 +++++++++ 8 files changed, 1314 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/dut-details.R create mode 100644 tests/metrics/report/report_dockerfile/fio-reads.R create mode 100644 tests/metrics/report/report_dockerfile/fio-writes.R create mode 100644 tests/metrics/report/report_dockerfile/footprint-density.R create mode 100644 tests/metrics/report/report_dockerfile/lifecycle-time.R create mode 100644 tests/metrics/report/report_dockerfile/mem-in-cont.R create mode 100644 tests/metrics/report/report_dockerfile/memory-footprint.R create mode 100644 tests/metrics/report/report_dockerfile/network-cpu.R diff --git a/tests/metrics/report/report_dockerfile/dut-details.R b/tests/metrics/report/report_dockerfile/dut-details.R new file mode 100644 index 000000000..3b16c8c2b --- /dev/null +++ b/tests/metrics/report/report_dockerfile/dut-details.R @@ -0,0 +1,122 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for the 'Device Under Test', for all data sets being processed. 
+ +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). +library(tibble) +suppressMessages(suppressWarnings(library(plyr))) # rbind.fill + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +# A list of all the known results files we might find the information inside. +resultsfiles=c( + "boot-times.json", + "memory-footprint.json", + "memory-footprint-ksm.json", + "memory-footprint-inside-container.json" + ) + +data=c() +stats=c() +stats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (resultsfile in resultsfiles) { + fname=paste(inputdir, currentdir, resultsfile, sep="/") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + + if (length(fdata$'kata-env') != 0 ) { + # We have kata-runtime data + dirstats=tibble("Run Ver"=as.character(fdata$'kata-env'$Runtime$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'kata-env'$Runtime$Version$Commit)) + + pver=as.character(fdata$'kata-env'$Proxy$Version) + pver=sub("^[[:alpha:][:blank:]-]*", "", pver) + # uncomment if you want to drop the commit sha as well + #pver=sub("([[:digit:].]*).*", "\\1", pver) + dirstats=cbind(dirstats, "Proxy Ver"=pver) + + # Trim the shim string + sver=as.character(fdata$'kata-env'$Shim$Version) + sver=sub("^[[:alpha:][:blank:]-]*", "", sver) + # uncomment if you want to drop the commit sha as well + #sver=sub("([[:digit:].]*).*", "\\1", sver) + dirstats=cbind(dirstats, "Shim Ver"=sver) + + # Default QEMU ver string is far too long and noisy - trim. 
+ hver=as.character(fdata$'kata-env'$Hypervisor$Version) + hver=sub("^[[:alpha:][:blank:]]*", "", hver) + hver=sub("([[:digit:].]*).*", "\\1", hver) + dirstats=cbind(dirstats, "Hyper Ver"=hver) + + iver=as.character(fdata$'kata-env'$Image$Path) + iver=sub("^[[:alpha:]/-]*", "", iver) + dirstats=cbind(dirstats, "Image Ver"=iver) + + kver=as.character(fdata$'kata-env'$Kernel$Path) + kver=sub("^[[:alpha:]/-]*", "", kver) + dirstats=cbind(dirstats, "Guest Krnl"=kver) + + dirstats=cbind(dirstats, "Host arch"=as.character(fdata$'kata-env'$Host$Architecture)) + dirstats=cbind(dirstats, "Host Distro"=as.character(fdata$'kata-env'$Host$Distro$Name)) + dirstats=cbind(dirstats, "Host DistVer"=as.character(fdata$'kata-env'$Host$Distro$Version)) + dirstats=cbind(dirstats, "Host Model"=as.character(fdata$'kata-env'$Host$CPU$Model)) + dirstats=cbind(dirstats, "Host Krnl"=as.character(fdata$'kata-env'$Host$Kernel)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + + break + } else { + if (length(fdata$'runc-env') != 0 ) { + dirstats=tibble("Run Ver"=as.character(fdata$'runc-env'$Version$Semver)) + dirstats=cbind(dirstats, "Run SHA"=as.character(fdata$'runc-env'$Version$Commit)) + dirstats=cbind(dirstats, "runtime"=as.character(fdata$test$runtime)) + } else { + dirstats=tibble("runtime"="Unknown") + } + break + } + } + + if ( length(dirstats) == 0 ) { + warning(paste("No valid data found for directory ", currentdir)) + } + + # use plyr rbind.fill so we can combine disparate version info frames + stats=rbind.fill(stats, dirstats) + stats_names=rbind(stats_names, datasetname) +} + +rownames(stats) = stats_names + +# Rotate the tibble so we get data dirs as the columns +spun_stats = as_tibble(cbind(What=names(stats), t(stats))) + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(spun_stats, check.names=FALSE), + theme=ttheme(base_size=6), + rows=NULL + )) + +# It may seem odd doing a grid of 1x1, but it should 
ensure we get a uniform format and +# layout to match the other charts and tables in the report. +master_plot = grid.arrange( + stats_plot, + nrow=1, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-reads.R b/tests/metrics/report/report_dockerfile/fio-reads.R new file mode 100644 index 000000000..5f3f1c212 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-reads.R @@ -0,0 +1,269 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for `fio` random read storage IO tests. + + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randread-128", + "fio-randread-256", + "fio-randread-512", + "fio-randread-1k", + "fio-randread-2k", + "fio-randread-4k", + "fio-randread-8k", + "fio-randread-16k", + "fio-randread-32k", + "fio-randread-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + +# Where to store up the stats for the tables +read_bw_stats=c() +read_iops_stats=c() +read_lat95_stats=c() +read_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-ref the test named unique data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of 
percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$read$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. + clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentile numbers for the boxplot + # Plotting all values for all runtimes and blocksizes is just way too + # noisy to make a meaninful picture, so we use this subset. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the read 'jobs'. 
+ mdata=data.frame(read_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$read$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$read$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "read_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "read_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$read_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Collect up as sets across all files and runtimes. 
+ data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + read_bw_stats=cbind(read_bw_stats, bw_dirstats) + colnames(read_bw_stats)[ncol(read_bw_stats)]=datasetname + + read_iops_stats=cbind(read_iops_stats, iops_dirstats) + colnames(read_iops_stats)[ncol(read_iops_stats)]=datasetname + + read_lat95_stats=cbind(read_lat95_stats, lat95_dirstats) + colnames(read_lat95_stats)[ncol(read_lat95_stats)]=datasetname + read_lat99_stats=cbind(read_lat99_stats, lat99_dirstats) + colnames(read_lat99_stats)[ncol(read_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +read_bw_stats=cbind(Bandwidth=rownames(read_bw_stats), read_bw_stats) +read_bw_stats=cbind(read_bw_stats, Units=rep("MB/s", nrow(read_bw_stats))) + +read_iops_stats=cbind(IOPS=rownames(read_iops_stats), read_iops_stats) +read_iops_stats=cbind(read_iops_stats, Units=rep("IOP/s", nrow(read_iops_stats))) + +read_lat95_stats=cbind('lat 95pc'=rownames(read_lat95_stats), read_lat95_stats) +read_lat95_stats=cbind(read_lat95_stats, Units=rep("ms", nrow(read_lat95_stats))) +read_lat99_stats=cbind('lat 99pc'=rownames(read_lat99_stats), read_lat99_stats) +read_lat99_stats=cbind(read_lat99_stats, Units=rep("ms", nrow(read_lat99_stats))) + +# Bandwidth line plot +read_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, read_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# IOPS line plot +read_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + 
ggtitle("Random Read total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# 95 and 99 percentile box plot +read_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Read completion latency", subtitle="95&99 percentiles, boxplot over jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") +# it would be nice to use the same legend theme as the other plots on this +# page, but because of the number of entries it tends to flow off the picture. +# theme( +# axis.text.x=element_text(angle=90), +# legend.position=c(0.35,0.8), +# legend.title=element_text(size=5), +# legend.text=element_text(size=5), +# legend.background = element_rect(fill=alpha('blue', 0.2)) +# ) + +# As the boxplot is actually quite hard to interpret, also show a linegraph +# of all the percentiles for a single blocksize. 
+which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +read_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Read completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# Output the pretty pictures +graphics_plot = grid.arrange( + read_bw_line_plot, + read_iops_line_plot, + read_clat_box_plot, + read_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. This only works because we have a `results='asis'` in the Rmd +# R fragment. 
+cat("\n\n\\pagebreak\n") + +read_bw_stats_plot = suppressWarnings(ggtexttable(read_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_iops_stats_plot = suppressWarnings(ggtexttable(read_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +read_lat95_stats_plot = suppressWarnings(ggtexttable(read_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +read_lat99_stats_plot = suppressWarnings(ggtexttable(read_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + read_bw_stats_plot, + read_iops_stats_plot, + read_lat95_stats_plot, + read_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/fio-writes.R b/tests/metrics/report/report_dockerfile/fio-writes.R new file mode 100644 index 000000000..5fa223176 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/fio-writes.R @@ -0,0 +1,260 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display details for 'fio' random writes storage IO tests. 
+ + +library(ggplot2) # ability to plot nicely +library(gridExtra) # So we can plot multiple graphs together +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable +suppressMessages(library(jsonlite)) # to load the data +suppressMessages(suppressWarnings(library(tidyr))) # for gather +library(tibble) + +testnames=c( + "fio-randwrite-128", + "fio-randwrite-256", + "fio-randwrite-512", + "fio-randwrite-1k", + "fio-randwrite-2k", + "fio-randwrite-4k", + "fio-randwrite-8k", + "fio-randwrite-16k", + "fio-randwrite-32k", + "fio-randwrite-64k" + ) + +data2=c() +all_ldata=c() +all_ldata2=c() +stats=c() +rstats=c() +rstats_names=c() + + +# Where to store up the stats for the tables +write_bw_stats=c() +write_iops_stats=c() +write_lat95_stats=c() +write_lat99_stats=c() + +# For each set of results +for (currentdir in resultdirs) { + bw_dirstats=c() + iops_dirstats=c() + lat95_dirstats=c() + lat99_dirstats=c() + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + #warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific named data + fdata=fdata[[testname]] + + blocksize=fdata$Raw$'global options'$bs + + # Extract the latency data - it comes as a table of percentiles, so + # we have to do a little work... + clat=data.frame(clat_ns=fdata$Raw$jobs[[1]]$write$clat_ns$percentile) + + # Generate a clat data set with 'clean' percentile numbers so + # we can sensibly plot it later on. 
+ clat2=clat + colnames(clat2)<-sub("clat_ns.", "", colnames(clat2)) + colnames(clat2)<-sub("0000", "", colnames(clat2)) + ldata2=gather(clat2) + colnames(ldata2)[colnames(ldata2)=="key"] <- "percentile" + colnames(ldata2)[colnames(ldata2)=="value"] <- "ms" + ldata2$ms=ldata2$ms/1000000 #ns->ms + ldata2=cbind(ldata2, runtime=rep(datasetname, length(ldata2$percentile))) + ldata2=cbind(ldata2, blocksize=rep(blocksize, length(ldata2$percentile))) + + # Pull the 95 and 99 percentiles for the boxplot diagram. + # Our values fall more in the range of ms... + pc95data=tibble(percentile=clat$clat_ns.95.000000/1000000) + pc95data=cbind(pc95data, runtime=rep(paste(datasetname, "95pc", sep="-"), length(pc95data$percentile))) + pc99data=tibble(percentile=clat$clat_ns.99.000000/1000000) + pc99data=cbind(pc99data, runtime=rep(paste(datasetname, "99pc", sep="-"), length(pc95data$percentile))) + ldata=rbind(pc95data, pc99data) + ldata=cbind(ldata, blocksize=rep(blocksize, length(ldata$percentile))) + + # We want total bandwidth, so that is the sum of the bandwidths + # from all the write 'jobs'. 
+ mdata=data.frame(write_bw_mps=as.numeric(sum(fdata$Raw$jobs[[1]]$write$bw)/1024)) + mdata=cbind(mdata, iops_tot=as.numeric(sum(fdata$Raw$jobs[[1]]$write$iops))) + mdata=cbind(mdata, runtime=rep(datasetname, length(mdata[, "write_bw_mps"]) )) + mdata=cbind(mdata, blocksize=rep(blocksize, length(mdata[, "write_bw_mps"]) )) + + # Extract the stats tables + bw_dirstats=rbind(bw_dirstats, round(mdata$write_bw_mps, digits=1)) + # Rowname hack to get the blocksize recorded + rownames(bw_dirstats)[nrow(bw_dirstats)]=blocksize + + iops_dirstats=rbind(iops_dirstats, round(mdata$iops_tot, digits=1)) + rownames(iops_dirstats)[nrow(iops_dirstats)]=blocksize + + # And do the 95 and 99 percentiles as tables as well + lat95_dirstats=rbind(lat95_dirstats, round(mean(clat$clat_ns.95.000000)/1000000, digits=1)) + rownames(lat95_dirstats)[nrow(lat95_dirstats)]=blocksize + lat99_dirstats=rbind(lat99_dirstats, round(mean(clat$clat_ns.99.000000)/1000000, digits=1)) + rownames(lat99_dirstats)[nrow(lat99_dirstats)]=blocksize + + # Store away as single sets + data2=rbind(data2, mdata) + all_ldata=rbind(all_ldata, ldata) + all_ldata2=rbind(all_ldata2, ldata2) + } + + # Collect up for each dir we process into a column + write_bw_stats=cbind(write_bw_stats, bw_dirstats) + colnames(write_bw_stats)[ncol(write_bw_stats)]=datasetname + + write_iops_stats=cbind(write_iops_stats, iops_dirstats) + colnames(write_iops_stats)[ncol(write_iops_stats)]=datasetname + + write_lat95_stats=cbind(write_lat95_stats, lat95_dirstats) + colnames(write_lat95_stats)[ncol(write_lat95_stats)]=datasetname + write_lat99_stats=cbind(write_lat99_stats, lat99_dirstats) + colnames(write_lat99_stats)[ncol(write_lat99_stats)]=datasetname +} + +# To get a nice looking table, we need to extract the rownames into their +# own column +write_bw_stats=cbind(Bandwidth=rownames(write_bw_stats), write_bw_stats) +write_bw_stats=cbind(write_bw_stats, Units=rep("MB/s", nrow(write_bw_stats))) + 
+write_iops_stats=cbind(IOPS=rownames(write_iops_stats), write_iops_stats) +write_iops_stats=cbind(write_iops_stats, Units=rep("IOP/s", nrow(write_iops_stats))) + +write_lat95_stats=cbind('lat 95pc'=rownames(write_lat95_stats), write_lat95_stats) +write_lat95_stats=cbind(write_lat95_stats, Units=rep("ms", nrow(write_lat95_stats))) +write_lat99_stats=cbind('lat 99pc'=rownames(write_lat99_stats), write_lat99_stats) +write_lat99_stats=cbind(write_lat99_stats, Units=rep("ms", nrow(write_lat99_stats))) + +# lineplot of total bandwidth across blocksizes. +write_bw_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, write_bw_mps, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total bandwidth") + + xlab("Blocksize") + + ylab("Bandwidth (MiB/s)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# lineplot of IOPS across blocksizes +write_iops_line_plot <- ggplot() + + geom_line( data=data2, aes(blocksize, iops_tot, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write total IOPS") + + xlab("Blocksize") + + ylab("IOPS") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +# boxplot of 95 and 99 percentiles covering the parallel jobs, shown across +# the blocksizes. 
+write_clat_box_plot <- ggplot() + + geom_boxplot( data=all_ldata, aes(blocksize, percentile, color=runtime)) + + stat_summary( data=all_ldata, aes(blocksize, percentile, group=runtime, color=runtime), fun.y=mean, geom="line") + + ylim(0, NA) + + ggtitle("Random Write completion latency", subtitle="95&99 Percentiles, boxplot across jobs") + + xlab("Blocksize") + + ylab("Latency (ms)") + + theme(axis.text.x=element_text(angle=90)) + + # Use the 'paired' colour matrix as we are setting these up as pairs of + # 95 and 99 percentiles, and it is much easier to visually group those to + # each runtime if we use this colourmap. + scale_colour_brewer(palette="Paired") + + +# completion latency line plot across the percentiles, for a specific blocksize only +# as otherwise the graph would be far too noisy. +which_blocksize='4k' +clat_line_subtitle=paste("For blocksize", which_blocksize, sep=" ") +single_blocksize=subset(all_ldata2, blocksize==which_blocksize) +clat_line=aggregate( + single_blocksize$ms, + by=list( + percentile=single_blocksize$percentile, + blocksize=single_blocksize$blocksize, + runtime=single_blocksize$runtime + ), + FUN=mean +) + +clat_line$percentile=as.numeric(clat_line$percentile) + +write_clat_line_plot <- ggplot() + + geom_line( data=clat_line, aes(percentile, x, group=runtime, color=runtime)) + + ylim(0, NA) + + ggtitle("Random Write completion latency percentiles", subtitle=clat_line_subtitle) + + xlab("Percentile") + + ylab("Time (ms)") + + theme( + axis.text.x=element_text(angle=90), + legend.position=c(0.35,0.8), + legend.title=element_text(size=5), + legend.text=element_text(size=5), + legend.background = element_rect(fill=alpha('blue', 0.2)) + ) + +master_plot = grid.arrange( + write_bw_line_plot, + write_iops_line_plot, + write_clat_box_plot, + write_clat_line_plot, + nrow=2, + ncol=2 ) + +# A bit of an odd tweak to force a pagebreak between the pictures and +# the tables. 
This only works because we have a `results='asis'` in the Rmd +# R fragment. +cat("\n\n\\pagebreak\n") + +write_bw_stats_plot = suppressWarnings(ggtexttable(write_bw_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_iops_stats_plot = suppressWarnings(ggtexttable(write_iops_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +write_lat95_stats_plot = suppressWarnings(ggtexttable(write_lat95_stats, + theme=ttheme(base_size=10), + rows=NULL + )) +write_lat99_stats_plot = suppressWarnings(ggtexttable(write_lat99_stats, + theme=ttheme(base_size=10), + rows=NULL + )) + +# and then the statistics tables +stats_plot = grid.arrange( + write_bw_stats_plot, + write_iops_stats_plot, + write_lat95_stats_plot, + write_lat99_stats_plot, + nrow=4, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/footprint-density.R b/tests/metrics/report/report_dockerfile/footprint-density.R new file mode 100644 index 000000000..3539dc9cc --- /dev/null +++ b/tests/metrics/report/report_dockerfile/footprint-density.R @@ -0,0 +1,111 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show system memory reduction, and hence container 'density', by analysing the +# scaling footprint data results and the 'system free' memory. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
+ +testnames=c( + paste("footprint-busybox.*", test_name_extra, sep=""), + paste("footprint-mysql.*", test_name_extra, sep=""), + paste("footprint-elasticsearch.*", test_name_extra, sep="") +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + matchdir=paste(inputdir, currentdir, sep="") + matchfile=paste(testname, '\\.json', sep="") + files=list.files(matchdir, pattern=matchfile) + if ( length(files) == 0 ) { + #warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing")) + } + for (ffound in files) { + fname=paste(inputdir, currentdir, ffound, sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test name specific data + shortname=substr(ffound, 1, nchar(ffound)-nchar(".json")) + fdata=fdata[[shortname]] + + payload=fdata$Config$payload + testname=paste(datasetname, payload) + + cdata=data.frame(avail_mb=as.numeric(fdata$Results$system$avail)/(1024*1024)) + cdata=cbind(cdata, avail_decr=as.numeric(fdata$Results$system$avail_decr)) + cdata=cbind(cdata, count=seq_len(length(cdata[, "avail_mb"]))) + cdata=cbind(cdata, testname=rep(testname, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, payload=rep(payload, length(cdata[, "avail_mb"]) )) + cdata=cbind(cdata, dataset=rep(datasetname, length(cdata[, "avail_mb"]) )) + + # Gather our statistics + sdata=data.frame(num_containers=length(cdata[, "avail_mb"])) + # Pick out the last avail_decr value - which in theory should be + # the most we have consumed... 
+ sdata=cbind(sdata, mem_consumed=cdata[, "avail_decr"][length(cdata[, "avail_decr"])]) + sdata=cbind(sdata, avg_bytes_per_c=sdata$mem_consumed / sdata$num_containers) + sdata=cbind(sdata, runtime=testname) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + s = c( + "Test"=testname, + "n"=sdata$num_containers, + "size"=(sdata$mem_consumed) / 1024, + "kb/n"=round((sdata$mem_consumed / sdata$num_containers) / 1024, digits=1), + "n/Gb"= round((1*1024*1024*1024) / (sdata$mem_consumed / sdata$num_containers), digits=1) + ) + + rstats=rbind(rstats, s) + count = count + 1 + } + } +} + +# Set up the text table headers +colnames(rstats)=c("Test", "n", "Tot_Kb", "avg_Kb", "n_per_Gb") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_point( data=data, aes(count, avail_mb, group=testname, color=payload, shape=dataset)) + + geom_line( data=data, aes(count, avail_mb, group=testname, color=payload)) + + xlab("Containers") + + ylab("System Avail (Mb)") + + ggtitle("System Memory free") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + line_plot, + stats_plot, + nrow=2, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/lifecycle-time.R b/tests/metrics/report/report_dockerfile/lifecycle-time.R new file mode 100644 index 000000000..4969f23c9 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/lifecycle-time.R @@ -0,0 +1,157 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Display how long the various phases of a container lifecycle (run, execute, die etc. +# take. + +library(ggplot2) # ability to plot nicely. +suppressMessages(suppressWarnings(library(tidyr))) # for gather(). 
+ # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +testnames=c( + "boot-times" +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + for (testname in testnames) { + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + # De-nest the test specific name + fdata=fdata[[testname]] + + cdata=data.frame(workload=as.numeric(fdata$Results$'to-workload'$Result)) + cdata=cbind(cdata, quit=as.numeric(fdata$Results$'to-quit'$Result)) + + cdata=cbind(cdata, tokernel=as.numeric(fdata$Results$'to-kernel'$Result)) + cdata=cbind(cdata, inkernel=as.numeric(fdata$Results$'in-kernel'$Result)) + cdata=cbind(cdata, total=as.numeric(fdata$Results$'total'$Result)) + + cdata=cbind(cdata, count=seq_len(length(cdata[,"workload"]))) + cdata=cbind(cdata, runtime=rep(datasetname, length(cdata[, "workload"]) )) + + # Calculate some stats for total time + sdata=data.frame(workload_mean=mean(cdata$workload)) + sdata=cbind(sdata, workload_min=min(cdata$workload)) + sdata=cbind(sdata, workload_max=max(cdata$workload)) + sdata=cbind(sdata, workload_sd=sd(cdata$workload)) + sdata=cbind(sdata, workload_cov=((sdata$workload_sd / sdata$workload_mean) * 100)) + sdata=cbind(sdata, runtime=datasetname) + + sdata=cbind(sdata, quit_mean = mean(cdata$quit)) + sdata=cbind(sdata, quit_min = min(cdata$quit)) + sdata=cbind(sdata, quit_max = max(cdata$quit)) + sdata=cbind(sdata, quit_sd = sd(cdata$quit)) + sdata=cbind(sdata, quit_cov = (sdata$quit_sd / sdata$quit_mean) * 100) + + sdata=cbind(sdata, tokernel_mean = mean(cdata$tokernel)) + 
sdata=cbind(sdata, inkernel_mean = mean(cdata$inkernel)) + sdata=cbind(sdata, total_mean = mean(cdata$total)) + + # Store away as a single set + data=rbind(data, cdata) + stats=rbind(stats, sdata) + + # Store away some stats for the text table + dirstats[count]=round(sdata$tokernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$inkernel_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$workload_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$quit_mean, digits=2) + count = count + 1 + dirstats[count]=round(sdata$total_mean, digits=2) + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +unts=c("s", "s", "s", "s", "s") +rstats=rbind(rstats, unts) +rstats_names=rbind(rstats_names, "Units") + + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c() + for( i in 1:5) { + difference = as.double(rstats[2,i]) - as.double(rstats[1,i]) + val = 100 * (difference/as.double(rstats[1,i])) + diff[i] = paste(round(val, digits=2), "%", sep=" ") + } + + rstats=rbind(rstats, diff) + rstats_names=rbind(rstats_names, "Diff") +} + +rstats=cbind(rstats_names, rstats) + +# Set up the text table headers +colnames(rstats)=c("Results", "2k", "ik", "2w", "2q", "tot") + + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats, check.names=FALSE), + theme=ttheme(base_size=8), + rows=NULL + )) + +# plot how samples varioed over 'time' +line_plot <- ggplot() + + geom_line( data=data, aes(count, workload, color=runtime)) + + geom_smooth( data=data, aes(count, workload, color=runtime), se=FALSE, method="loess") + + xlab("Iteration") + + ylab("Time (s)") + + ggtitle("Boot to workload", subtitle="First container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +boot_boxplot <- ggplot() + + geom_boxplot( data=data, aes(runtime, workload, color=runtime), show.legend=FALSE) + + ylim(0, NA) + + ylab("Time (s)") + +# convert the stats to a long format so we can more easily do a side-by-side barplot +longstats <- gather(stats, measure, value, workload_mean, quit_mean, inkernel_mean, tokernel_mean, total_mean) + +bar_plot <- ggplot() + + geom_bar( data=longstats, aes(measure, value, fill=runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Phase") + + ylab("Time (s)") + + ggtitle("Lifecycle phase times", subtitle="Mean") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + bar_plot, + line_plot, + stats_plot, + boot_boxplot, + nrow=2, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/mem-in-cont.R b/tests/metrics/report/report_dockerfile/mem-in-cont.R new file mode 100644 index 000000000..8dede6b99 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/mem-in-cont.R @@ -0,0 +1,142 @@ 
+#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. + +testnames=c( + "memory-footprint-inside-container" +) + +data=c() +rstats=c() +rstats_rows=c() +rstats_cols=c() + +# For each set of results +for (currentdir in resultdirs) { + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$requested=fdata$Results$memrequest$Result + fdata$total=fdata$Results$memtotal$Result + fdata$free=fdata$Results$memfree$Result + fdata$avail=fdata$Results$memavailable$Result + + # And lets work out what % we have 'lost' between the amount requested + # and the total the container actually sees. 
+ fdata$lost=fdata$requested - fdata$total + fdata$pctotal= 100 * (fdata$lost/ fdata$requested) + + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$requested, + Type="requested", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$total, + Type="total", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$free, + Type="free", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$avail, + Type="avail", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$lost, + Type="lost", + Runtime=fdata$Runtime )) + + data=rbind(data, data.frame( + Result=fdata$pctotal, + Type="% consumed", + Runtime=fdata$Runtime )) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$requested, digits=2) ) + dirstats=rbind(dirstats, round(fdata$total, digits=2) ) + dirstats=rbind(dirstats, round(fdata$free, digits=2) ) + dirstats=rbind(dirstats, round(fdata$avail, digits=2) ) + dirstats=rbind(dirstats, round(fdata$lost, digits=2) ) + dirstats=rbind(dirstats, round(fdata$pctotal, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("Requested", "Total", "Free", "Avail", "Consumed", "% Consumed") + +unts=c("Kb", "Kb", "Kb", "Kb", "Kb", "%") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c() + # Just the first three entries - meaningless for the pctotal entry + for (n in 1:5) { + difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) + val = 100 * (difference/as.double(rstats[n,1])) + diff=rbind(diff, round(val, digits=2)) + } + + # Add a blank entry for the other entries + diff=rbind(diff, "") + rstats=cbind(rstats, diff) + rstats_cols=append(rstats_cols, "Diff %") +} + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=rstats_rows, cols=rstats_cols + )) + +bardata <- subset(data, Type %in% c("requested", "total", "free", "avail")) +# plot how samples varioed over 'time' +barplot <- ggplot() + + geom_bar(data=bardata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + + xlab("Measure") + + ylab("Size (Kb)") + + ggtitle("In-container memory statistics") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + barplot, + stats_plot, + nrow=2, + ncol=1 ) diff --git a/tests/metrics/report/report_dockerfile/memory-footprint.R b/tests/metrics/report/report_dockerfile/memory-footprint.R new file mode 100644 index 000000000..4d5f53129 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/memory-footprint.R @@ -0,0 +1,121 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
+ +testnames=c( + "memory-footprint", + "memory-footprint-ksm" +) + +resultsfilesshort=c( + "noKSM", + "KSM" +) + +data=c() +rstats=c() +rstats_names=c() + +# For each set of results +for (currentdir in resultdirs) { + count=1 + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$Result=fdata$Results$average$Result + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + fdata$Count=seq_len(length(fdata$Result)) + + # Calculate some stats + fdata.mean = mean(fdata$Result) + fdata.min = min(fdata$Result) + fdata.max = max(fdata$Result) + fdata.sd = sd(fdata$Result) + fdata.cov = (fdata.sd / fdata.mean) * 100 + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$Result, + Count=fdata$Count, + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats[count]=round(fdata.mean, digits=2) + + count = count + 1 + } + rstats=rbind(rstats, dirstats) + rstats_names=rbind(rstats_names, datasetname) +} + +rstats=cbind(rstats_names, rstats) +unts=rep("Kb", length(resultdirs)) + +# If we have only 2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... 
+ diff=c("diff") + difference = (as.double(rstats[2,2]) - as.double(rstats[1,2])) + val = 100 * (difference/as.double(rstats[1,2])) + diff[2] = round(val, digits=2) + difference = (as.double(rstats[2,3]) - as.double(rstats[1,3])) + val = 100 * (difference/as.double(rstats[1,3])) + diff[3] = round(val, digits=2) + rstats=rbind(rstats, diff) + + unts[3]="%" +} + +rstats=cbind(rstats, unts) + +# Set up the text table headers +colnames(rstats)=c("Results", resultsfilesshort, "Units") + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=NULL + )) + +# plot how samples varioed over 'time' +point_plot <- ggplot() + + geom_point( data=data, aes(Runtime, Result, color=variant), position=position_dodge(0.1)) + + xlab("Dataset") + + ylab("Size (Kb)") + + ggtitle("Average PSS footprint", subtitle="per container") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + point_plot, + stats_plot, + nrow=1, + ncol=2 ) diff --git a/tests/metrics/report/report_dockerfile/network-cpu.R b/tests/metrics/report/report_dockerfile/network-cpu.R new file mode 100644 index 000000000..b6df50b30 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/network-cpu.R @@ -0,0 +1,132 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Analyse the runtime component memory footprint data. + +library(ggplot2) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. 
+ +testnames=c( + "cpu-information" +) + +resultsfilesshort=c( + "CPU" +) + +data=c() +rstats=c() +rstats_rows=c() +rstats_cols=c() + +Gdenom = (1000.0 * 1000.0 * 1000.0) + +# For each set of results +for (currentdir in resultdirs) { + dirstats=c() + # For the two different types of memory footprint measures + for (testname in testnames) { + # R seems not to like double path slashes '//' ? + fname=paste(inputdir, currentdir, testname, '.json', sep="") + if ( !file.exists(fname)) { + warning(paste("Skipping non-existent file: ", fname)) + next + } + + # Derive the name from the test result dirname + datasetname=basename(currentdir) + datasetvariant=resultsfilesshort[count] + + # Import the data + fdata=fromJSON(fname) + fdata=fdata[[testname]] + # Copy the average result into a shorter, more accesible name + fdata$ips=fdata$Results$"instructions per cycle"$Result + fdata$Gcycles=fdata$Results$cycles$Result / Gdenom + fdata$Ginstructions=fdata$Results$instructions$Result / Gdenom + fdata$variant=rep(datasetvariant, length(fdata$Result) ) + fdata$Runtime=rep(datasetname, length(fdata$Result) ) + + # Store away the bits we need + data=rbind(data, data.frame( + Result=fdata$ips, + Type="ips", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Gcycles, + Type="Gcycles", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + data=rbind(data, data.frame( + Result=fdata$Ginstructions, + Type="Ginstr", + Runtime=fdata$Runtime, + variant=fdata$variant ) ) + + # Store away some stats for the text table + dirstats=rbind(dirstats, round(fdata$ips, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Gcycles, digits=2) ) + dirstats=rbind(dirstats, round(fdata$Ginstructions, digits=2) ) + } + rstats=cbind(rstats, dirstats) + rstats_cols=append(rstats_cols, datasetname) +} + +rstats_rows=c("IPS", "GCycles", "GInstr") + +unts=c("Ins/Cyc", "G", "G") +rstats=cbind(rstats, unts) +rstats_cols=append(rstats_cols, "Units") + +# If we have only 
2 sets of results, then we can do some more +# stats math for the text table +if (length(resultdirs) == 2) { + # This is a touch hard wired - but we *know* we only have two + # datasets... + diff=c() + for (n in 1:3) { + difference = (as.double(rstats[n,2]) - as.double(rstats[n,1])) + val = 100 * (difference/as.double(rstats[n,1])) + diff=rbind(diff, round(val, digits=2)) + } + rstats=cbind(rstats, diff) + rstats_cols=append(rstats_cols, "Diff %") +} + +# Build us a text table of numerical results +stats_plot = suppressWarnings(ggtexttable(data.frame(rstats), + theme=ttheme(base_size=10), + rows=rstats_rows, cols=rstats_cols + )) + +# plot how samples varioed over 'time' +ipsdata <- subset(data, Type %in% c("ips")) +ips_plot <- ggplot() + + geom_bar(data=ipsdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge") + + xlab("Measure") + + ylab("IPS") + + ggtitle("Instructions Per Cycle") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +cycdata <- subset(data, Type %in% c("Gcycles", "Ginstr")) +cycles_plot <- ggplot() + + geom_bar(data=cycdata, aes(Type, Result, fill=Runtime), stat="identity", position="dodge", show.legend=FALSE) + + xlab("Measure") + + ylab("Count (G)") + + ggtitle("Cycles and Instructions") + + ylim(0, NA) + + theme(axis.text.x=element_text(angle=90)) + +master_plot = grid.arrange( + ips_plot, + cycles_plot, + stats_plot, + nrow=2, + ncol=2 ) From a8fa3d99da046a10c38693ffbf0bbfa19159445a Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 16:50:32 +0000 Subject: [PATCH 092/339] metrics: Generate PNGs alongside the PDF report This PR generates the PNGs for the kata metrics PDF report. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 878d1a2e7dab4a912aa5f99c830d09cfa5bca8ce) --- .../report/report_dockerfile/genreport.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/genreport.sh diff --git a/tests/metrics/report/report_dockerfile/genreport.sh b/tests/metrics/report/report_dockerfile/genreport.sh new file mode 100644 index 000000000..5689c93d0 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/genreport.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +REPORTNAME="metrics_report.pdf" + +cd scripts + +Rscript --slave -e "library(knitr);knit('pdf.Rmd')" +Rscript --slave -e "library(knitr);pandoc('pdf.md', format='latex')" + +Rscript --slave -e "library(knitr);knit('html.Rmd')" +Rscript --slave -e "library(knitr);pandoc('html.md', format='html')" + +cp /scripts/pdf.pdf /outputdir/${REPORTNAME} +cp /scripts/figure/*.png /outputdir/ +echo "PNGs of graphs and tables can be found in the output directory." +echo "The report, named ${REPORTNAME}, can be found in the output directory" From 428eb6908d79127cc0bc46df9e678b7402e26d9b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:43:06 +0000 Subject: [PATCH 093/339] metrics: Add report file titles This PR adds report file titles for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 139ffd4f758ea282aa8e3a5312c6151c20c50362) --- .../metrics/report/report_dockerfile/html.Rmd | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/html.Rmd diff --git a/tests/metrics/report/report_dockerfile/html.Rmd b/tests/metrics/report/report_dockerfile/html.Rmd new file mode 100644 index 000000000..86229f335 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/html.Rmd @@ -0,0 +1,23 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +title: "Kata Containers metrics report" +author: "Auto generated" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + html_document: +urlcolor: blue +--- + +```{r setup, include=FALSE} +#Set these opts to get pdf images which fit into beamer slides better +opts_chunk$set(dev = 'png') +# Pick up any env set by the invoking script, such as the root dir of the +# results data tree +source("/inputdir/Env.R") +``` + +```{r child = 'metrics_report.Rmd'} +``` From 01f6e6a1a3a283e993ee9f0507a07cbf4b9cccec Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:45:36 +0000 Subject: [PATCH 094/339] metrics: Add metrics reportgen This PR adds metrics reportgen for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 8e6d4e6f3d4b2b799fb3fe338b9e69a6a70be6dd) --- .../report/report_dockerfile/metrics_report.Rmd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/metrics/report/report_dockerfile/metrics_report.Rmd diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd new file mode 100644 index 000000000..6a6cdbbe8 --- /dev/null +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -0,0 +1,15 @@ +--- +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +--- +\pagebreak + +# Introduction +This report compares the metrics between multiple sets of data generated from +the [Kata Containers report generation scripts](https://github.com/kata-containers/tests/tree/main/metrics/report/README.md). + +This report was generated using the data from the **`r resultdirs`** results directories. + +\pagebreak + From 1c1eb9810700abdb0c1df33116e5f932cf5338f4 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:51:27 +0000 Subject: [PATCH 095/339] metrics: Add scaling system footprint in metrics report This PR adds scaling system footprint in metrics report. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 79fbb9d2430d69c862d0bf2ddd7cae2f559572ff) --- .../report/report_dockerfile/metrics_report.Rmd | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 6a6cdbbe8..27f4e2b1d 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -7,9 +7,23 @@ # Introduction This report compares the metrics between multiple sets of data generated from -the [Kata Containers report generation scripts](https://github.com/kata-containers/tests/tree/main/metrics/report/README.md). +the [Kata Containers report generation scripts](https://github.com/kata-containers/kata-containers/tree/main/metrics/report/README.md). This report was generated using the data from the **`r resultdirs`** results directories. \pagebreak +# Container scaling system footprint +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/footprint_data.sh) +measures the system memory footprint impact whilst running an increasing number +of containers. For this test, [KSM](https://en.wikipedia.org/wiki/Kernel_same-page_merging) + is enabled. The results show how system memory is consumed for different sized +containers, and their average system memory footprint cost and density (how many +containers you can fit per Gb) is calculated. + +```{r footprint-density, echo=FALSE, fig.cap="System Memory density"} +test_name_extra="-ksm" +source('footprint-density.R') +rm(test_name_extra) +``` +\pagebreak From 75c92ba474e7cd68360b94c90a09a10f0cab61d0 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:53:17 +0000 Subject: [PATCH 096/339] metrics: Add memory inside container to metrics report This PR adds memory inside container to metrics report. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 3b0d6538f2c3eae0919b0ae85d377c924d5c9022) --- .../report/report_dockerfile/metrics_report.Rmd | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 27f4e2b1d..68ff50923 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -27,3 +27,15 @@ source('footprint-density.R') rm(test_name_extra) ``` \pagebreak + +# Memory used inside container +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/density/memory_usage_inside_container.sh) +measures the memory inside a container taken by the container runtime. It shows the difference between the amount of memory requested for the container, and the amount the container can actually 'see'. + +The *% Consumed* is the key row in the table, which compares the *Requested* against *Total* values. + + +```{r mem-in-cont, echo=FALSE, fig.cap="System Memory density"} +source('mem-in-cont.R') +``` +\pagebreak From 99103db1fb94889b852f4313688c3dbbce2755ab Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:55:44 +0000 Subject: [PATCH 097/339] metrics: Add boot lifecycle times to metrics report This PR adds the boot lifecycle times to metrics report. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 17dc1b976044f57c048ff76613b4e91e8dd43186) --- .../report/report_dockerfile/metrics_report.Rmd | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index 68ff50923..dc4dd7b0a 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -39,3 +39,16 @@ The *% Consumed* is the key row in the table, which compares the *Requested* aga source('mem-in-cont.R') ``` \pagebreak + +# Container boot lifecycle times +This [test](https://github.com/kata-containers/kata-containers/blob/main/metrics/time/launch_times.sh) +uses the `date` command on the host and in the container, as well as data from the container +kernel `dmesg`, to ascertain how long different phases of the create/boot/run/delete +Docker container lifecycle take for the first launched container. + +To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes are '`k`ernel', '`w`orkload' and '`q`uit'. 'tot' is the total time for a complete container start-to-finished cycle. + +```{r lifecycle-time, echo=FALSE, fig.cap="Execution lifecycle times"} +source('lifecycle-time.R') +``` +\pagebreak From 08027f2282773e027de83d59b5e781c3163e13ec Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 17:56:53 +0000 Subject: [PATCH 098/339] metrics: Add test setup details to metrics report This PR adds test setup details to metrics report. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 7e364716dd658ef77f2df2944ad84b0d34e856a9) --- .../metrics/report/report_dockerfile/metrics_report.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/metrics/report/report_dockerfile/metrics_report.Rmd b/tests/metrics/report/report_dockerfile/metrics_report.Rmd index dc4dd7b0a..9dca2b49f 100644 --- a/tests/metrics/report/report_dockerfile/metrics_report.Rmd +++ b/tests/metrics/report/report_dockerfile/metrics_report.Rmd @@ -52,3 +52,12 @@ To decode the stats table, the prefixes are 'to(`2`)' and '`i`n'. The suffixes a source('lifecycle-time.R') ``` \pagebreak + +# Test setup details + +This table describes the test system details, as derived from the information contained +in the test results files. + + +```{r dut, echo=FALSE, fig.cap="System configuration details"} +source('dut-details.R') From 3491407581173fd1b1b10f98b1ed26a402ac55c1 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 21:08:45 +0000 Subject: [PATCH 099/339] metrics: Fix memory inside limits for kata metrics This PR fixes the memory inside limit for clh for kata metrics due to the recent changes that we had in the script which impacted in the performance measurement. 
Fixes #7786 Signed-off-by: Gabriela Cervantes (cherry picked from commit 8877ec62fbb5cf75e61533d97d5342f88c62a0c8) --- .../ci_worker/checkmetrics-json-clh-kata-metric8.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 98bb17752..d59ebc0c3 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -29,7 +29,7 @@ description = "measure memory usage" # within (inclusive) checkvar = ".\"memory-footprint\".Results | .[] | .average.Result" checktype = "mean" -midval = 2518364.00 +midval = 127220.25 minpercent = 20.0 maxpercent = 20.0 From 41d05b88579203b6956f623f536fd044c2338f07 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 29 Aug 2023 22:51:21 +0000 Subject: [PATCH 100/339] metrics: Fix memory footprint qemu limit This PR fixes the memory footprint qemu limit for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit c8dd3c07376c5a4b886ff90e82a64a0c77e1f87a) --- .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 98b7bce04..2ddd94b59 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -29,7 +29,7 @@ description = "measure memory usage" # within (inclusive) checkvar = ".\"memory-footprint\".Results | .[] | .average.Result" checktype = "mean" -midval = 2435844.00 +midval = 122832.40 minpercent = 20.0 maxpercent = 20.0 From aebf392e4554140b70f2382b9b622990e788c4b4 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 28 Aug 2023 15:24:44 +0000 Subject: [PATCH 101/339] metrics: Enable FIO limits for kata metrics This PR enables the FIO limits for kata metrics. 
Fixes #7771 Signed-off-by: Gabriela Cervantes (cherry picked from commit 6e06392c556246999573329fb5d6581a8b7cdc88) --- tests/metrics/gha-run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 46b91769e..a9121917a 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -81,14 +81,14 @@ function run_test_tensorflow() { info "Running TensorFlow test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/machine_learning/tensorflow_nhwc.sh 1 20 - - check_metrics } function run_test_fio() { info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/storage/fio-k8s/fio-test-ci.sh + + check_metrics } function run_test_iperf() { From dbb4761c4bf0314d4da530e8ba85af8abd9844b4 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 28 Aug 2023 15:32:11 +0000 Subject: [PATCH 102/339] metrics: Add limit for write 90 percentile value for clh This PR adds the limit for write 90 percentile value for clh for FIO metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 99db6568e959335398507645064b2136668cdcf7) --- .../checkmetrics-json-clh-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index d59ebc0c3..e680c771e 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -97,3 +97,16 @@ checktype = "mean" midval = 98.0 minpercent = 20.0 maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure write 90 percentile using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" +checktype = "mean" +midval = 38656.0 +minpercent = 20.0 +maxpercent = 20.0 From 17c88a1a7fc1c157f55f5830141c7129062c2ae6 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 28 Aug 2023 15:33:30 +0000 Subject: [PATCH 103/339] metrics: Add limit for 90 percentile for qemu value This PR adds the limit for 90 percentile for qemu value for FIO kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit a7b59a5bf945f7b0d5efeb6a5af567726733e18f) --- .../checkmetrics-json-qemu-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 2ddd94b59..56e661a49 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -97,3 +97,16 @@ checktype = "mean" midval = 98.0 minpercent = 20.0 maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure write 90 percentile using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" +checktype = "mean" +midval = 37120.0 +minpercent = 20.0 +maxpercent = 20.0 From 05180b61a088b3865ef64cb36cabe1a0becca525 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 30 Aug 2023 16:31:49 +0000 Subject: [PATCH 104/339] metrics: Add README for kata metrics report This PR adds the README for kata metrics report. Fixes #7802 Signed-off-by: Gabriela Cervantes (cherry picked from commit c0ed5ea0ad71066b259cddccab378ff262324b91) --- tests/metrics/report/README.md | 64 ++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 tests/metrics/report/README.md diff --git a/tests/metrics/report/README.md b/tests/metrics/report/README.md new file mode 100644 index 000000000..da586953d --- /dev/null +++ b/tests/metrics/report/README.md @@ -0,0 +1,64 @@ +# Kata Containers metrics report generator + +The files within this directory can be used to generate a "metrics report" for Kata Containers. 
The +primary workflow consists of two stages: +1) Run the provided report metrics data gathering scripts on the system(s) you wish to analyze. +2) Run the provided report generation script to analyze the data and generate a report file. + +## Data gathering + +Data gathering is provided by the `grabdata.sh` script. When run, this script executes a set of +tests from the `tests/metrics` directory. The JSON results files will be placed into the +`tests/metrics/results` directory. Once the results are generated, create a suitably named +subdirectory of `tests/metrics/results`, and move the JSON files into it. Repeat this process if +you want to compare multiple sets of results. Note, the report generation scripts process all +subdirectories of `tests/metrics/results` when generating the report. + +> **Note:** By default, the `grabdata.sh` script tries to launch some moderately large containers +> (i.e. 8Gbyte RAM) and may fail to produce some results on a memory constrained system. + +You can restrict the subset of tests run by `grabdata.sh` via its commandline parameters: + +| Option | Description | +| ------ | ------------------------| +| -a | Run all tests (default) | +| -d | Run the density tests | +| -h | Print this help | +| -s | Run the storage tests | +| -t | Run the time tests | + +## Report generation + +Report generation is provided by the `makereport.sh` script. By default this script processes all +subdirectories of the `tests/metrics/results` directory to generate the report. To run in the +default mode, execute the following: + +``` +$ ./makereport.sh +``` + +The report generation tool uses [`Rmarkdown`](https://github.com/rstudio/rmarkdown), [R](https://www.r-project.org/about.html) and +[Pandoc](https://pandoc.org/) to produce a PDF report. To avoid the need for all users to set up a +working environment with all the necessary tooling, the `makereport.sh` script utilises a +`Dockerfile` with the environment pre-defined in order to produce the report. 
Thus, you need to +have Docker installed on your system in order to run the report generation. The resulting +`metrics_report.pdf` is generated into the `output` subdirectory of the `report` directory. + +## Debugging and development + +To aid in script development and debugging, the `makereport.sh` script offers a debug facility via +the `-d` command line option. Using this option will place you into a `bash` shell within the +running `Dockerfile` image used to generate the report, whilst also mapping your host side `R` +scripts from the `report_dockerfile` subdirectory into the container, thus facilitating a "live" +edit/reload/run development cycle. From there you can examine the Docker image environment, and +execute the generation scripts. E.g., to test the `scaling.R` script, you can execute: + +``` +$ makereport.sh -d +# R +> source('/inputdir/Env.R') +> source('/scripts/lifecycle-time.R') +``` + +You can then edit the `report_dockerfile/lifecycle-time.R` file on the host, and re-run +the `source('/scripts/lifecycle-time.R')` command inside the still running `R` container. From 8f1cc278ca4b71501c66d574985bd269151036c0 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 30 Aug 2023 16:55:14 +0000 Subject: [PATCH 105/339] metrics: Add report generator link to general documentation This PR adds the report generator link to general documentation. Signed-off-by: Gabriela Cervantes (cherry picked from commit 9f21fa9b39f24deb1ea8b6860a3893131f956f2a) --- tests/metrics/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 6afa8a35b..279e39af9 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -212,3 +212,7 @@ set if necessary. ## `checkmetrics` `checkmetrics` is a CLI tool to check a metrics CI results file. For further reference see the [`checkmetrics`](cmd/checkmetrics). + +## Report generator + +See the [report generator](report) documentation. 
From 59e7c3a34709f663b299e21184a96052db43b84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 31 Aug 2023 15:34:16 +0200 Subject: [PATCH 106/339] gha: Update to checkout@v3 action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point we should always be using the latest checkout action. Signed-off-by: Fabiano Fidêncio (cherry picked from commit d7a996c686867db87bf4514d6747d48323119003) --- .github/workflows/add-issues-to-project.yaml | 2 +- .github/workflows/add-pr-sizing-label.yaml | 2 +- .github/workflows/darwin-tests.yaml | 2 +- .github/workflows/docs-url-alive-check.yaml | 2 +- .github/workflows/move-issues-to-in-progress.yaml | 2 +- .github/workflows/require-pr-porting-labels.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/add-issues-to-project.yaml b/.github/workflows/add-issues-to-project.yaml index 117e62600..6ba266261 100644 --- a/.github/workflows/add-issues-to-project.yaml +++ b/.github/workflows/add-issues-to-project.yaml @@ -39,7 +39,7 @@ jobs: popd &>/dev/null - name: Checkout code to allow hub to communicate with the project - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Add issue to issue backlog env: diff --git a/.github/workflows/add-pr-sizing-label.yaml b/.github/workflows/add-pr-sizing-label.yaml index 313c9f285..b8d498363 100644 --- a/.github/workflows/add-pr-sizing-label.yaml +++ b/.github/workflows/add-pr-sizing-label.yaml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v1 + uses: actions/checkout@v3 - name: Install PR sizing label script run: | diff --git a/.github/workflows/darwin-tests.yaml b/.github/workflows/darwin-tests.yaml index 02bbb0e72..8b3f9041a 100644 --- a/.github/workflows/darwin-tests.yaml +++ b/.github/workflows/darwin-tests.yaml @@ -21,6 +21,6 @@ jobs: with: go-version: 1.19.3 - name: Checkout code - uses: actions/checkout@v2 + uses: 
actions/checkout@v3 - name: Build utils run: ./ci/darwin-test.sh diff --git a/.github/workflows/docs-url-alive-check.yaml b/.github/workflows/docs-url-alive-check.yaml index bd1f5ab2c..543215f77 100644 --- a/.github/workflows/docs-url-alive-check.yaml +++ b/.github/workflows/docs-url-alive-check.yaml @@ -22,7 +22,7 @@ jobs: echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 path: ./src/github.com/${{ github.repository }} diff --git a/.github/workflows/move-issues-to-in-progress.yaml b/.github/workflows/move-issues-to-in-progress.yaml index 0e15abaea..64a453d64 100644 --- a/.github/workflows/move-issues-to-in-progress.yaml +++ b/.github/workflows/move-issues-to-in-progress.yaml @@ -38,7 +38,7 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Move issue to "In progress" if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} diff --git a/.github/workflows/require-pr-porting-labels.yaml b/.github/workflows/require-pr-porting-labels.yaml index b16e5c371..ea3016b5a 100644 --- a/.github/workflows/require-pr-porting-labels.yaml +++ b/.github/workflows/require-pr-porting-labels.yaml @@ -36,7 +36,7 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install porting checker script run: | From 75dcca5a53368328c28aab365882e325df503633 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 31 Aug 2023 16:17:29 +0000 Subject: [PATCH 107/339] metrics: Add grabdata script for metrics report This PR adds the grabdata script so it can be used for the metrics report for kata 
metrics. Fixes #7812 Signed-off-by: Gabriela Cervantes (cherry picked from commit 666882575233736b745a0345ab910f43d581b810) --- tests/metrics/report/grabdata.sh | 168 +++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100755 tests/metrics/report/grabdata.sh diff --git a/tests/metrics/report/grabdata.sh b/tests/metrics/report/grabdata.sh new file mode 100755 index 000000000..3544339f8 --- /dev/null +++ b/tests/metrics/report/grabdata.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Run a set of the metrics tests to gather data to be used with the report +# generator. The general idea is to have the tests configured to generate +# useful, meaningful and repeatable (stable, with minimised variance) results. +# If the tests have to be run more or longer to achieve that, then generally +# that is fine - this test is not intended to be quick, it is intended to +# be repeatable. + +# Note - no 'set -e' in this file - if one of the metrics tests fails +# then we wish to continue to try the rest. +# Finally at the end, in some situations, we explicitly exit with a +# failure code if necessary. + +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_DIR}/../lib/common.bash" +RESULTS_DIR=${SCRIPT_DIR}/../results + +# By default we run all the tests +RUN_ALL=1 + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script gathers a number of metrics for use in the + report generation script. Which tests are run can be + configured on the commandline. Specifically enabling + individual tests will disable the 'all' option, unless + 'all' is also specified last. + Options: + -a, Run all tests (default). + -d, Run the density tests. + -h, Print this help. + -s, Run the storage tests. + -t, Run the time tests.
+EOF +) + echo "$usage" +} + +# Set up the initial state +init() { + metrics_onetime_init + + local OPTIND + while getopts "adhst" opt;do + case ${opt} in + a) + RUN_ALL=1 + ;; + d) + RUN_DENSITY=1 + RUN_ALL= + ;; + h) + help + exit 0; + ;; + s) + RUN_STORAGE=1 + RUN_ALL= + ;; + t) + RUN_TIME=1 + RUN_ALL= + ;; + ?) + # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) +} + +run_density_ksm() { + echo "Running KSM density tests" + + # Run the memory footprint test - the main test that + # KSM affects. Run for a sufficient number of containers + # (that gives us a fair view of how memory gets shared across + # containers), and a large enough timeout for KSM to settle. + # If KSM has not settled down by then, just take the measurement. + # 'auto' mode should detect when KSM has settled automatically. + bash density/memory_usage.sh 20 300 auto + + # Get a measure for the overhead we take from the container memory + bash density/memory_usage_inside_container.sh +} + +run_density() { + echo "Running non-KSM density tests" + + # Run the density tests - no KSM, so no need to wait for settle + # Set a token short timeout, and use enough containers to get a + # good average measurement. + bash density/memory_usage.sh 20 5 +} + +run_time() { + echo "Running time tests" + # Run the time tests - take time measures for an ubuntu image, over + # 100 'first and only container' launches. + # NOTE - whichever container you test here must support a full 'date' + # command - busybox based containers (including Alpine) will not work. + bash time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n 100 +} + +run_storage() { + echo "Running storage tests" + + bash storage/blogbench.sh +} + + +# Execute metrics scripts +run() { + pushd "$SCRIPT_DIR/.." 
+ + # If KSM is available on this platform, let's run any tests that are + # affected by having KSM on/off first, and then turn it off for the + # rest of the tests, as KSM may introduce some extra noise in the + # results by stealing CPU time for instance. + if [[ -f ${KSM_ENABLE_FILE} ]]; then + # No point enabling and disabling KSM if we have nothing to test. + if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then + save_ksm_settings + trap restore_ksm_settings EXIT QUIT KILL + set_ksm_aggressive + + run_density_ksm + + # And now ensure KSM is turned off for the rest of the tests + disable_ksm + fi + else + echo "No KSM control file, skipping KSM tests" + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_TIME" ]; then + run_time + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_DENSITY" ]; then + run_density + fi + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_STORAGE" ]; then + run_storage + fi + + popd +} + +finish() { + echo "Now please create a suitably descriptively named subdirectory in" + echo "$RESULTS_DIR and copy the .json results files into it before running" + echo "this script again." +} + +init "$@" +run +finish From 8c7a4fd121dd8a8f6a7716f61498c59f267621b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 1 Sep 2023 11:23:31 +0200 Subject: [PATCH 108/339] gha: Rebase atop of the target branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have two scenarios we care about this, `pull_request` and `pull_request_target` events triggered a job. `pull_request` event: When using the checkout action, it'll already provide a "rebased atop of main" repo for us, nothing else is needed, and that's basically what we already have as part of the jobs in our CI. `pull_request_target` event: This one is a little bit tricky, as the checkout action, unless passing a specific repo, gives us the PR checked out rebased atop of the HEAD of the PR branch.
Jeremi Piotrowski nicely pointed out that we could use github.event.pull_request.merge_commit_sha instead, which is the result of the PR's branch with the official repo target branch. Now, the only cases where the contributor's rebase would still be needed is when the action itself has been changed. Fixes: #7414 Signed-off-by: Fabiano Fidêncio (cherry picked from commit ac939c458cde2286f15b0fa3bbf43a3aae7778d6) --- .github/workflows/add-pr-sizing-label.yaml | 2 ++ .github/workflows/ci-on-push.yaml | 2 +- .github/workflows/move-issues-to-in-progress.yaml | 2 ++ .github/workflows/require-pr-porting-labels.yaml | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/add-pr-sizing-label.yaml b/.github/workflows/add-pr-sizing-label.yaml index b8d498363..b972616fa 100644 --- a/.github/workflows/add-pr-sizing-label.yaml +++ b/.github/workflows/add-pr-sizing-label.yaml @@ -22,6 +22,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Install PR sizing label script run: | diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index e1f3eaf96..f37f1b91c 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -25,7 +25,7 @@ jobs: if: ${{ contains(github.event.pull_request.labels.*.name, 'ok-to-test') }} uses: ./.github/workflows/ci.yaml with: - commit-hash: ${{ github.event.pull_request.head.sha }} + commit-hash: ${{ github.event.pull_request.merge_commit_sha }} pr-number: ${{ github.event.pull_request.number }} tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }} secrets: inherit diff --git a/.github/workflows/move-issues-to-in-progress.yaml b/.github/workflows/move-issues-to-in-progress.yaml index 64a453d64..2c4c3d2c8 100644 --- a/.github/workflows/move-issues-to-in-progress.yaml +++ b/.github/workflows/move-issues-to-in-progress.yaml @@ -39,6 +39,8 @@ jobs: - name: 
Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Move issue to "In progress" if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} diff --git a/.github/workflows/require-pr-porting-labels.yaml b/.github/workflows/require-pr-porting-labels.yaml index ea3016b5a..a06bbbf96 100644 --- a/.github/workflows/require-pr-porting-labels.yaml +++ b/.github/workflows/require-pr-porting-labels.yaml @@ -37,6 +37,8 @@ jobs: - name: Checkout code to allow hub to communicate with the project if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.merge_commit_sha }} - name: Install porting checker script run: | From d2d7c041f3009f92b3a2688ae31e426c87ff3213 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Mon, 28 Aug 2023 23:18:49 -0600 Subject: [PATCH 109/339] metrics: fix parsing issue on memory-usage test This PR fixes an issues in the parsing results stage, by collecting just the n-results from the n-running containers, discarding irrelevant data. Fixes: #7774 Signed-off-by: David Esparza (cherry picked from commit 538c965c2b79053f0b4d8fe2855196307b72932b) --- tests/metrics/density/memory_usage.sh | 86 ++++++++++++++------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/tests/metrics/density/memory_usage.sh b/tests/metrics/density/memory_usage.sh index f0505aaff..e9a9d2751 100755 --- a/tests/metrics/density/memory_usage.sh +++ b/tests/metrics/density/memory_usage.sh @@ -19,7 +19,7 @@ source "${SCRIPT_PATH}/../lib/common.bash" # Busybox image: Choose a small workload image, this is # in order to measure the runtime footprint, not the workload # footprint. 
-IMAGE='quay.io/prometheus/busybox:latest' +IMAGE="quay.io/prometheus/busybox:latest" CMD='tail -f /dev/null' NUM_CONTAINERS="$1" @@ -31,6 +31,11 @@ KSM_ENABLE_FILE="/sys/kernel/mm/ksm/run" MEM_TMP_FILE=$(mktemp meminfo.XXXXXXXXXX) PS_TMP_FILE=$(mktemp psinfo.XXXXXXXXXX) +# Variables used to collect memory footprint +global_hypervisor_mem=0 +global_virtiofsd_mem=0 +global_shim_mem=0 + function remove_tmp_file() { rm -rf "${MEM_TMP_FILE}" "${PS_TMP_FILE}" } @@ -85,14 +90,14 @@ EOF # This function measures the PSS average # memory of a process. function get_pss_memory(){ - ps="$1" - mem_amount=0 - count=0 - avg=0 + local ps="${1}" + local shim_result_on="${2:-0}" + local mem_amount=0 + local count=0 + local avg=0 - if [ -z "${ps}" ]; then - die "No argument to get_pss_memory()" - fi + [ -z "${ps}" ] && die "No argument to get_pss_memory()" + ps="$(readlink -f ${ps})" # Save all the processes names # This will be help us to retrieve raw information @@ -104,19 +109,23 @@ function get_pss_memory(){ # This will help us to retrieve raw information echo "${data}" >> "${MEM_TMP_FILE}" - gral_data=$(echo "${data// /+}" | bc) - for i in "${gral_data}"; do - if (( $i > 0 ));then - mem_amount=$(( i + mem_amount )) - (( count++ )) + for i in ${data[*]}; do + if [ ${i} -gt 0 ]; then + let "mem_amount+=i" + let "count+=1" + [ $count -eq $NUM_CONTAINERS ] && break fi done - if (( "${count}" > 0 ));then - avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") - fi + [ ${count} -eq 0 ] && die "No pss memory was measured for PID: ${ps}" - echo "${avg}" + avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") + + if [ "${shim_result_on}" -eq "1" ]; then + global_shim_mem="${avg}" + else + global_hypervisor_mem="${avg}" + fi } function ppid() { @@ -137,12 +146,9 @@ function get_pss_memory_virtiofsd() { avg=0 virtiofsd_path=${1:-} - if [ -z "${virtiofsd_path}" ]; then - die "virtiofsd_path not provided" - fi + [ -z "${virtiofsd_path}" ] && die "virtiofsd_path not provided" echo 
"${virtiofsd_path}" >> "${PS_TMP_FILE}" - virtiofsd_pids=$(ps aux | grep [v]irtiofsd | awk '{print $2}' | head -1) data=$(sudo smem --no-header -P "^${virtiofsd_path}" -c pid -c "pid pss") @@ -164,15 +170,13 @@ function get_pss_memory_virtiofsd() { if ((pss_process > 0)); then mem_amount=$((pss_process + mem_amount)) - ((count++)) + let "count+=1" fi fi done - if (( "${count}" > 0 ));then - avg=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") - fi - echo "${avg}" + [ "${count}" -gt 0 ] && global_virtiofsd_mem=$(bc -l <<< "scale=2; ${mem_amount} / ${count}") + } function get_individual_memory(){ @@ -278,22 +282,25 @@ EOF # Now if you do not have enough rights # the smem failure to read the stats will also be trapped. - hypervisor_mem="$(get_pss_memory ${HYPERVISOR_PATH})" - if [ "${hypervisor_mem}" == "0" ]; then + get_pss_memory ${HYPERVISOR_PATH} + + if [ "${global_hypervisor_mem}" == "0" ]; then die "Failed to find PSS for ${HYPERVISOR_PATH}" fi - virtiofsd_mem="$(get_pss_memory_virtiofsd ${VIRTIOFSD_PATH})" - if [ "${virtiofsd_mem}" == "0" ]; then + get_pss_memory_virtiofsd ${VIRTIOFSD_PATH} + + if [ "${global_virtiofsd_mem}" == "0" ]; then echo >&2 "WARNING: Failed to find PSS for ${VIRTIOFSD_PATH}" fi - shim_mem="$(get_pss_memory ${SHIM_PATH})" - if [ "${shim_mem}" == "0" ]; then + + get_pss_memory ${SHIM_PATH} 1 + + if [ "${global_shim_mem}" == "0" ]; then die "Failed to find PSS for ${SHIM_PATH}" fi - mem_usage="$(bc -l <<< "scale=2; ${hypervisor_mem} +${virtiofsd_mem} + ${shim_mem}")" - memory_usage="${mem_usage}" + mem_usage="$(bc -l <<< "scale=2; ${global_hypervisor_mem} + ${global_virtiofsd_mem} + ${global_shim_mem}")" local json="$(cat << EOF { @@ -302,15 +309,15 @@ EOF "Units" : "KB" }, "qemus": { - "Result": ${hypervisor_mem}, + "Result": ${global_hypervisor_mem}, "Units" : "KB" }, "virtiofsds": { - "Result": ${virtiofsd_mem}, + "Result": ${global_virtiofsd_mem}, "Units" : "KB" }, "shims": { - "Result": ${shim_mem}, + "Result": ${global_shim_mem}, 
"Units" : "KB" } } @@ -354,9 +361,7 @@ function main(){ #Check for KSM before reporting test name, as it can modify it check_for_ksm - - init_env - +# init_env check_cmds "${SMEM_BIN}" bc check_images "${IMAGE}" @@ -378,6 +383,7 @@ get_individual_memory fi + info "memory usage test completed" metrics_json_save clean_env_ctr } From 4403af74ec9552a8e9d61c0fc0f8e1f17c5888aa Mon Sep 17 00:00:00 2001 From: David Esparza Date: Fri, 1 Sep 2023 14:29:34 -0600 Subject: [PATCH 110/339] metrics: re-enable memory-usage initialization step This PR re-enables the initialization step disabled on 538c965c2b79053f0b4d8fe2855196307b72932b. Fixes: #7804 Signed-off-by: David Esparza (cherry picked from commit b151cfd140149f9abde2d5f291db812b80d4c3f7) --- tests/metrics/density/memory_usage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/density/memory_usage.sh b/tests/metrics/density/memory_usage.sh index e9a9d2751..4487f0dec 100755 --- a/tests/metrics/density/memory_usage.sh +++ b/tests/metrics/density/memory_usage.sh @@ -361,7 +361,7 @@ function main(){ #Check for KSM before reporting test name, as it can modify it check_for_ksm -# init_env + init_env check_cmds "${SMEM_BIN}" bc check_images "${IMAGE}" From 9b6c5eaff1c58c3c47cc56d87099ac2140eb69fc Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 4 Sep 2023 17:22:47 +0200 Subject: [PATCH 111/339] kata-deploy: Create kata-static.tar with correct ownership Pass --owner and --group to the tar invocation to prevent github runner user from leaking into release artifacts.
Fixes: #7832 Signed-off-by: Jeremi Piotrowski (cherry picked from commit 18c94ebbe352db3b413211f3093fef9aeb223cc7) --- .../kata-deploy/local-build/kata-deploy-merge-builds.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh index ff5ed3e4f..9d9ee3c40 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-merge-builds.sh @@ -37,5 +37,5 @@ pushd ${tarball_content_dir} popd echo "create ${tar_path}" -(cd "${tarball_content_dir}"; tar cvfJ "${tar_path}" .) +(cd "${tarball_content_dir}"; tar cvfJ "${tar_path}" --owner=0 --group=0 .) popd From 7fd7186780e7cc341b4ef2c05d73c499faa502d7 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Tue, 5 Sep 2023 14:34:38 +0800 Subject: [PATCH 112/339] CI: switch static-checks-dragonball CI machines to Azure Previously, static-checks-dragonball is using machines from Alibaba Cloud to run all the CI jobs. Currently, we are going through an internal process to apply for the new machines for Dragonball CI. Before the internal process is over, we will temporarily use Azure VM to run static-checks-dragonball jobs. 
fixes: #7838 Signed-off-by: Chao Wu (cherry picked from commit 60f733d3010996ea53e62a74b0fccc77fdfaf092) --- .github/workflows/static-checks-dragonball.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/static-checks-dragonball.yaml b/.github/workflows/static-checks-dragonball.yaml index d47689e3a..2c99210a1 100644 --- a/.github/workflows/static-checks-dragonball.yaml +++ b/.github/workflows/static-checks-dragonball.yaml @@ -14,7 +14,7 @@ concurrency: name: Static checks dragonball jobs: test-dragonball: - runs-on: dragonball + runs-on: garm-ubuntu-2004 env: RUST_BACKTRACE: "1" steps: @@ -23,6 +23,10 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends build-essential haveged - name: Install Rust if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | From 3583f373f58d0ecc4e1ceb21f4e9e2275c50568d Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 7 Aug 2023 21:11:02 +0000 Subject: [PATCH 113/339] metrics: Enable iperf benchmark on gha for kata metrics This PR enables the iperf benchmark to run on the gha for kata metrics. 
Fixes #7575 Signed-off-by: Gabriela Cervantes (cherry picked from commit 5b8db30422ff62aeb6bdb620e4b43eabd02ebee5) --- tests/metrics/gha-run.sh | 2 -- .../network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index a9121917a..bc8cbaf67 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -93,8 +93,6 @@ function run_test_fio() { function run_test_iperf() { info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" - # ToDo: remove the exit once the metrics workflow is stable - exit 0 bash network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh } diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh index 6593bb651..f9d24463b 100755 --- a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh @@ -18,6 +18,8 @@ # case 2" # container-server <----> host-client +set -x + set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") From d9408a72830bc481c71b37cfd0455524a7d02776 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 7 Aug 2023 21:15:22 +0000 Subject: [PATCH 114/339] metrics: Add test selector to iperf metrics This PR adds test selector to iperf metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit f609a9a754378e43efac82bfd0809d76dab6f834) --- tests/metrics/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index bc8cbaf67..e17736266 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -94,7 +94,7 @@ function run_test_fio() { function run_test_iperf() { info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" - bash network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh + bash tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh -a } function main() { From 907baa3464ef9e63186c51beb05cc16271077e0b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 23 Aug 2023 19:54:58 +0000 Subject: [PATCH 115/339] metrics: Add jitter value for clh This PR adds jitter value for clh for iperf metrics. Signed-off-by: Gabriela Cervantes (cherry picked from commit 6a79ecedf9aeca8b6933b15acb4b7ece9f10dc34) --- .../checkmetrics-json-clh-kata-metric8.toml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index e680c771e..a7217274b 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -110,3 +110,15 @@ checktype = "mean" midval = 38656.0 minpercent = 20.0 maxpercent = 20.0 + +name = "iperf" +type = "json" +description = "iperf" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"iperf\".Results | .[] | .jitter.Result" +checktype = "mean" +midval = 0.039 +minpercent = 20.0 +maxpercent = 20.0 From 651b89ba413d879a19eb51268d47ac36d8297d17 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 23 Aug 2023 20:01:26 
+0000 Subject: [PATCH 116/339] metrics: Add checkmetrics value for qemu for iperf This PR adds the checkmetrics value for qemu for iperf benchmark. Signed-off-by: Gabriela Cervantes (cherry picked from commit c1edfe551133f91669eb03925acbdbceac36863f) --- .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 56e661a49..f3a1bf52e 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -108,5 +108,15 @@ description = "measure write 90 percentile using fio" checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" checktype = "mean" midval = 37120.0 + +name = "iperf" +type = "json" +description = "iperf" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"iperf\".Results | .[] | .jitter.Result" +checktype = "mean" +midval = 0.50 minpercent = 20.0 maxpercent = 20.0 From 5e90c8e176460467cbab3c1db5ff8a8802b2ab4a Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 23 Aug 2023 20:02:50 +0000 Subject: [PATCH 117/339] metrics: Add checkmetrics to gha run script This PR adds the checkmetrics to gha run script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit e98e5cdea2c4d718a4090f6dd393aaee0b0e57ce) --- .../checkmetrics-json-clh-kata-metric8.toml | 11 ++++++----- .../checkmetrics-json-qemu-kata-metric8.toml | 13 ++++++++----- tests/metrics/gha-run.sh | 4 ++-- .../iperf3_kubernetes/k8s-network-metrics-iperf3.sh | 4 ++-- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index a7217274b..c852b6ec3 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -111,14 +111,15 @@ midval = 38656.0 minpercent = 20.0 maxpercent = 20.0 -name = "iperf" +[[metric]] +name = "network-iperf3" type = "json" description = "iperf" # Min and Max values to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"iperf\".Results | .[] | .jitter.Result" +checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" -midval = 0.039 -minpercent = 20.0 -maxpercent = 20.0 +midval = 0.044 +minpercent = 30.0 +maxpercent = 30.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index f3a1bf52e..66e8f0e22 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -108,15 +108,18 @@ description = "measure write 90 percentile using fio" checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" checktype = "mean" midval = 37120.0 +minpercent = 20.0 +maxpercent = 20.0 -name = "iperf" +[[metric]] +name = "network-iperf3" type = "json" description = "iperf" # Min and Max values 
to set a 'range' that # the median of the CSV Results data must fall # within (inclusive) -checkvar = ".\"iperf\".Results | .[] | .jitter.Result" +checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" -midval = 0.50 -minpercent = 20.0 -maxpercent = 20.0 +midval = 0.041 +minpercent = 30.0 +maxpercent = 30.0 diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index e17736266..77ef0fc25 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -87,14 +87,14 @@ function run_test_fio() { info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/storage/fio-k8s/fio-test-ci.sh - - check_metrics } function run_test_iperf() { info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh -a + + check_metrics } function main() { diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh index f9d24463b..9d434e912 100755 --- a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh @@ -18,8 +18,6 @@ # case 2" # container-server <----> host-client -set -x - set -o pipefail SCRIPT_PATH=$(dirname "$(readlink -f "$0")") @@ -179,6 +177,8 @@ function iperf3_start_deployment() { cmds=("bc" "jq") check_cmds "${cmds[@]}" + init_env + # Check no processes are left behind check_processes From 22ce1671a6adf8855378ee2b6ee2518c50445552 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 5 Sep 2023 21:00:05 +0000 Subject: [PATCH 118/339] metrics: Add write 95 percentile FIO value This PR adds the write 95 percentile FIO value for checkmetrics for kata metrics. 
Fixes #7842 Signed-off-by: Gabriela Cervantes (cherry picked from commit 024b4d2ffeacd4d7eaa5a0a3f8f3cccfbd07eb4d) --- .../checkmetrics-json-clh-kata-metric8.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index c852b6ec3..10434360d 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -111,6 +111,20 @@ midval = 38656.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "fio" +type = "json" +description = "measure write 95 percentile using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"fio\".Results | .[] | .write95percentile.Result" +checktype = "mean" +midval = 823296.0 +minpercent = 20.0 +maxpercent = 20.0 + + [[metric]] name = "network-iperf3" type = "json" From 374e77d3308fc8d04397dc0e0e3632c6d12a43eb Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 5 Sep 2023 21:03:37 +0000 Subject: [PATCH 119/339] metrics: Add write 95 percentile for FIO for qemu This PR adds the write 95 percentile for FIO for qemu for checkmetrics for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 438fbf9669d67fc000be685f1069431a3c2eb3da) --- .../checkmetrics-json-clh-kata-metric8.toml | 3 +-- .../checkmetrics-json-qemu-kata-metric8.toml | 13 +++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 10434360d..00bdcd8e0 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -120,11 +120,10 @@ description = "measure write 95 percentile using fio" # within (inclusive) checkvar = ".\"fio\".Results | .[] | .write95percentile.Result" checktype = "mean" -midval = 823296.0 +midval = 33024.0 minpercent = 20.0 maxpercent = 20.0 - [[metric]] name = "network-iperf3" type = "json" diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 66e8f0e22..2f7decae8 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -111,6 +111,19 @@ midval = 37120.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "fio" +type = "json" +description = "measure write 95 percentile using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"fio\".Results | .[] | .write95percentile.Result" +checktype = "mean" +midval = 43264.0 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From 899c823c0b887cca33f2d065fb91698a4fc09869 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Wed, 6 Sep 2023 11:12:03 +0200 Subject: [PATCH 120/339] packaging: do not 
install docker-compose-plugin for s390x|ppc64le This PR is to skip installing docker-compose-plugin while buiding a `build-kata-deploy` image for s390x|ppc64le. It is a temporary solution to fix current CI failures for s390x regarding `hash sum mismatch`. Fixes: #7848 Signed-off-by: Hyounggyu Choi (cherry picked from commit 2efda20c778940bf0ac096dcc435da74583378c6) --- tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 569a4ffdb..51e7ba431 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -19,7 +19,8 @@ RUN apt-get update && \ apt-get clean && rm -rf /var/lib/apt/lists/ && \ install_yq.sh && \ curl -fsSL https://get.docker.com -o get-docker.sh && \ - if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10"; fi && \ + if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10" && \ + sed -i 's/\//g' get-docker.sh; fi && \ sh get-docker.sh ARG IMG_USER=kata-builder From 315288a00010f1d2406e588613c60c2822f35972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 5 Sep 2023 14:50:27 +0200 Subject: [PATCH 121/339] ci: k8s: Add a function to deploy k3s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One can use different kubernetes flavours for getting a kubernetes cluster up and running. As part of our CI, though, I really would like to avoid contributors spending time maintaining and updating kubernetes dependencies, as done with the tests repo, and which has been proven to be really good on getting things rotten. 
With this in mind, I'm taking the bullet and using "k3s" as the way to deploy kubernetes for the devmapper related tests, and that's the reason I'm adding a function to do so, and this will be used later on as part of this series. It's important to note that the k3s setup doesn't take into consideration a BM machine, and this is not suitable for that. We're really only targetting GHA runners which will be thrown away after the run is over. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 54f71172129d9973b463453ff057c47f13de254e) --- tests/integration/kubernetes/gha-run.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 1dda036b1..70982121a 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -62,6 +62,24 @@ function deploy_kata() { echo "::endgroup::" } +function deploy_k3s() { + curl -sfL https://get.k3s.io | sh - + + # This is an arbitrary value that came up from local tests + wait 240s +} + +function deploy_k8s() { + echo "::group::Deploying ${KUBERNETES}" + + case ${KUBERNETES} in + k3s) deploy_k3s ;; + *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + echo "::endgroup::" +} + function run_tests() { # Delete any spurious tests namespace that was left behind kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true @@ -126,6 +144,7 @@ function main() { install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; create-cluster) create_cluster ;; + deploy-k8s) deploy_k8s ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; get-cluster-credentials) get_cluster_credentials ;; From a41a56e326414497d5b07814f59cf5cb85057da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 5 Sep 2023 16:18:34 +0200 Subject: [PATCH 122/339] ci: k8s: Add a function to configure devmapper for containerd MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function right now is completely based on what's part of the tests repo[0], and that's the reason I'm keeping the `Signed-off-by` of all the contributors to that file. This is not perfect, though, as it changes the default snapshotter to devmapper, instead of only doing so for the Kata Containers specific runtime handlers. OTOH, this is exactly what we've always been doing as part of the tests. We'll improve it, soon enough, when we get to also add a way for kata-deploy to set up different snapshotters for different handlers. But, for now, this is as good (or as bad) as it's always been. It's important to note that the devmapper setup doesn't take into consideration a BM machine, and this is not suitable for that. We're really only targetting GHA runners which will be thrown away after the run is over. Signed-off-by: Fabiano Fidêncio Signed-off-by: Shiming Zhang Signed-off-by: Marcel Apfelbaum Signed-off-by: Gabriela Cervantes (cherry picked from commit b28b54df04be36ac7a1c940320cefacbd572597c) --- tests/integration/kubernetes/gha-run.sh | 73 +++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 70982121a..2df72ac23 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -12,6 +12,78 @@ kubernetes_dir="$(dirname "$(readlink -f "$0")")" source "${kubernetes_dir}/../../gha-run-k8s-common.sh" tools_dir="${repo_root_dir}/tools" +function configure_devmapper() { + sudo mkdir -p /var/lib/containerd/devmapper + sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img + sudo truncate --size 10G /var/lib/containerd/devmapper/meta-disk.img + + cat<&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + # We're not using this with baremetal machines, so we're fine on cutting + # corners here and just append this to the 
configuration file. + cat<&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac +} + +function configure_snapshotter() { + echo "::group::Configuring ${SNAPSHOTTER}" + + case ${SNAPSHOTTER} in + devmapper) configure_devmapper ;; + *) >&2 echo "${SNAPSHOTTER} flavour is not supported"; exit 2 ;; + esac + + echo "::endgroup::" +} + function deploy_kata() { platform="${1}" ensure_yq @@ -144,6 +216,7 @@ function main() { install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; create-cluster) create_cluster ;; + configure-snapshotter) configure_snapshotter ;; deploy-k8s) deploy_k8s ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; From 79de72592f8400e0066c83b8f1c19ff3bc60f4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 5 Sep 2023 14:35:46 +0200 Subject: [PATCH 123/339] ci: k8s: Add k8s devmapper tests (part 0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's enable the devmapper kubernetes tests to match exactly what's been tested as part of the Jenkins CI. 
Fixes: #6542 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 0e8bd50cbbe9309ca13bda913692f50b4d926b82) --- .github/workflows/ci.yaml | 11 ++++ .github/workflows/run-k8s-tests-on-garm.yaml | 65 ++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 .github/workflows/run-k8s-tests-on-garm.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1e0cac69a..d627ce788 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -92,6 +92,17 @@ jobs: pr-number: ${{ inputs.pr-number }} secrets: inherit + run-k8s-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + secrets: inherit + run-k8s-tests-on-sev: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-k8s-tests-on-sev.yaml diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml new file mode 100644 index 000000000..530de3cf7 --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -0,0 +1,65 @@ +name: CI | Run kubernetes tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh #cloud-hypervisor + - fc #firecracker + - qemu + snapshotter: + - devmapper + k8s: + - k3s + runs-on: garm-ubuntu-2204 + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: 
${{ matrix.k8s }} + SNAPSHOTTER: ${{ matrix.snapshotter }} + USING_NFD: "false" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integrations/kubernetes/gha-run.sh deploy-k8s + + - name: Configure the ${{ matrix.snapshotter }} snapshotter + run: bash tests/integrtion/kubernetes/gha-run.sh configure-snapshotter + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx From 1b7ffeac531f5c65c0926533779af53e86139912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 11:27:04 +0200 Subject: [PATCH 124/339] ci: k8s: Fix typo in run-k8s-tests-on-garm.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit integrations -> integration integrtion -> integration Fixes: #6542 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8d99972a8a3cdf55f695f7ca2af7126d4cd47af2) --- .github/workflows/run-k8s-tests-on-garm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index 530de3cf7..eee9b48a3 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -47,10 +47,10 @@ jobs: ref: ${{ inputs.commit-hash }} - name: Deploy ${{ matrix.k8s }} - run: bash tests/integrations/kubernetes/gha-run.sh deploy-k8s + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s - name: Configure the ${{ matrix.snapshotter }} snapshotter - run: bash tests/integrtion/kubernetes/gha-run.sh configure-snapshotter + run: bash tests/integration/kubernetes/gha-run.sh configure-snapshotter - name: Deploy Kata 
timeout-minutes: 10 From 92fdaf971977b27cc3d1f49d4a230c76288c0524 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 7 Sep 2023 15:37:06 +0000 Subject: [PATCH 125/339] metrics: Use TensorFlow optimized image This PR replaces the ubuntu image for one which has TensorFlow optimized for kata metrics. Fixes #7866 Signed-off-by: Gabriela Cervantes (cherry picked from commit 3a427795eac910a3f68cd1e07a3e997e152d49cf) --- .../machine_learning/resnet50_fp32_dockerfile/Dockerfile | 2 +- .../machine_learning/resnet50_int8_dockerfile/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile index a0e796456..edbdf7370 100644 --- a/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile +++ b/tests/metrics/machine_learning/resnet50_fp32_dockerfile/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # Usage: FROM [image name] -FROM ubuntu:20.04 +FROM intel/intel-optimized-tensorflow:2.9.1 ENV DEBIAN_FRONTEND=noninteractive diff --git a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile index a033475ee..6e0c1e611 100644 --- a/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile +++ b/tests/metrics/machine_learning/resnet50_int8_dockerfile/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # Usage: FROM [image name] -FROM ubuntu:20.04 +FROM intel/intel-optimized-tensorflow:2.9.1 ENV DEBIAN_FRONTEND=noninteractive From 7a96d0a58991a74fe60364adefb5c42d7d87da7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 12:26:47 +0200 Subject: [PATCH 126/339] ci: k8s: Use the proper command for sleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `wait` waits for a job to complete, not a number of seconds. 
Not sure how I got that wrong in the first place, but it's what it's. Fixes: #6542 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 028a97e0d5552153a4ae1469e4cf9715b1f1db25) --- tests/integration/kubernetes/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 2df72ac23..6d3c33efe 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -138,7 +138,7 @@ function deploy_k3s() { curl -sfL https://get.k3s.io | sh - # This is an arbitrary value that came up from local tests - wait 240s + sleep 240s } function deploy_k8s() { From 7f865be39870e122bda94afde682d81665206299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 13:02:47 +0200 Subject: [PATCH 127/339] ci: k8s: Ensure k3s is deploy with --write-kubeconfig-mode=644 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the /etc/rancher/k3s/k3s.yaml is not readable by other users than root. As --write-config-mode is being passed, and that's an option that has to be passed to the `server`, -s is also added to the command line. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ad45ab5d337b8bf56d9ec2b8a203d9edab86fe2e) --- tests/integration/kubernetes/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 6d3c33efe..3e579ff4c 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -135,7 +135,7 @@ function deploy_kata() { } function deploy_k3s() { - curl -sfL https://get.k3s.io | sh - + curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 # This is an arbitrary value that came up from local tests sleep 240s From 695c7162ef09dbe6469153eea366c344de2141f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 16:27:21 +0200 Subject: [PATCH 128/339] ci: k8s: Use vanilla kubectl with k3s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's download the vanilla kubectl binary into `/usr/bin/`, as we need to avoid hitting issues like: ```sh error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied ``` The issue basically happens because k3s links `/usr/local/bin/kubectl` to `/usr/local/bin/k3s`, and that does extra stuff that vanilla `kubectl` doesn't do. Also, in order to properly use the k3s.yaml config with the vanilla kubectl, we're copying it to ~/.kube/config. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ada65b988a33e743e9c043d419b5b7f177eca836) --- tests/integration/kubernetes/gha-run.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 3e579ff4c..9f5d63b73 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -139,6 +139,27 @@ function deploy_k3s() { # This is an arbitrary value that came up from local tests sleep 240s + + # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl + # + # We need to do this to avoid hitting issues like: + # ```sh + # error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied + # ``` + # Which happens basically because k3s links `/usr/local/bin/kubectl` + # to `/usr/local/bin/k3s`, and that does extra stuff that vanilla + # `kubectl` doesn't do. + ARCH=$(uname -m) + if [ "${ARCH}" = "x86_64" ]; then + ARCH=amd64 + fi + kubectl_version=$(/usr/local/bin/k3s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/\+k3s1//') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl + sudo chmod +x /usr/bin/kubectl + sudo rm -rf /usr/local/bin/kubectl + + mkdir -p ~/.kube + cp /etc/rancher/k3s/k3s.yaml ~/.kube/config } function deploy_k8s() { From bb675f8101281ad36194ad0f3f01a793c55fa223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 19:49:52 +0200 Subject: [PATCH 129/339] ci: k8s: Decrease k3s sleep from 4 to 2 minutes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should be plenty, and worked well in local tests. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 369a8af8f74bfacac41ba57b5fdb42d90adf190c) --- tests/integration/kubernetes/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 9f5d63b73..793c67387 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -138,7 +138,7 @@ function deploy_k3s() { curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 # This is an arbitrary value that came up from local tests - sleep 240s + sleep 120s # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl # From 89345b6731cc30f2d31e98d122c5e14e8a50b7d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 21:47:29 +0200 Subject: [PATCH 130/339] ci: k8s: Append, instead of overwrite, the devmapper config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we were using `tee` without the `-a` (or `--apend`) aptton, the containerd config would be overwritten, leading to a NotReady state of the Node. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2df183fd9993696e98651f5e525ced67966e90d5) --- tests/integration/kubernetes/gha-run.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 793c67387..16845e0e2 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -59,7 +59,7 @@ EOF # We're not using this with baremetal machines, so we're fine on cutting # corners here and just append this to the configuration file. 
- cat<&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; esac + + sudo cat ${containerd_config_file} } function configure_snapshotter() { From 053308eefc5635de18294b9104ea7c7920fc75c5 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Thu, 7 Sep 2023 15:41:59 -0600 Subject: [PATCH 131/339] metrics: fix FIO test initialization This PR changes the order in which the FIO test first cleans the environment and then checks if the environment is indeed clean. Fixes: #7869 Signed-off-by: David Esparza (cherry picked from commit adfea55b8f347d0caa13d29c8f6f16a6cc1a8da4) --- tests/metrics/storage/fio-k8s/fio-test-ci.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/storage/fio-k8s/fio-test-ci.sh b/tests/metrics/storage/fio-k8s/fio-test-ci.sh index 0576647f0..9933bffdb 100755 --- a/tests/metrics/storage/fio-k8s/fio-test-ci.sh +++ b/tests/metrics/storage/fio-k8s/fio-test-ci.sh @@ -15,8 +15,8 @@ TEST_NAME="${TEST_NAME:-fio}" function main() { cmds=("bc" "jq") check_cmds "${cmds[@]}" - check_processes init_env + check_processes pushd "${FIO_PATH}" [ -z "${KATA_HYPERVISOR}" ] && die "Hypervisor ID is missing." From 9fb291d88a62a434303a47b9a9bde20b9516ffdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 23:46:58 +0200 Subject: [PATCH 132/339] ci: k8s: Wait some time after restarting k3s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's put a 1 minute sleep, just to make sure everything is back up again. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 3de23034f8821cbb514b3ce261842ffae9793ad4) --- tests/integration/kubernetes/gha-run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 16845e0e2..c12f886cf 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -72,6 +72,7 @@ EOF *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; esac + sleep 60s sudo cat ${containerd_config_file} } From 7ce5c8b3fa90d0da419b4b8f801b505083ba2192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 09:57:25 +0200 Subject: [PATCH 133/339] ci: k8s: Install bats on GARM runners MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GARM runners do not come with the whole set of tools we need, or are used to when it comes to the GHA runners, so we need to manually install bats on those. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8c9380a7980aa113ba91b626788b2036bb9ec919) --- .github/workflows/run-k8s-tests-on-garm.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index eee9b48a3..2c90ec72b 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -55,6 +55,9 @@ jobs: - name: Deploy Kata timeout-minutes: 10 run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats - name: Run tests timeout-minutes: 30 From 5bb77b628db47845950a77fb2c7158093ca8d96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 10:02:08 +0200 Subject: [PATCH 134/339] ci: k8s: Export KUBERNETES env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we have a better control on which flavour of kubernetes kata-deploy is expected to be targetting. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit fa62a4c01b9ffac59bfe8740ee9c82e4c4f2dc81) --- .github/workflows/run-k8s-tests-on-aks.yaml | 1 + .github/workflows/run-k8s-tests-on-sev.yaml | 1 + .github/workflows/run-k8s-tests-on-snp.yaml | 1 + .github/workflows/run-k8s-tests-on-tdx.yaml | 1 + tests/integration/kubernetes/gha-run.sh | 4 ++-- 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index 130be1829..261454bb2 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -40,6 +40,7 @@ jobs: GH_PR_NUMBER: ${{ inputs.pr-number }} KATA_HOST_OS: ${{ matrix.host_os }} KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" USING_NFD: "false" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml index bb4c5d0bd..4260f4335 100644 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -33,6 +33,7 @@ jobs: PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" USING_NFD: "false" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml index fde2131a7..34cb5e6cb 100644 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -33,6 +33,7 @@ jobs: PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" USING_NFD: "false" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml index 3cf2e9b10..1e15dd5a6 100644 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -32,6 +32,7 @@ 
jobs: DOCKER_TAG: ${{ inputs.tag }} PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" USING_NFD: "true" steps: - uses: actions/checkout@v3 diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index c12f886cf..657d7161a 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -112,7 +112,7 @@ function deploy_kata() { echo "::endgroup::" kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then kubectl apply -k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s" else kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" @@ -207,7 +207,7 @@ function cleanup() { kubectl config set-context --current --namespace=default kubectl delete namespace kata-containers-k8s-tests - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then deploy_spec="-k "${tools_dir}/packaging/kata-deploy/kata-deploy/overlays/k3s"" cleanup_spec="-k "${tools_dir}/packaging/kata-deploy/kata-cleanup/overlays/k3s"" else From aee6f36c86c85eec195ef6edc1ac65ad21cb891c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 09:59:57 +0200 Subject: [PATCH 135/339] ci: k8s: Add a kata-deploy-garm target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've been using the `kata-deploy-tdx` target as that also uses k3s as base, but it's better to just have a specific garm target. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 27fa7d828d2f5d35a5530b85e666ed58b785dc25) --- .github/workflows/run-k8s-tests-on-garm.yaml | 4 ++-- tests/integration/kubernetes/gha-run.sh | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index 2c90ec72b..de5ed31cf 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -54,7 +54,7 @@ jobs: - name: Deploy Kata timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm - name: Install `bats` run: bash tests/integration/kubernetes/gha-run.sh install-bats @@ -65,4 +65,4 @@ jobs: - name: Delete kata-deploy if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 657d7161a..ae1a9d23c 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -249,6 +249,7 @@ function main() { deploy-kata-sev) deploy_kata "sev" ;; deploy-kata-snp) deploy_kata "snp" ;; deploy-kata-tdx) deploy_kata "tdx" ;; + deploy-kata-garm) deploy_kata "garm" ;; run-tests) run_tests ;; cleanup-sev) cleanup "sev" ;; cleanup-snp) cleanup "snp" ;; From c723a7d9c89df8352c0fcb3e8a271f1bd666d8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 10:08:13 +0200 Subject: [PATCH 136/339] ci: k8s: devmapper tests should be using ubuntu 20.04 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That's what we've been using as part of Jenkins, so let's ensure things will work as they did before, and only after that consider upgrading the base OS used for the tests. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit aaec5a09f32574dacdabb95fd9fae079a084ead8) --- .github/workflows/run-k8s-tests-on-garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index de5ed31cf..4588be335 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -31,7 +31,7 @@ jobs: - devmapper k8s: - k3s - runs-on: garm-ubuntu-2204 + runs-on: garm-ubuntu-2004 env: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} From 8892d9a7b28f833f232df68edd0414d4eed16311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 11:49:24 +0200 Subject: [PATCH 137/339] ci: k8s: Add clean-up-garm argument for gha-run.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tests are failing to finish as the argument is invalid. Fixes: #6542 Signed-off-by: Fabiano Fidêncio (cherry picked from commit b5bad3cb0ff5ad73a0b9910608bb3d2278251ff9) --- tests/integration/kubernetes/gha-run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index ae1a9d23c..c53583aff 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -254,6 +254,7 @@ function main() { cleanup-sev) cleanup "sev" ;; cleanup-snp) cleanup "snp" ;; cleanup-tdx) cleanup "tdx" ;; + cleanup-garm) cleanup "garm" ;; delete-cluster) cleanup "aks" ;; *) >&2 echo "Invalid argument"; exit 2 ;; esac From 7e135294a7325ae8eb711d075f61facdf8f86aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 14:23:43 +0200 Subject: [PATCH 138/339] ci: k8s: Also check for "fc" (for firecracker) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's keep both checks for 
now, but in the future we'll be able to remove the check for "firecracker", as the hypervisor name used as part of the GitHub Actions has to match what's used as part of the kata-deploy stuff, which is `fc` (as in `kata-fc for the runtime class) instead of `firecracker`. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 3cc20b47a683cfc67f4a97831349417d9e57528f) --- tests/integration/kubernetes/k8s-cpu-ns.bats | 2 ++ tests/integration/kubernetes/k8s-credentials-secrets.bats | 2 ++ tests/integration/kubernetes/k8s-file-volume.bats | 2 ++ tests/integration/kubernetes/k8s-inotify.bats | 2 ++ tests/integration/kubernetes/k8s-nested-configmap-secret.bats | 2 ++ tests/integration/kubernetes/k8s-projected-volume.bats | 2 ++ tests/integration/kubernetes/k8s-volume.bats | 2 ++ 7 files changed, 14 insertions(+) diff --git a/tests/integration/kubernetes/k8s-cpu-ns.bats b/tests/integration/kubernetes/k8s-cpu-ns.bats index f3c69a2f6..d8db75d17 100644 --- a/tests/integration/kubernetes/k8s-cpu-ns.bats +++ b/tests/integration/kubernetes/k8s-cpu-ns.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ "${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ && skip "TEEs do not support memory / CPU hotplug" @@ -76,6 +77,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ 
"${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ && skip "TEEs do not support memory / CPU hotplug" diff --git a/tests/integration/kubernetes/k8s-credentials-secrets.bats b/tests/integration/kubernetes/k8s-credentials-secrets.bats index 51d2ba995..b8c03dd9c 100644 --- a/tests/integration/kubernetes/k8s-credentials-secrets.bats +++ b/tests/integration/kubernetes/k8s-credentials-secrets.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @@ -52,6 +53,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats index fd09d7886..70462c223 100644 --- a/tests/integration/kubernetes/k8s-file-volume.bats +++ b/tests/integration/kubernetes/k8s-file-volume.bats @@ -11,6 +11,7 @@ TEST_INITRD="${TEST_INITRD:-no}" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" pod_name="test-file-volume" container_name="busybox-file-volume-container" tmp_file=$(exec_host mktemp /tmp/file-volume-test-foo.XXXXX) @@ -41,6 +42,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" kubectl delete pod "$pod_name" exec_host rm -f $tmp_file rm -f ${pod_config_dir}/test-pod-file-volume.yaml.yaml diff --git a/tests/integration/kubernetes/k8s-inotify.bats 
b/tests/integration/kubernetes/k8s-inotify.bats index f3dbc073f..9a87b0a96 100644 --- a/tests/integration/kubernetes/k8s-inotify.bats +++ b/tests/integration/kubernetes/k8s-inotify.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @@ -40,6 +41,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" kubectl delete pod "$pod_name" diff --git a/tests/integration/kubernetes/k8s-nested-configmap-secret.bats b/tests/integration/kubernetes/k8s-nested-configmap-secret.bats index b84fb89cc..ebffb6e18 100644 --- a/tests/integration/kubernetes/k8s-nested-configmap-secret.bats +++ b/tests/integration/kubernetes/k8s-nested-configmap-secret.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir @@ -30,6 +31,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-projected-volume.bats b/tests/integration/kubernetes/k8s-projected-volume.bats index 33788e475..9c6484b54 100644 --- a/tests/integration/kubernetes/k8s-projected-volume.bats +++ b/tests/integration/kubernetes/k8s-projected-volume.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { [ "${KATA_HYPERVISOR}" == 
"firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir } @@ -53,6 +54,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" diff --git a/tests/integration/kubernetes/k8s-volume.bats b/tests/integration/kubernetes/k8s-volume.bats index 0489ff461..705794443 100644 --- a/tests/integration/kubernetes/k8s-volume.bats +++ b/tests/integration/kubernetes/k8s-volume.bats @@ -11,6 +11,7 @@ TEST_INITRD="${TEST_INITRD:-no}" setup() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" get_pod_config_dir @@ -55,6 +56,7 @@ setup() { teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" # Debugging information kubectl describe "pod/$pod_name" From 263ed4afd1d5b283ca7255216986b0652b947e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 14:25:29 +0200 Subject: [PATCH 139/339] ci: k8s: Remove useless skip statement from tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's absolutely no need to have the skip check as part of the test itself when it's already done as part of the setup function. We're only touching the files here that were touched in the previous commit. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit f6cd3930c5ad25dd125b477ab3fa7592e37c00d8) --- tests/integration/kubernetes/k8s-cpu-ns.bats | 6 ------ tests/integration/kubernetes/k8s-credentials-secrets.bats | 2 -- tests/integration/kubernetes/k8s-file-volume.bats | 1 - tests/integration/kubernetes/k8s-projected-volume.bats | 2 -- 4 files changed, 11 deletions(-) diff --git a/tests/integration/kubernetes/k8s-cpu-ns.bats b/tests/integration/kubernetes/k8s-cpu-ns.bats index d8db75d17..878e761a2 100644 --- a/tests/integration/kubernetes/k8s-cpu-ns.bats +++ b/tests/integration/kubernetes/k8s-cpu-ns.bats @@ -29,12 +29,6 @@ setup() { } @test "Check CPU constraints" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - [ "${KATA_HYPERVISOR}" == "dragonball" ] && skip "test not working see: ${dragonball_limitations}" - ( [ "${KATA_HYPERVISOR}" == "qemu-tdx" ] || [ "${KATA_HYPERVISOR}" == "qemu-snp" ] || [ "${KATA_HYPERVISOR}" == "qemu-sev" ] ) \ - && skip "TEEs do not support memory / CPU hotplug" - - # Create the pod kubectl create -f "${pod_config_dir}/pod-cpu.yaml" diff --git a/tests/integration/kubernetes/k8s-credentials-secrets.bats b/tests/integration/kubernetes/k8s-credentials-secrets.bats index b8c03dd9c..8becf2e2e 100644 --- a/tests/integration/kubernetes/k8s-credentials-secrets.bats +++ b/tests/integration/kubernetes/k8s-credentials-secrets.bats @@ -16,8 +16,6 @@ setup() { } @test "Credentials using secrets" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - secret_name="test-secret" pod_name="secret-test-pod" second_pod_name="secret-envars-test-pod" diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats index 70462c223..d849db2ed 100644 --- a/tests/integration/kubernetes/k8s-file-volume.bats +++ b/tests/integration/kubernetes/k8s-file-volume.bats @@ -21,7 +21,6 @@ setup() { } @test "Test readonly volume 
for pods" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" # Write test body to temp file exec_host "echo "$file_body" > $tmp_file" diff --git a/tests/integration/kubernetes/k8s-projected-volume.bats b/tests/integration/kubernetes/k8s-projected-volume.bats index 9c6484b54..87160eeb5 100644 --- a/tests/integration/kubernetes/k8s-projected-volume.bats +++ b/tests/integration/kubernetes/k8s-projected-volume.bats @@ -16,8 +16,6 @@ setup() { } @test "Projected volume" { - [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" - password="1f2d1e2e67df" username="admin" pod_name="test-projected-volume" From bb5dbfbbcebcb104fd490a464ddb7b4d14f98297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 8 Sep 2023 15:51:46 +0200 Subject: [PATCH 140/339] k8s: ci: Skip "Pod quota" test with firecracker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test is failing, and an issue has been opened to track it. For now, let's skip it. 
Issue: https://github.com/kata-containers/kata-containers/issues/7873 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 9d74b7ccc9136abc2d3eea4c2f216edefa5ba765) Conflicts: tests/integration/kubernetes/k8s-pod-quota.bats --- tests/integration/kubernetes/k8s-pod-quota.bats | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/kubernetes/k8s-pod-quota.bats b/tests/integration/kubernetes/k8s-pod-quota.bats index 20ad06274..f74d8a595 100644 --- a/tests/integration/kubernetes/k8s-pod-quota.bats +++ b/tests/integration/kubernetes/k8s-pod-quota.bats @@ -8,6 +8,8 @@ load "${BATS_TEST_DIRNAME}/../../common.bash" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: https://github.com/kata-containers/kata-containers/issues/7873" + get_pod_config_dir } @@ -31,6 +33,12 @@ setup() { } teardown() { + [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: https://github.com/kata-containers/kata-containers/issues/7873" + + # Debugging information + kubectl describe deployment ${deployment_name} + + # Clean-up kubectl delete -f "${pod_config_dir}/pod-quota-deployment.yaml" kubectl delete -f "${pod_config_dir}/resource-quota.yaml" } From db563709e3b4948248ccaf152bd4b41022323d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 7 Sep 2023 12:23:23 +0200 Subject: [PATCH 141/339] kata-deploy: Switch to an alpine image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make our image smaller, and still ensure it's multi-arch support. 
Fixes: #7861 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 670a8e9c73d55ad12b5204320b389c6a64b5d403) --- tools/packaging/kata-deploy/Dockerfile | 27 ++++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index 8e7f6e2ac..2db312458 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -2,30 +2,23 @@ # # SPDX-License-Identifier: Apache-2.0 -# Specify alternative base image, e.g. clefos for s390x -ARG BASE_IMAGE_NAME=ubuntu -ARG BASE_IMAGE_TAG=20.04 +ARG BASE_IMAGE_NAME=alpine +ARG BASE_IMAGE_TAG=3.18 FROM $BASE_IMAGE_NAME:$BASE_IMAGE_TAG -ENV DEBIAN_FRONTEND=noninteractive ARG KATA_ARTIFACTS=./kata-static.tar.xz ARG DESTINATION=/opt/kata-artifacts COPY ${KATA_ARTIFACTS} ${WORKDIR} -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - RUN \ -apt-get update && \ -apt-get install -y --no-install-recommends apt-transport-https ca-certificates curl gpg xz-utils systemd && \ -mkdir -p /etc/apt/keyrings/ && \ -curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /etc/apt/keyrings/kubernetes-archive-keyring.gpg && \ -echo "deb [signed-by=/etc/apt/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | tee /etc/apt/sources.list.d/kubernetes.list && \ -apt-get update && \ -apt-get install -y --no-install-recommends kubectl && \ -apt-get clean && rm -rf /var/lib/apt/lists/ && \ -mkdir -p ${DESTINATION} && \ -tar xvf ${WORKDIR}/${KATA_ARTIFACTS} -C ${DESTINATION} && \ -rm -f ${WORKDIR}/${KATA_ARTIFACTS} + apk --no-cache add bash curl && \ + ARCH=$(uname -m) && \ + if [ "${ARCH}" = "x86_64" ]; then ARCH=amd64; fi && \ + curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl && \ + chmod +x /usr/bin/kubectl && \ + mkdir -p 
${DESTINATION} && \ + tar xvf ${WORKDIR}/${KATA_ARTIFACTS} -C ${DESTINATION} && \ + rm -f ${WORKDIR}/${KATA_ARTIFACTS} COPY scripts ${DESTINATION}/scripts COPY runtimeclasses ${DESTINATION}/runtimeclasses From 61b1a99fcaba524b71a002b107d7b53a0b978a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 31 Aug 2023 15:48:05 +0200 Subject: [PATCH 142/339] gha: Manually rebase PR atop of the target branch before testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're changing what's been done as part of ac939c458cde, as we've notcied issues using `github.event.pull_request.merge_commit_sha`. Basically, whenever a force-push would happen, the reference of merge_commit_sha wouldn't be updated, leading us to test PRs with the old code. :-/ In order to get the rebase properly working, we need to ensure we pull the hash of the commit as part of checkout action, and ensure fetch-depth is set to 0. Fixes: #7414 Signed-off-by: Fabiano Fidêncio (cherry picked from commit bd24afcf737f94481992e2d88b27e66cfae64df8) --- .github/workflows/add-pr-sizing-label.yaml | 9 ++++- .../build-kata-static-tarball-amd64.yaml | 16 ++++++++ .../build-kata-static-tarball-arm64.yaml | 17 +++++++++ .../build-kata-static-tarball-s390x.yaml | 17 +++++++++ .github/workflows/ci-on-push.yaml | 3 +- .github/workflows/ci.yaml | 22 +++++++++++ .../workflows/move-issues-to-in-progress.yaml | 10 ++++- .../workflows/require-pr-porting-labels.yaml | 10 ++++- .../workflows/run-cri-containerd-tests.yaml | 11 ++++++ .github/workflows/run-k8s-tests-on-aks.yaml | 11 ++++++ .github/workflows/run-k8s-tests-on-garm.yaml | 11 ++++++ .github/workflows/run-k8s-tests-on-sev.yaml | 11 ++++++ .github/workflows/run-k8s-tests-on-snp.yaml | 11 ++++++ .github/workflows/run-k8s-tests-on-tdx.yaml | 11 ++++++ .../run-kata-deploy-tests-on-aks.yaml | 11 ++++++ .../run-kata-deploy-tests-on-tdx.yaml | 11 ++++++ .github/workflows/run-metrics.yaml | 11 ++++++ 
.github/workflows/run-nydus-tests.yaml | 11 ++++++ .github/workflows/run-vfio-tests.yaml | 11 ++++++ tests/git-helper.sh | 37 +++++++++++++++++++ 20 files changed, 258 insertions(+), 4 deletions(-) create mode 100755 tests/git-helper.sh diff --git a/.github/workflows/add-pr-sizing-label.yaml b/.github/workflows/add-pr-sizing-label.yaml index b972616fa..2fd0abc64 100644 --- a/.github/workflows/add-pr-sizing-label.yaml +++ b/.github/workflows/add-pr-sizing-label.yaml @@ -23,7 +23,14 @@ jobs: - name: Checkout code uses: actions/checkout@v3 with: - ref: ${{ github.event.pull_request.merge_commit_sha }} + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install PR sizing label script run: | diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 869d49bc6..67020ae60 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -16,6 +16,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: @@ -66,6 +70,12 @@ jobs: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -92,6 +102,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch 
}} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index cafc6e020..689ec04c9 100644 --- a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -16,6 +16,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: @@ -52,6 +56,13 @@ jobs: with: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -82,6 +93,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml b/.github/workflows/build-kata-static-tarball-s390x.yaml index 0fe7e9200..75856e7e4 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -16,6 +16,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: build-asset: @@ -48,6 +52,13 @@ jobs: with: ref: ${{ inputs.commit-hash }} fetch-depth: 0 # This is needed in order to keep the commit ids history + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Build ${{ 
matrix.asset }} run: | make "${KATA_ASSET}-tarball" @@ -79,6 +90,12 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-artifacts uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index f37f1b91c..59a297b78 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -25,7 +25,8 @@ jobs: if: ${{ contains(github.event.pull_request.labels.*.name, 'ok-to-test') }} uses: ./.github/workflows/ci.yaml with: - commit-hash: ${{ github.event.pull_request.merge_commit_sha }} + commit-hash: ${{ github.event.pull_request.head.sha }} pr-number: ${{ github.event.pull_request.number }} tag: ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }} + target-branch: ${{ github.event.pull_request.base.ref }} secrets: inherit diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d627ce788..b80a3f322 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,6 +11,10 @@ on: tag: required: true type: string + target-branch: + required: false + type: string + default: "" jobs: build-kata-static-tarball-amd64: @@ -28,6 +32,7 @@ jobs: repo: ${{ github.repository_owner }}/kata-deploy-ci tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} secrets: inherit build-and-publish-tee-confidential-unencrypted-image: @@ -37,6 +42,13 @@ jobs: uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Set up QEMU uses: docker/setup-qemu-action@v2 @@ 
-69,6 +81,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} secrets: inherit run-kata-deploy-tests-on-tdx: @@ -80,6 +93,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} run-k8s-tests-on-aks: needs: publish-kata-deploy-payload-amd64 @@ -90,6 +104,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} secrets: inherit run-k8s-tests-on-garm: @@ -112,6 +127,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} run-k8s-tests-on-snp: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] @@ -122,6 +138,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} run-k8s-tests-on-tdx: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] @@ -132,6 +149,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} run-metrics-tests: needs: build-kata-static-tarball-amd64 @@ -139,6 +157,7 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} run-cri-containerd-tests: needs: build-kata-static-tarball-amd64 @@ -146,6 +165,7 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} run-nydus-tests: needs: build-kata-static-tarball-amd64 @@ -153,6 +173,7 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ 
inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} run-vfio-tests: needs: build-kata-static-tarball-amd64 @@ -160,3 +181,4 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/move-issues-to-in-progress.yaml b/.github/workflows/move-issues-to-in-progress.yaml index 2c4c3d2c8..23819e18c 100644 --- a/.github/workflows/move-issues-to-in-progress.yaml +++ b/.github/workflows/move-issues-to-in-progress.yaml @@ -40,7 +40,15 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 with: - ref: ${{ github.event.pull_request.merge_commit_sha }} + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Move issue to "In progress" if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} diff --git a/.github/workflows/require-pr-porting-labels.yaml b/.github/workflows/require-pr-porting-labels.yaml index a06bbbf96..4f799c4ba 100644 --- a/.github/workflows/require-pr-porting-labels.yaml +++ b/.github/workflows/require-pr-porting-labels.yaml @@ -38,7 +38,15 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 with: - ref: ${{ github.event.pull_request.merge_commit_sha }} + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - name: Install porting 
checker script run: | diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml index 19e8687ea..205561391 100644 --- a/.github/workflows/run-cri-containerd-tests.yaml +++ b/.github/workflows/run-cri-containerd-tests.yaml @@ -8,6 +8,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-cri-containerd: @@ -28,6 +32,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Install dependencies run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index 261454bb2..c7b2f9081 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -46,6 +50,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Download Azure CLI run: bash tests/integration/kubernetes/gha-run.sh install-azure-cli diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index 4588be335..22e9925d3 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -45,6 +49,13 @@ 
jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Deploy ${{ matrix.k8s }} run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml index 4260f4335..df105974a 100644 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -39,6 +43,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Deploy Kata timeout-minutes: 10 diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml index 34cb5e6cb..80e146795 100644 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -39,6 +43,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Deploy Kata timeout-minutes: 10 diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml index 1e15dd5a6..f9b14bc9d 100644 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ 
b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-k8s-tests: @@ -38,6 +42,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Deploy Kata timeout-minutes: 10 diff --git a/.github/workflows/run-kata-deploy-tests-on-aks.yaml b/.github/workflows/run-kata-deploy-tests-on-aks.yaml index 951e6e932..8962fcf32 100644 --- a/.github/workflows/run-kata-deploy-tests-on-aks.yaml +++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-kata-deploy-tests: @@ -45,6 +49,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Download Azure CLI run: bash tests/functional/kata-deploy/gha-run.sh install-azure-cli diff --git a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml index f4029b6e9..b9e7e0588 100644 --- a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml +++ b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-kata-deploy-tests: @@ -37,6 +41,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + 
TARGET_BRANCH: ${{ inputs.target-branch }} - name: Run tests run: bash tests/functional/kata-deploy/gha-run.sh run-tests diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index d2e5ab64e..08cbef904 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -8,6 +8,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: setup-kata: @@ -19,6 +23,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml index 10ff1cc9a..54c8cf864 100644 --- a/.github/workflows/run-nydus-tests.yaml +++ b/.github/workflows/run-nydus-tests.yaml @@ -8,6 +8,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-nydus: @@ -28,6 +32,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Install dependencies run: bash tests/integration/nydus/gha-run.sh install-dependencies diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml index 6a42a1b4e..b5aa739ce 100644 --- a/.github/workflows/run-vfio-tests.yaml +++ b/.github/workflows/run-vfio-tests.yaml @@ -8,6 +8,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: run-vfio: @@ -23,6 +27,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash 
}} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: Install dependencies run: bash tests/functional/vfio/gha-run.sh install-dependencies diff --git a/tests/git-helper.sh b/tests/git-helper.sh new file mode 100755 index 000000000..81ec53cfa --- /dev/null +++ b/tests/git-helper.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +function add_kata_bot_info() { + echo "Adding user name and email to the local git repo" + + git config user.email "katacontainersbot@gmail.com" + git config user.name "Kata Containers Bot" +} + +function rebase_atop_of_the_latest_target_branch() { + if [ -n "${TARGET_BRANCH}" ]; then + echo "Rebasing atop of the latest ${TARGET_BRANCH}" + git rebase origin/${TARGET_BRANCH} + fi +} + +function main() { + action="${1:-}" + + add_kata_bot_info + + case "${action}" in + rebase-atop-of-the-latest-target-branch) rebase_atop_of_the_latest_target_branch;; + *) >&2 echo "Invalid argument"; exit 2 ;; + esac +} + +main "$@" From 1df997c38c5799c2f596d722dcdeb2f693f8392a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 9 Sep 2023 10:48:07 +0200 Subject: [PATCH 143/339] kata-deploy: Fix aarch64 image build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to what's been done for x86_64 -> amd64, we need to do a aarch64 -> arm64 change in order to be able to download the kubectl binary. 
Fixes: #7861 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 139c7f03ab67180d7ce3d9c291cfc963ff9b38a5) --- tools/packaging/kata-deploy/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index 2db312458..dd16a775d 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -14,6 +14,7 @@ RUN \ apk --no-cache add bash curl && \ ARCH=$(uname -m) && \ if [ "${ARCH}" = "x86_64" ]; then ARCH=amd64; fi && \ + if [ "${ARCH}" = "aarch64" ]; then ARCH=arm64; fi && \ curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl && \ chmod +x /usr/bin/kubectl && \ mkdir -p ${DESTINATION} && \ From 400418bce0ac5405f77ea09cd2295e471d9b5572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 9 Sep 2023 10:51:01 +0200 Subject: [PATCH 144/339] kata-deploy: Remove curl after it's used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need to keep curl there after the kubectl binary has already been downloaded. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8b4a0b368f54b78a1070d4c71ca83c116786ecd4) --- tools/packaging/kata-deploy/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index dd16a775d..95f07f143 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -19,7 +19,8 @@ RUN \ chmod +x /usr/bin/kubectl && \ mkdir -p ${DESTINATION} && \ tar xvf ${WORKDIR}/${KATA_ARTIFACTS} -C ${DESTINATION} && \ - rm -f ${WORKDIR}/${KATA_ARTIFACTS} + rm -f ${WORKDIR}/${KATA_ARTIFACTS} && \ + apk del curl COPY scripts ${DESTINATION}/scripts COPY runtimeclasses ${DESTINATION}/runtimeclasses From 9f510d059bc82de34f9ebf9e87ab305fa4ccff78 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 11 Sep 2023 16:41:48 +0000 Subject: [PATCH 145/339] metrics: Remove warning from metrics documentation Now that the metrics migration from the tests to kata containers has been completed, this PR removes the warning from the main metrics documentation. Fixes #7894 Signed-off-by: Gabriela Cervantes (cherry picked from commit 060499dcaefeadb886aafda0fcad1c77cac4bb82) --- tests/metrics/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 279e39af9..ab55e1052 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -1,7 +1,5 @@ # Kata Containers metrics -> **_Warning:_** Migration of metrics tests is WIP and you may not find all tests available here, but you can take a look at the [tests repo](https://github.com/kata-containers/tests/tree/main/metrics). - This directory contains the metrics tests for Kata Containers. 
The tests within this directory have a number of potential use cases: From fe4247a90c57cde70cb51583eb9ca267679a8de8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 18:46:16 +0200 Subject: [PATCH 146/339] ci: Fix target-branch usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We missed those one as part of bd24afcf737f94481992e2d88b27e66cfae64df8. Fixes: #7414 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8509c31870566b57e1f7b38241a0a7e040d8d2a0) --- .github/workflows/ci.yaml | 2 ++ .github/workflows/publish-kata-deploy-payload-amd64.yaml | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b80a3f322..f7358b54e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,6 +22,7 @@ jobs: with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} publish-kata-deploy-payload-amd64: needs: build-kata-static-tarball-amd64 @@ -116,6 +117,7 @@ jobs: tag: ${{ inputs.tag }}-amd64 commit-hash: ${{ inputs.commit-hash }} pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} secrets: inherit run-k8s-tests-on-sev: diff --git a/.github/workflows/publish-kata-deploy-payload-amd64.yaml b/.github/workflows/publish-kata-deploy-payload-amd64.yaml index b5ba900d8..8b15a06b5 100644 --- a/.github/workflows/publish-kata-deploy-payload-amd64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-amd64.yaml @@ -25,6 +25,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 From 19d9fd9eb17bc0710565d8c04fc1c33c829b3deb Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 20:41:01 +0200 Subject: [PATCH 147/339] ci: Add more target-branch related fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ones for the payload-after-push.yamland ci-nightly.yaml are not that much important right now, but they're needed for when we start running those on stable branches as well. The other ones were missed during bd24afcf737f94481992e2d88b27e66cfae64df8. Fixes: #7414 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 6d795c089eb6716d5c4a99af0a067b068d56bd64) --- .github/workflows/ci-nightly.yaml | 1 + .github/workflows/payload-after-push.yaml | 6 ++++++ .../workflows/publish-kata-deploy-payload-amd64.yaml | 4 ++++ .../workflows/publish-kata-deploy-payload-arm64.yaml | 11 +++++++++++ .../workflows/publish-kata-deploy-payload-s390x.yaml | 11 +++++++++++ 5 files changed, 33 insertions(+) diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml index 5c7676710..da763e392 100644 --- a/.github/workflows/ci-nightly.yaml +++ b/.github/workflows/ci-nightly.yaml @@ -15,4 +15,5 @@ jobs: commit-hash: ${{ github.sha }} pr-number: "nightly" tag: ${{ github.sha }}-nightly + target-branch: $GITHUB_REF_NAME secrets: inherit diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index 46766c54b..d0bf1dd8f 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -15,6 +15,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: $GITHUB_REF_NAME secrets: inherit build-assets-arm64: @@ -22,6 +23,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: $GITHUB_REF_NAME secrets: inherit build-assets-s390x: @@ -29,6 +31,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes + target-branch: $GITHUB_REF_NAME secrets: inherit publish-kata-deploy-payload-amd64: 
@@ -39,6 +42,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-amd64 + target-branch: $GITHUB_REF_NAME secrets: inherit publish-kata-deploy-payload-arm64: @@ -49,6 +53,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-arm64 + target-branch: $GITHUB_REF_NAME secrets: inherit publish-kata-deploy-payload-s390x: @@ -59,6 +64,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-s390x + target-branch: $GITHUB_REF_NAME secrets: inherit publish-manifest: diff --git a/.github/workflows/publish-kata-deploy-payload-amd64.yaml b/.github/workflows/publish-kata-deploy-payload-amd64.yaml index 8b15a06b5..253b93fbc 100644 --- a/.github/workflows/publish-kata-deploy-payload-amd64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-amd64.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: diff --git a/.github/workflows/publish-kata-deploy-payload-arm64.yaml b/.github/workflows/publish-kata-deploy-payload-arm64.yaml index 6c35ed8a3..0bdf909f1 100644 --- a/.github/workflows/publish-kata-deploy-payload-arm64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-arm64.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: @@ -29,6 +33,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 diff --git a/.github/workflows/publish-kata-deploy-payload-s390x.yaml b/.github/workflows/publish-kata-deploy-payload-s390x.yaml index ee7fa3fd7..6d1d44f7b 100644 --- 
a/.github/workflows/publish-kata-deploy-payload-s390x.yaml +++ b/.github/workflows/publish-kata-deploy-payload-s390x.yaml @@ -17,6 +17,10 @@ on: commit-hash: required: false type: string + target-branch: + required: false + type: string + default: "" jobs: kata-payload: @@ -29,6 +33,13 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} - name: get-kata-tarball uses: actions/download-artifact@v3 From fcfa6c6e1abbce831bffa7ff0b9aaf7d04db0466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 22:14:55 +0200 Subject: [PATCH 148/339] ci: use github.ref_name instead of $GITHUB_REF_NAME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As, regardless of what's mentioned in the documentation, it seems that $GITHUB_REF_NAME is passed down as a literal string. 
Fixes: #7414 Signed-off-by: Fabiano Fidêncio (cherry picked from commit f811b064cab41a478a9c3116e0424c8c0e3d92a5) --- .github/workflows/ci-nightly.yaml | 2 +- .github/workflows/payload-after-push.yaml | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml index da763e392..75f5f2667 100644 --- a/.github/workflows/ci-nightly.yaml +++ b/.github/workflows/ci-nightly.yaml @@ -15,5 +15,5 @@ jobs: commit-hash: ${{ github.sha }} pr-number: "nightly" tag: ${{ github.sha }}-nightly - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index d0bf1dd8f..0f1ab742f 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -15,7 +15,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit build-assets-arm64: @@ -23,7 +23,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit build-assets-s390x: @@ -31,7 +31,7 @@ jobs: with: commit-hash: ${{ github.sha }} push-to-registry: yes - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-amd64: @@ -42,7 +42,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-amd64 - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-arm64: @@ -53,7 +53,7 @@ jobs: registry: quay.io repo: kata-containers/kata-deploy-ci tag: kata-containers-arm64 - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit publish-kata-deploy-payload-s390x: @@ -64,7 +64,7 @@ jobs: registry: quay.io 
repo: kata-containers/kata-deploy-ci tag: kata-containers-s390x - target-branch: $GITHUB_REF_NAME + target-branch: ${{ github.ref_name }} secrets: inherit publish-manifest: From 69dd11f45938f99649ecf1e77418d5e974aa795e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 12 Sep 2023 09:05:54 +0200 Subject: [PATCH 149/339] ci: Add a very basic docker sanity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add a very basic sanity test to check that we can spawn a containers using docker + Kata Containers. This will ensure that, at least, we don't regress to the point where this feature doesn't work at all. For now we're running this test against Cloud Hypervisor and QEMU only, due to an already reported issue with dragonball: https://github.com/kata-containers/kata-containers/issues/7912 In the future, we should also test all the VMMs with devmapper, but that's for a follow-up PR after this test is working as expected. 
Fixes: #7910 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 348b8644d6e0343d1a94690c4f85175b6f9019a3) --- .github/workflows/ci.yaml | 8 +++ .../workflows/run-docker-tests-on-garm.yaml | 56 +++++++++++++++++++ tests/integration/docker/gha-run.sh | 54 ++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 .github/workflows/run-docker-tests-on-garm.yaml create mode 100755 tests/integration/docker/gha-run.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f7358b54e..4838344ee 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -73,6 +73,14 @@ jobs: platforms: linux/amd64, linux/s390x file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile + run-docker-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-docker-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-kata-deploy-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-kata-deploy-tests-on-aks.yaml diff --git a/.github/workflows/run-docker-tests-on-garm.yaml b/.github/workflows/run-docker-tests-on-garm.yaml new file mode 100644 index 000000000..abb6b0354 --- /dev/null +++ b/.github/workflows/run-docker-tests-on-garm.yaml @@ -0,0 +1,56 @@ +name: CI | Run docker integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-docker-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. 
+ fail-fast: false + matrix: + vmm: + - clh + - qemu + runs-on: garm-ubuntu-2304 + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/docker/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/docker/gha-run.sh install-kata kata-artifacts + + - name: Run docker smoke test + timeout-minutes: 5 + run: bash tests/integration/docker/gha-run.sh run diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh new file mode 100755 index 000000000..fc4f430d3 --- /dev/null +++ b/tests/integration/docker/gha-run.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +docker_dir="$(dirname "$(readlink -f "$0")")" +source "${docker_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the docker smoke test" + + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get -y install ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the repository to Apt sources: + echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. 
/etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +} + +function run() { + info "Running docker smoke test tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + + sudo docker run --rm --runtime io.containerd.kata.v2 alpine ping -c 2 www.github.com +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" From 33430ad60ccd5a3c67988bbc66d443317bcff71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 12 Sep 2023 10:35:54 +0200 Subject: [PATCH 150/339] ci: Add a very basic nerdctl sanity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add a very basic sanity test to check that we can spawn a containers using nerdctl + Kata Containers. This will ensure that, at least, we don't regress to the point where this feature doesn't work at all. In the future, we should also test all the VMMs with devmapper, but that's for a follow-up PR after this test is working as expected. 
Fixes: #7911 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 12d833d07d365becf5716d3bf22daa5d5e07bc52) --- .github/workflows/ci.yaml | 8 ++ .../workflows/run-nerdctl-tests-on-garm.yaml | 57 ++++++++++++++ tests/integration/nerdctl/gha-run.sh | 78 +++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 .github/workflows/run-nerdctl-tests-on-garm.yaml create mode 100644 tests/integration/nerdctl/gha-run.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4838344ee..8f7c90ffb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,6 +81,14 @@ jobs: commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + run-nerdctl-tests-on-garm: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-nerdctl-tests-on-garm.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-kata-deploy-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-kata-deploy-tests-on-aks.yaml diff --git a/.github/workflows/run-nerdctl-tests-on-garm.yaml b/.github/workflows/run-nerdctl-tests-on-garm.yaml new file mode 100644 index 000000000..a902d7cc5 --- /dev/null +++ b/.github/workflows/run-nerdctl-tests-on-garm.yaml @@ -0,0 +1,57 @@ +name: CI | Run nerdctl integration tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-nerdctl-tests: + strategy: + # We can set this to true whenever we're 100% sure that + # all the tests are not flaky, otherwise we'll fail them + # all due to a single flaky instance. 
+ fail-fast: false + matrix: + vmm: + - clh + - dragonball + - qemu + runs-on: garm-ubuntu-2304 + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nerdctl/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nerdctl/gha-run.sh install-kata kata-artifacts + + - name: Run nerdctl smoke test + timeout-minutes: 5 + run: bash tests/integration/nerdctl/gha-run.sh run diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh new file mode 100644 index 000000000..dc1585156 --- /dev/null +++ b/tests/integration/nerdctl/gha-run.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +nerdctl_dir="$(dirname "$(readlink -f "$0")")" +source "${nerdctl_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies for running the nerdctl tests" + + # Dependency list of projects that we can rely on the system packages + # - wget + # - Used to download the nerdctl-full tarball + # - pip + # - Used to install lastversion, which will be used to get the latest + # release of the nerdctl + declare -a system_deps=( + wget + pip + ) + + sudo apt update + sudo apt -y install "${system_deps[@]}" + + # Install lastversion from pip + # + # --break-system-packages is, unfortunately, needed here as it'll also + # bring in some python3 dependencies on its own 
+ pip install lastversion --break-system-packages + + # As the command above will install lastversion on $HOME/.local/bin, we + # need to add it to the PATH + export PATH=$PATH:${HOME}/.local/bin + + # Download the nerdctl-full tarball, as it comes with all the deps + # needed. + nerdctl_lastest_version=$(lastversion containerd/nerdctl) + wget https://github.com/containerd/nerdctl/releases/download/v${nerdctl_lastest_version}/nerdctl-full-${nerdctl_lastest_version}-linux-amd64.tar.gz + + # Unpack the latest nerdctl into /usr/local/ + sudo tar -xvf nerdctl-full-${nerdctl_lastest_version}-linux-amd64.tar.gz -C /usr/local/ + + # Start containerd service + sudo systemctl daemon-reload + sudo systemctl start containerd +} + +function run() { + info "Running nerdctl smoke test tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + + info "Running nerdctl with runc" + sudo nerdctl run --rm alpine ping -c 2 www.github.com + + info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})" + sudo nerdctl run --rm --runtime io.containerd.kata.v2 alpine ping -c 2 www.github.com +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" From c7147dabceea32246a3eaa10b6f19aa320e99f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 12 Sep 2023 16:13:00 +0200 Subject: [PATCH 151/339] ci: docker: Run the tests after the kata-static is created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no reason to wait till the payload is created to run the tests, as we rely on the tarball, not on the kata-deploy payload. That was a mistake on my side, and that's already fixed for the nerdctl tests. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit c83f167c5981a5c043dd2d3dcbcaa5f333e353bc) --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8f7c90ffb..8b8ad8ab4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,7 +74,7 @@ jobs: file: tests/integration/kubernetes/runtimeclass_workloads/confidential/unencrypted/Dockerfile run-docker-tests-on-garm: - needs: publish-kata-deploy-payload-amd64 + needs: build-kata-static-tarball-amd64 uses: ./.github/workflows/run-docker-tests-on-garm.yaml with: tarball-suffix: -${{ inputs.tag }} From e5e395139879369ed6fe9023f4f2876bc7bd0069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 12 Sep 2023 16:26:41 +0200 Subject: [PATCH 152/339] ci: docker: Also run the smoke test with runc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will help us to make sure that the failure is actually related to Kata Containers. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit f536ef5ce1ba5a32d7d9727c469e20f3820436f8) --- tests/integration/docker/gha-run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index fc4f430d3..58a6dceac 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -38,6 +38,10 @@ function run() { enabling_hypervisor + info "Running docker with runc" + sudo docker run --rm alpine ping -c 2 www.github.com + + info "Running docker with Kata Containers (${KATA_HYPERVISOR})" sudo docker run --rm --runtime io.containerd.kata.v2 alpine ping -c 2 www.github.com } From 0968bf1eb9e78f9248c879f60d505c7ac41d1484 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Tue, 12 Sep 2023 10:11:38 -0600 Subject: [PATCH 153/339] metrics: this PR skips the FIO test temporarily to fix issues FIO test is showing ongoing issues when running in k8s. Working on running FIO on the ctr client which has been shown to be stable. 
Fixes: #7920 Signed-off-by: David Esparza (cherry picked from commit a58ea66592ed1f28bd9598bd067fae5eeb606404) --- .../checkmetrics-json-clh-kata-metric8.toml | 26 ------------------- .../checkmetrics-json-qemu-kata-metric8.toml | 26 ------------------- tests/metrics/gha-run.sh | 4 +-- 3 files changed, 2 insertions(+), 54 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 00bdcd8e0..507d62dc3 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -98,32 +98,6 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 -[[metric]] -name = "fio" -type = "json" -description = "measure write 90 percentile using fio" -# Min and Max values to set a 'range' that -# the median of the CSV Results data must fall -# within (inclusive) -checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" -checktype = "mean" -midval = 38656.0 -minpercent = 20.0 -maxpercent = 20.0 - -[[metric]] -name = "fio" -type = "json" -description = "measure write 95 percentile using fio" -# Min and Max values to set a 'range' that -# the median of the CSV Results data must fall -# within (inclusive) -checkvar = ".\"fio\".Results | .[] | .write95percentile.Result" -checktype = "mean" -midval = 33024.0 -minpercent = 20.0 -maxpercent = 20.0 - [[metric]] name = "network-iperf3" type = "json" diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 2f7decae8..f02700890 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -98,32 +98,6 @@ midval = 98.0 minpercent = 20.0 maxpercent = 
20.0 -[[metric]] -name = "fio" -type = "json" -description = "measure write 90 percentile using fio" -# Min and Max values to set a 'range' that -# the median of the CSV Results data must fall -# within (inclusive) -checkvar = ".\"fio\".Results | .[] | .write90percentile.Result" -checktype = "mean" -midval = 37120.0 -minpercent = 20.0 -maxpercent = 20.0 - -[[metric]] -name = "fio" -type = "json" -description = "measure write 95 percentile using fio" -# Min and Max values to set a 'range' that -# the median of the CSV Results data must fall -# within (inclusive) -checkvar = ".\"fio\".Results | .[] | .write95percentile.Result" -checktype = "mean" -midval = 43264.0 -minpercent = 20.0 -maxpercent = 20.0 - [[metric]] name = "network-iperf3" type = "json" diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 77ef0fc25..4076ad7ff 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -84,9 +84,9 @@ function run_test_tensorflow() { } function run_test_fio() { - info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" + info "Skipping FIO test temporarily using ${KATA_HYPERVISOR} hypervisor" - bash tests/metrics/storage/fio-k8s/fio-test-ci.sh + # bash tests/metrics/storage/fio-k8s/fio-test-ci.sh } function run_test_iperf() { From 31c33f9c1c9779f2970a06ca77dc00fbf3199a2e Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 12 Sep 2023 16:30:35 +0000 Subject: [PATCH 154/339] metrics: Add Cassandra Metrics documentation This PR adds the Cassandra Metrics documentation for kata metrics. 
Fixes #7922 Signed-off-by: Gabriela Cervantes (cherry picked from commit ad08321b83c6605506ce0c4df5e7bd97e9a1fcb1) --- tests/metrics/disk/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/metrics/disk/README.md diff --git a/tests/metrics/disk/README.md b/tests/metrics/disk/README.md new file mode 100644 index 000000000..274fb9730 --- /dev/null +++ b/tests/metrics/disk/README.md @@ -0,0 +1,18 @@ +# Kata Containers Cassandra Metrics + +Kata Containers provides a series of read and write performance tests using +Cassandra Stress tool. The Cassandra Stress tool is a Java-based stress testing +utility for basic benchmarking and load testing a cluster. This tool helps us +to populate a cluster and stress test CQL tables and queries. This test is +based in two operations, the first one is writing against the cluster or +populating the database and the second one is reading the cluster that was +populated by the writing test. + +## Running the test + +Individual tests can be run by hand, for example: + +``` +$ cd metrics/disk/cassandra_kubernetes +$ ./cassandra.sh +``` From deadacd58f2c0f927fc723d11f384ecc73e93e41 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 11 Sep 2023 16:53:57 +0000 Subject: [PATCH 155/339] metrics: Ensure docker is running in init_env This PR ensures that docker is running as part of the init_env function in kata metrics to avoid failures like docker is not running and making the kata metrics CI to fail. 
Fixes #7898 Signed-off-by: Gabriela Cervantes (cherry picked from commit d53eb73eecf4b28306259e2170def6344bc635fd) --- tests/metrics/lib/common.bash | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index 327379a51..8528d2c7b 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -168,6 +168,8 @@ function init_env() cmd=("docker" "ctr") + sudo systemctl restart docker + # check dependencies check_cmds "${cmd[@]}" From 40ae855f0e96027fb8ca6bb616e47058784d4c96 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 12 Sep 2023 19:30:24 +0000 Subject: [PATCH 156/339] metrics: Add iperf bandwidth value for kata metrics This PR adds the iperf bandwidth value for kata metrics. Fixes #7924 Signed-off-by: Gabriela Cervantes (cherry picked from commit 615c1cbf19a0a0405867c902510df9034dcf9a23) --- .../checkmetrics-json-clh-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 507d62dc3..0ef1753a8 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" +checktype = "mean" +midval = 55302925246.02 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From 31c3d9bd80c7f7f0c190152269837c7cbdfe1610 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 12 Sep 2023 19:33:21 
+0000 Subject: [PATCH 157/339] metrics: Add iperf bandwidth value for qemu This PR adds the iperf bandwidth value for qemu for kata metrics. Signed-off-by: Gabriela Cervantes (cherry picked from commit 0aa073967d98a7d5316dbc6f6c586493e4cbd6b5) --- .../checkmetrics-json-clh-kata-metric8.toml | 2 +- .../checkmetrics-json-qemu-kata-metric8.toml | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 0ef1753a8..6c6066bc0 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -107,7 +107,7 @@ description = "measure container bandwidth using iperf3" # within (inclusive) checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" checktype = "mean" -midval = 55302925246.02 +midval = 61176889941.19 minpercent = 20.0 maxpercent = 20.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index f02700890..278b7568f 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" +checktype = "mean" +midval = 52644229340.91 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From 55c8a47a406e5de42c47b418423675f8096e8515 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 13 Sep 2023 11:04:17 +0200 Subject: [PATCH 158/339] ci: docker: Switch to tcp port 80 ping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TIL that the Azure VMs we use are created without an explicit outbound connectivity defined. This leads us to issues using `ping ...` as part of our tests, and when consulting Jeremi Piotrowski about the issue he pointed me out to two interesting links: * https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/default-outbound-access * https://learn.microsoft.com/en-us/archive/blogs/mast/use-port-pings-instead-of-icmp-to-test-azure-vm-connectivity For your own sanity, do not read the comments, after all this is internet. :-) Anyways, the suggestion is to use nping instead, which is provided by the nmap package, so we can explicitly switch to using the tcp port 80 for the ping. With this in mind, I'm switching the image we use for the test and using one that provided nping as a possible entry point, and from now on (this part of) the tests should work. 
Fixes: #7910 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e0c811678bf33fd4edeb99e5bd35314cd481128f) --- tests/integration/docker/gha-run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index 58a6dceac..f677b8258 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -39,10 +39,10 @@ function run() { enabling_hypervisor info "Running docker with runc" - sudo docker run --rm alpine ping -c 2 www.github.com + sudo docker run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com info "Running docker with Kata Containers (${KATA_HYPERVISOR})" - sudo docker run --rm --runtime io.containerd.kata.v2 alpine ping -c 2 www.github.com + sudo docker run --rm --runtime io.containerd.kata.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com } function main() { From 5734c4cbca613148dd290a98c8f0eb095dc3313b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 13 Sep 2023 11:09:34 +0200 Subject: [PATCH 159/339] ci: nerdctl: Switch to tcp port 80 ping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TIL that the Azure VMs we use are created without an explicit outbound connectivity defined. This leads us to issues using `ping ...` as part of our tests, and when consulting Jeremi Piotrowski about the issue he pointed me out to two interesting links: * https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/default-outbound-access * https://learn.microsoft.com/en-us/archive/blogs/mast/use-port-pings-instead-of-icmp-to-test-azure-vm-connectivity For your own sanity, do not read the comments, after all this is internet. :-) Anyways, the suggestion is to use nping instead, which is provided by the nmap package, so we can explicitly switch to using the tcp port 80 for the ping. 
With this in mind, I'm switching the image we use for the test and using one that provided nping as a possible entry point, and from now on (this part of) the tests should work. Fixes: #7910 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 13968aa7f6cd4ade5682726ced0affbcdb180db1) --- tests/integration/nerdctl/gha-run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh index dc1585156..6d1a32f24 100644 --- a/tests/integration/nerdctl/gha-run.sh +++ b/tests/integration/nerdctl/gha-run.sh @@ -59,10 +59,10 @@ function run() { enabling_hypervisor info "Running nerdctl with runc" - sudo nerdctl run --rm alpine ping -c 2 www.github.com + sudo nerdctl run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})" - sudo nerdctl run --rm --runtime io.containerd.kata.v2 alpine ping -c 2 www.github.com + sudo nerdctl run --rm --runtime io.containerd.kata.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com } function main() { From 9a664ea8bb6fd04384478840b3c62f884b2a9274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 13 Sep 2023 11:23:46 +0200 Subject: [PATCH 160/339] ci: nerdctl: Create the containerd config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we'll fail to configure kata-containers in the `install-kata` step. This is mostly needed because the nerdctl-full tarball doesn't provide a containerd configuration, just the binary, as containerd does not actually require a configuration file to run with the default config. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 46bc0b1c017b55a57b183bdbf3b158a60a1af156) --- tests/integration/nerdctl/gha-run.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh index 6d1a32f24..64b7c7cc6 100644 --- a/tests/integration/nerdctl/gha-run.sh +++ b/tests/integration/nerdctl/gha-run.sh @@ -51,6 +51,11 @@ function install_dependencies() { # Start containerd service sudo systemctl daemon-reload sudo systemctl start containerd + + # Create the default containerd configuration + sudo mkdir -p /etc/containerd + containerd config default | sudo tee /etc/containerd/config.toml + sudo systemctl restart containerd } function run() { From 6abf513f064b8e9f8f3105ce773e06576dab497e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 13 Sep 2023 13:10:14 +0200 Subject: [PATCH 161/339] ci: docker: nerdctl: Use io.containerd.kata-${KATA_HYPERVISOR}.v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will ensure that we're calling the correct binary for the hypervisor. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 813bfdec0178e59569eba366063553e0def033ab) --- tests/integration/docker/gha-run.sh | 2 +- tests/integration/nerdctl/gha-run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index f677b8258..cab3401ea 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -42,7 +42,7 @@ function run() { sudo docker run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com info "Running docker with Kata Containers (${KATA_HYPERVISOR})" - sudo docker run --rm --runtime io.containerd.kata.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com + sudo docker run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com } function main() { diff --git a/tests/integration/nerdctl/gha-run.sh b/tests/integration/nerdctl/gha-run.sh index 64b7c7cc6..c4803d5db 100644 --- a/tests/integration/nerdctl/gha-run.sh +++ b/tests/integration/nerdctl/gha-run.sh @@ -67,7 +67,7 @@ function run() { sudo nerdctl run --rm --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com info "Running nerdctl with Kata Containers (${KATA_HYPERVISOR})" - sudo nerdctl run --rm --runtime io.containerd.kata.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com + sudo nerdctl run --rm --runtime io.containerd.kata-${KATA_HYPERVISOR}.v2 --entrypoint nping instrumentisto/nmap --tcp-connect -c 2 -p 80 www.github.com } function main() { From 3c5bd8c44d1d93153e77b9b54cbc241c05ad30e7 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 13 Sep 2023 21:27:08 +0000 Subject: [PATCH 162/339] metrics: Increase value limit for jitter in clh This PR increases the value limit for jitter in clh. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 49234433a71a5b4c060344dfa5bfdfbea7e03ec8) --- .../ci_worker/checkmetrics-json-clh-kata-metric8.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 6c6066bc0..82fa422de 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -121,5 +121,5 @@ description = "iperf" checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" midval = 0.044 -minpercent = 30.0 -maxpercent = 30.0 +minpercent = 35.0 +maxpercent = 35.0 From 97283b18b49d187b66f9fa49ae21a6e7b3256676 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 13 Sep 2023 21:28:13 +0000 Subject: [PATCH 163/339] metrics: Increase jitter value for qemu This PR increases the jitter value for qemu for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 49e2fa189c89b5ac1fd530a3b90181a5985ebcad) --- .../ci_worker/checkmetrics-json-clh-kata-metric8.toml | 4 ++-- .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 82fa422de..7cd3a025e 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -121,5 +121,5 @@ description = "iperf" checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" midval = 0.044 -minpercent = 35.0 -maxpercent = 35.0 +minpercent = 40.0 +maxpercent = 40.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 278b7568f..7dc842688 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -121,5 +121,5 @@ description = "iperf" checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" midval = 0.041 -minpercent = 30.0 -maxpercent = 30.0 +minpercent = 40.0 +maxpercent = 40.0 From 62804d637ce50857e4fa3fc88e92f8e10eb862e6 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 21 Aug 2023 16:06:04 +0200 Subject: [PATCH 164/339] gha: vfio: Import jobs scripts from tests repo This imports the vfio test scripts github.com/kata-containers/tests. The test case doesn't work yet but doing the changes in a separate commit will make it easier to track the changes. 
The only change in this commit is renaming vfio_jenkins_job_build.sh -> vfio_fedora_vm_wrapper.sh Fixes: #6555 Signed-off-by: Jeremi Piotrowski (cherry picked from commit 5a551a85b1b33f0a18525d3a9ac42e10b3c30bce) --- tests/functional/vfio/gha-run.sh | 1 + tests/functional/vfio/guest-kernel.json.in | 176 +++++++++ tests/functional/vfio/run.sh | 341 ++++++++++++++++++ tests/functional/vfio/vfio.json.in | 187 ++++++++++ .../functional/vfio/vfio_fedora_vm_wrapper.sh | 320 ++++++++++++++++ 5 files changed, 1025 insertions(+) create mode 100644 tests/functional/vfio/guest-kernel.json.in create mode 100755 tests/functional/vfio/run.sh create mode 100644 tests/functional/vfio/vfio.json.in create mode 100755 tests/functional/vfio/vfio_fedora_vm_wrapper.sh diff --git a/tests/functional/vfio/gha-run.sh b/tests/functional/vfio/gha-run.sh index f4cb608de..cfd808224 100755 --- a/tests/functional/vfio/gha-run.sh +++ b/tests/functional/vfio/gha-run.sh @@ -19,6 +19,7 @@ function install_dependencies() { function run() { info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" + "${vfio_dir}"/vfio_fedora_vm_wrapper.sh } function main() { diff --git a/tests/functional/vfio/guest-kernel.json.in b/tests/functional/vfio/guest-kernel.json.in new file mode 100644 index 000000000..31c0af9f0 --- /dev/null +++ b/tests/functional/vfio/guest-kernel.json.in @@ -0,0 +1,176 @@ +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +{ + "ociVersion": "1.0.0-rc2-dev", + "platform": { + "os": "linux", + "arch": "amd64" + }, + "annotations": { + "io.katacontainers.config.hypervisor.enable_iommu": "false", + "io.katacontainers.config.runtime.vfio_mode": "guest-kernel" + }, + "process": { + "terminal": false, + "consoleSize": { + "height": 0, + "width": 0 + }, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ "/bin/tail", "-f", "/dev/null" ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": 
"/", + "rlimits": [{ + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + }], + "noNewPrivileges": true + }, + "root": { + "path": "@ROOTFS@", + "readonly": false + }, + "hostname": "vfio-test", + "mounts": [{ + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": {}, + "linux": { + "devices": [{ + "path": "@VFIO_PATH@", + "type": "c", + "major": @VFIO_MAJOR@, + "minor": @VFIO_MINOR@, + "fileMode": 384, + "uid": 0, + "gid": 0 + }], + "cgroupsPath": "kata/vfiotest", + "resources": { + "devices": [ + {"allow":false,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"}, + {"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"} + ] + }, + "namespaces": 
[{ + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh new file mode 100755 index 000000000..f60ac82c7 --- /dev/null +++ b/tests/functional/vfio/run.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -x +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +script_path=$(dirname "$0") +source "${script_path}/../../common.bash" + +addr= +tmp_data_dir="$(mktemp -d)" +rootfs_tar="${tmp_data_dir}/rootfs.tar" +trap cleanup EXIT + +# kata-runtime options +SANDBOX_CGROUP_ONLY="" +HYPERVISOR= +MACHINE_TYPE= +IMAGE_TYPE= + +cleanup() { + clean_env_ctr + sudo rm -rf "${tmp_data_dir}" + + [ -n "${host_pci}" ] && sudo driverctl unset-override "${host_pci}" +} + +host_pci_addr() { + lspci -D | grep "Ethernet controller" | grep "Virtio.*network device" | tail -1 | cut -d' ' -f1 +} + +get_vfio_path() { + local addr="$1" + echo "/dev/vfio/$(basename $(realpath /sys/bus/pci/drivers/vfio-pci/${host_pci}/iommu_group))" +} + +pull_rootfs() { + # pull and export busybox image in tar file + local image="quay.io/prometheus/busybox:latest" + sudo -E ctr i pull ${image} + sudo -E ctr i export "${rootfs_tar}" "${image}" + sudo chown ${USER}:${USER} "${rootfs_tar}" + sync +} + +create_bundle() { + local bundle_dir="$1" + mkdir -p "${bundle_dir}" + + # extract busybox rootfs + local rootfs_dir="${bundle_dir}/rootfs" + mkdir -p "${rootfs_dir}" + local layers_dir="$(mktemp -d)" + tar -C "${layers_dir}" -pxf "${rootfs_tar}" + for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq 
-r ".[].Layers | length");i++)); do + tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]") + done + sync + + # Copy config.json + cp -a "${script_path}/config.json" "${bundle_dir}/config.json" +} + +run_container() { + local container_id="$1" + local bundle_dir="$2" + + sudo -E ctr run -d --runtime io.containerd.kata.v2 --config "${bundle_dir}/config.json" "${container_id}" +} + + +get_ctr_cmd_output() { + local container_id="$1" + shift + sudo -E ctr t exec --exec-id 2 "${container_id}" "${@}" +} + +check_guest_kernel() { + local container_id="$1" + # For vfio_mode=guest-kernel, the device should be bound to + # the guest kernel's native driver. To check this has worked, + # we look for an ethernet device named 'eth*' + get_ctr_cmd_output "${container_id}" ip a | grep "eth" || die "Missing VFIO network interface" +} + +check_vfio() { + local cid="$1" + # For vfio_mode=vfio, the device should be bound to the guest + # vfio-pci driver. + + # Check the control device is visible + get_ctr_cmd_output "${cid}" ls /dev/vfio/vfio || die "Couldn't find VFIO control device in container" + + # The device should *not* cause an ethernet interface to appear + ! 
get_ctr_cmd_output "${cid}" ip a | grep "eth" || die "Unexpected network interface" + + # There should be exactly one VFIO group device (there might + # be multiple IOMMU groups in the VM, but only one device + # should be bound to the VFIO driver, so there should still + # only be one VFIO device + group="$(get_ctr_cmd_output "${cid}" ls /dev/vfio | grep -v vfio)" + if [ $(echo "${group}" | wc -w) != "1" ] ; then + die "Expected exactly one VFIO group got: ${group}" + fi + + # There should be two devices in the IOMMU group: the ethernet + # device we care about, plus the PCIe to PCI bridge device + devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)" + if [ $(echo "${devs}" | wc -w) != "2" ] ; then + die "Expected exactly two devices got: ${devs}" + fi + + # The bridge device will always sort first, because it is on + # bus zero, whereas the NIC will be on a non-zero bus + guest_pci=$(echo "${devs}" | tail -1) + + # This is a roundabout way of getting the environment + # variable, but to use the more obvious "echo $PCIDEVICE_..." + # we would have to escape the '$' enough to not be expanded + # before it's injected into the container, but not so much + # that it *is* expanded by the shell within the container. + # Doing that with another shell function in between is very + # fragile, so do it this way instead. 
+ guest_env="$(get_ctr_cmd_output "${cid}" env | grep ^PCIDEVICE_VIRTIO_NET | sed s/^[^=]*=//)" + if [ "${guest_env}" != "${guest_pci}" ]; then + die "PCIDEVICE variable was \"${guest_env}\" instead of \"${guest_pci}\"" + fi +} + +get_dmesg() { + local container_id="$1" + get_ctr_cmd_output "${container_id}" dmesg +} + +# Show help about this script +help(){ +cat << EOF +Usage: $0 [-h] [options] + Description: + This script runs a kata container and passthrough a vfio device + Options: + -h, Help + -i , Specify initrd or image + -m , Specify kata-runtime machine type for qemu hypervisor + -p , Specify kata-runtime hypervisor + -s , Set sandbox_cgroup_only in the configuration file +EOF +} + +setup_configuration_file() { + local qemu_config_file="configuration-qemu.toml" + local clh_config_file="configuration-clh.toml" + local image_file="/opt/kata/share/kata-containers/kata-containers.img" + local initrd_file="/opt/kata/share/kata-containers/kata-containers-initrd.img" + local kata_config_file="" + + for file in $(kata-runtime --kata-show-default-config-paths); do + if [ ! 
-f "${file}" ]; then + continue + fi + + kata_config_file="${file}" + config_dir=$(dirname ${file}) + config_filename="" + + if [ "$HYPERVISOR" = "qemu" ]; then + config_filename="${qemu_config_file}" + elif [ "$HYPERVISOR" = "clh" ]; then + config_filename="${clh_config_file}" + fi + + config_file="${config_dir}/${config_filename}" + if [ -f "${config_file}" ]; then + rm -f "${kata_config_file}" + cp -a $(realpath "${config_file}") "${kata_config_file}" + break + fi + done + + # machine type applies to configuration.toml and configuration-qemu.toml + if [ -n "$MACHINE_TYPE" ]; then + if [ "$HYPERVISOR" = "qemu" ]; then + sed -i 's|^machine_type.*|machine_type = "'${MACHINE_TYPE}'"|g' "${kata_config_file}" + # Make sure we have set hot_plug_vfio to a reasonable value + sudo sed -i -e 's|^#hot_plug_vfio =.*$|hot_plug_vfio = "bridge-port"|' -e 's|^hot_plug_vfio = .*$|hot_plug_vfio = "bridge-port"|' "${kata_config_file}" + else + warn "Variable machine_type only applies to qemu. It will be ignored" + fi + fi + + if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then + sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='${SANDBOX_CGROUP_ONLY}'|g' "${kata_config_file}" + fi + + # Change to initrd or image depending on user input. + # Non-default configs must be changed to specify either initrd or image, image is default. 
+ if [ "$IMAGE_TYPE" = "initrd" ]; then + if $(grep -q "^image.*" ${kata_config_file}); then + if $(grep -q "^initrd.*" ${kata_config_file}); then + sed -i '/^image.*/d' "${kata_config_file}" + else + sed -i 's|^image.*|initrd = "'${initrd_file}'"|g' "${kata_config_file}" + fi + fi + else + if $(grep -q "^initrd.*" ${kata_config_file}); then + if $(grep -q "^image.*" ${kata_config_file}); then + sed -i '/^initrd.*/d' "${kata_config_file}" + else + sed -i 's|^initrd.*|image = "'${image_file}'"|g' "${kata_config_file}" + fi + fi + fi + + # enable debug + sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \ + -e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \ + -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug"/g' \ + "${kata_config_file}" + + # enable VFIO relevant hypervisor annotations + sed -i -e 's/^\(enable_annotations\).*=.*$/\1 = ["enable_iommu"]/' \ + "${kata_config_file}" +} + +run_test_container() { + local container_id="$1" + local bundle_dir="$2" + local config_json_in="$3" + local host_pci="$4" + + # generate final config.json + sed -e '/^#.*/d' \ + -e 's|@VFIO_PATH@|'"${vfio_device}"'|g' \ + -e 's|@VFIO_MAJOR@|'"${vfio_major}"'|g' \ + -e 's|@VFIO_MINOR@|'"${vfio_minor}"'|g' \ + -e 's|@VFIO_CTL_MAJOR@|'"${vfio_ctl_major}"'|g' \ + -e 's|@VFIO_CTL_MINOR@|'"${vfio_ctl_minor}"'|g' \ + -e 's|@ROOTFS@|'"${bundle_dir}/rootfs"'|g' \ + -e 's|@HOST_PCI@|'"${host_pci}"'|g' \ + "${config_json_in}" > "${script_path}/config.json" + + create_bundle "${bundle_dir}" + + # run container + run_container "${container_id}" "${bundle_dir}" + + # output VM dmesg + get_dmesg "${container_id}" +} + +main() { + local OPTIND + while getopts "hi:m:p:s:" opt;do + case ${opt} in + h) + help + exit 0; + ;; + i) + IMAGE_TYPE="${OPTARG}" + ;; + m) + MACHINE_TYPE="${OPTARG}" + ;; + p) + HYPERVISOR="${OPTARG}" + ;; + s) + SANDBOX_CGROUP_ONLY="${OPTARG}" + ;; + ?) 
+ # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) + + # + # Get the device ready on the host + # + setup_configuration_file + + restart_containerd_service + sudo modprobe vfio + sudo modprobe vfio-pci + + host_pci=$(host_pci_addr) + [ -n "${host_pci}" ] || die "virtio ethernet controller PCI address not found" + + cat /proc/cmdline | grep -q "intel_iommu=on" || \ + die "intel_iommu=on not found in kernel cmdline" + + sudo driverctl set-override "${host_pci}" vfio-pci + + vfio_device="$(get_vfio_path "${host_pci}")" + [ -n "${vfio_device}" ] || die "vfio device not found" + vfio_major="$(printf '%d' $(stat -c '0x%t' ${vfio_device}))" + vfio_minor="$(printf '%d' $(stat -c '0x%T' ${vfio_device}))" + + [ -n "/dev/vfio/vfio" ] || die "vfio control device not found" + vfio_ctl_major="$(printf '%d' $(stat -c '0x%t' /dev/vfio/vfio))" + vfio_ctl_minor="$(printf '%d' $(stat -c '0x%T' /dev/vfio/vfio))" + + # Get the rootfs we'll use for all tests + pull_rootfs + + # + # Run the tests + # + + # test for guest-kernel mode + guest_kernel_cid="vfio-guest-kernel-${RANDOM}" + run_test_container "${guest_kernel_cid}" \ + "${tmp_data_dir}/vfio-guest-kernel" \ + "${script_path}/guest-kernel.json.in" \ + "${host_pci}" + check_guest_kernel "${guest_kernel_cid}" + + # Remove the container so we can re-use the device for the next test + clean_env_ctr + + # test for vfio mode + vfio_cid="vfio-vfio-${RANDOM}" + run_test_container "${vfio_cid}" \ + "${tmp_data_dir}/vfio-vfio" \ + "${script_path}/vfio.json.in" \ + "${host_pci}" + check_vfio "${vfio_cid}" +} + +main $@ diff --git a/tests/functional/vfio/vfio.json.in b/tests/functional/vfio/vfio.json.in new file mode 100644 index 000000000..19667c01e --- /dev/null +++ b/tests/functional/vfio/vfio.json.in @@ -0,0 +1,187 @@ +# +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +{ + "ociVersion": "1.0.0-rc2-dev", + "platform": { + "os": "linux", + "arch": "amd64" + 
}, + "annotations": { + "io.katacontainers.config.hypervisor.enable_iommu": "true", + "io.katacontainers.config.runtime.vfio_mode": "vfio" + }, + "process": { + "terminal": false, + "consoleSize": { + "height": 0, + "width": 0 + }, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ "/bin/tail", "-f", "/dev/null" ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + "PCIDEVICE_VIRTIO_NET=@HOST_PCI@" + ], + "cwd": "/", + "rlimits": [{ + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + }], + "noNewPrivileges": true + }, + "root": { + "path": "@ROOTFS@", + "readonly": false + }, + "hostname": "vfio-test", + "mounts": [{ + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": {}, + "linux": { + "devices": [{ + "path": "/dev/vfio/vfio", + "type": "c", + "major": @VFIO_CTL_MAJOR@, + "minor": @VFIO_CTL_MINOR@, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "@VFIO_PATH@", + "type": "c", + "major": @VFIO_MAJOR@, + "minor": @VFIO_MINOR@, + "fileMode": 384, + "uid": 
0, + "gid": 0 + }], + "cgroupsPath": "kata/vfiotest", + "resources": { + "devices": [ + {"allow":false,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":3,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":5,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":8,"access":"rwm"}, + {"allow":true,"type":"c","major":1,"minor":9,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":0,"access":"rwm"}, + {"allow":true,"type":"c","major":5,"minor":1,"access":"rwm"}, + {"allow": true,"access": "rwm","major": @VFIO_CTL_MAJOR@,"minor": @VFIO_CTL_MINOR@,"type": "c"}, + {"allow": true,"access": "rwm","major": @VFIO_MAJOR@,"minor": @VFIO_MINOR@,"type": "c"} + ] + }, + "namespaces": [{ + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh new file mode 100755 index 000000000..bdebbeb80 --- /dev/null +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -0,0 +1,320 @@ +#!/bin/bash +# +# Copyright (c) 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +# Run the .ci/jenkins_job_build.sh script in a VM +# that supports VFIO, then run VFIO functional tests + +set -o xtrace +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +cidir=$(dirname "$0") + +source /etc/os-release || source /usr/lib/os-release +# +#source "${cidir}/lib.sh" +export WORKSPACE="${WORKSPACE:-${cidir}/../../..}" +export GIT_URL="https://github.com/kata-containers/kata-containers.git" +# + +http_proxy=${http_proxy:-} +https_proxy=${https_proxy:-} +vm_ip="127.0.15.1" 
+vm_port="10022" +# Don't save data in /tmp, we need it after rebooting the system +data_dir="${HOME}/functional-vfio-test" +ssh_key_file="${data_dir}/key" +arch=$(uname -m) +artifacts_dir="${WORKSPACE}/artifacts" + +kill_vms() { + sudo killall -9 qemu-system-${arch} +} + +cleanup() { + mkdir -p ${artifacts_dir} + sudo chown -R ${USER} ${artifacts_dir} + scp_vm ${artifacts_dir}/* ${artifacts_dir} || true + kill_vms +} + +create_ssh_key() { + rm -f "${ssh_key_file}" + ssh-keygen -f "${ssh_key_file}" -t rsa -N "" +} + +create_meta_data() { + file="$1" + cat < "${file}" +{ + "uuid": "d1b4aafa-5d75-4f9c-87eb-2ceabe110c39", + "hostname": "test" +} +EOF +} + +create_user_data() { + file="$1" + ssh_pub_key_file="$2" + + ssh_pub_key="$(cat "${ssh_pub_key_file}")" + dnf_proxy="" + service_proxy="" + docker_user_proxy="{}" + environment=$(env | egrep "ghprb|WORKSPACE|KATA|GIT|JENKINS|_PROXY|_proxy" | \ + sed -e "s/'/'\"'\"'/g" \ + -e "s/\(^[[:alnum:]_]\+\)=/\1='/" \ + -e "s/$/'/" \ + -e 's/^/ export /') + + if [ -n "${http_proxy}" ] && [ -n "${https_proxy}" ]; then + dnf_proxy="proxy=${http_proxy}" + service_proxy='[Service] + Environment="HTTP_PROXY='${http_proxy}'" "HTTPS_PROXY='${https_proxy}'" "NO_PROXY='${no_proxy}'"' + docker_user_proxy='{"proxies": { "default": { + "httpProxy": "'${http_proxy}'", + "httpsProxy": "'${https_proxy}'", + "noProxy": "'${no_proxy}'" + } } }' + fi + + cat < "${file}" +#cloud-config +package_upgrade: false +runcmd: +- chown -R ${USER}:${USER} /home/${USER} +- touch /.done +users: +- gecos: User + gid: "1000" + lock-passwd: true + name: ${USER} + shell: /bin/bash + ssh-authorized-keys: + - ${ssh_pub_key} + sudo: ALL=(ALL) NOPASSWD:ALL + uid: "1000" +write_files: +- content: | +${environment} + path: /etc/environment +- content: | + ${service_proxy} + path: /etc/systemd/system/docker.service.d/http-proxy.conf +- content: | + ${service_proxy} + path: /etc/systemd/system/containerd.service.d/http-proxy.conf +- content: | + ${docker_user_proxy} + 
path: ${HOME}/.docker/config.json +- content: | + ${docker_user_proxy} + path: /root/.docker/config.json +- content: | + set -x + set -o errexit + set -o nounset + set -o pipefail + set -o errtrace + . /etc/environment + . /etc/os-release + + [ "\$ID" = "fedora" ] || (echo >&2 "$0 only supports Fedora"; exit 1) + + echo "${dnf_proxy}" | sudo tee -a /etc/dnf/dnf.conf + + for i in \$(seq 1 50); do + [ -f /.done ] && break + echo "waiting for cloud-init to finish" + sleep 5; + done + + export FORCE_JENKINS_JOB_BUILD=1 + export DEBUG=true + export CI_JOB="VFIO" + export GOPATH=\${WORKSPACE}/go + export PATH=\${GOPATH}/bin:/usr/local/go/bin:/usr/sbin:\${PATH} + export GOROOT="/usr/local/go" + export KUBERNETES="no" + export USE_DOCKER="true" + export ghprbPullId + export ghprbTargetBranch + + # Make sure the packages were installed + # Sometimes cloud-init is unable to install them + sudo dnf makecache + sudo dnf install -y git make pciutils driverctl + + git config --global user.email "foo@bar" + git config --global user.name "Foo Bar" + + tests_repo="github.com/kata-containers/tests" + tests_repo_dir="\${GOPATH}/src/\${tests_repo}" + trap "cd \${tests_repo_dir}; sudo -E PATH=\$PATH .ci/teardown.sh ${artifacts_dir} || true; sudo chown -R \${USER} ${artifacts_dir}" EXIT + + curl -OL https://raw.githubusercontent.com/kata-containers/tests/\${ghprbTargetBranch}/.ci/ci_entry_point.sh + bash -x ci_entry_point.sh "${GIT_URL}" + + path: /home/${USER}/run.sh + permissions: '0755' +EOF +} + +create_config_iso() { + iso_file="$1" + ssh_pub_key_file="${ssh_key_file}.pub" + iso_data_dir="${data_dir}/d" + meta_data_file="${iso_data_dir}/openstack/latest/meta_data.json" + user_data_file="${iso_data_dir}/openstack/latest/user_data" + + mkdir -p $(dirname "${user_data_file}") + + create_meta_data "${meta_data_file}" + create_user_data "${user_data_file}" "${ssh_pub_key_file}" + + [ -f "${iso_file}" ] && rm -f "${iso_file}" + + xorriso -as mkisofs -R -V config-2 -o "${iso_file}" 
"${iso_data_dir}" +} + +pull_fedora_cloud_image() { + fedora_img="$1" + fedora_version=37 + # Add a version to the image cache, otherwise the tests are going to + # use always the same image without rebuilding it, regardless the version + # set in fedora_version + fedora_img_cache="${fedora_img}.cache.${fedora_version}" + fedora_img_url="https://download.fedoraproject.org/pub/fedora/linux/releases/${fedora_version}/Cloud/${arch}/images/Fedora-Cloud-Base-${fedora_version}-1.7.${arch}.raw.xz" + + if [ ! -f "${fedora_img_cache}" ]; then + curl -sL ${fedora_img_url} -o "${fedora_img_cache}.xz" + xz -f -d "${fedora_img_cache}.xz" + sync + fi + + cp -a "${fedora_img_cache}" "${fedora_img}" + sync + + # setup cloud image + sudo losetup -D + loop=$(sudo losetup --show -Pf "${fedora_img}") + sudo mount "${loop}p2" /mnt + + # add intel_iommu=on to the guest kernel command line + kernelopts="intel_iommu=on iommu=pt selinux=0" + entries=$(sudo ls /mnt/loader/entries/) + for entry in ${entries}; do + sudo sed -i '/^options / s/$/ intel_iommu=on iommu=pt selinux=0 /g' /mnt/loader/entries/"${entry}" + done + sudo sed -i 's|kernelopts="|kernelopts="'"${kernelopts}"'|g' /mnt/grub2/grub.cfg + sudo sed -i 's|kernelopts=|kernelopts='"${kernelopts}"'|g' /mnt/grub2/grubenv + + # cleanup + sudo umount -R /mnt/ + sudo losetup -d "${loop}" + + qemu-img resize -f raw "${fedora_img}" +20G +} + +run_vm() { + image="$1" + config_iso="$2" + disable_modern="off" + hostname="$(hostname)" + memory="16384M" + cpus=$(nproc) + machine_type="q35" + + /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \ + -cpu host,host-phys-bits \ + -machine ${machine_type},accel=kvm,kernel_irqchip=split \ + -device intel-iommu,intremap=on,caching-mode=on,device-iotlb=on \ + -drive file=${image},if=virtio,aio=threads,format=raw \ + -drive file=${config_iso_file},if=virtio,media=cdrom \ + -daemonize -enable-kvm -device virtio-rng-pci -display none -vga none \ + -netdev 
user,hostfwd=tcp:${vm_ip}:${vm_port}-:22,hostname="${hostname}",id=net0 \ + -device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \ + -netdev user,id=net1 \ + -device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on +} + +install_dependencies() { + case "${ID}" in + ubuntu) + # cloud image dependencies + deps=(xorriso curl qemu-utils openssh-client) + + sudo apt-get update + sudo apt-get install -y ${deps[@]} + sudo apt-get install --reinstall -y qemu-system-x86 + ;; + fedora) + # cloud image dependencies + deps=(xorriso curl qemu-img openssh) + + sudo dnf install -y ${deps[@]} qemu-system-x86-core + sudo dnf reinstall -y qemu-system-x86-core + ;; + + "*") + die "Unsupported distro: ${ID}" + ;; + esac +} + +ssh_vm() { + cmd=$@ + ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -p "${vm_port}" "${USER}@${vm_ip}" "${cmd}" +} + +scp_vm() { + guest_src=$1 + host_dest=$2 + scp -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -P "${vm_port}" ${USER}@${vm_ip}:${guest_src} ${host_dest} +} + +wait_for_vm() { + for i in $(seq 1 30); do + if ssh_vm true; then + return 0 + fi + info "waiting for VM to start" + sleep 5 + done + return 1 +} + +main() { + trap cleanup EXIT + + config_iso_file="${data_dir}/config.iso" + fedora_img="${data_dir}/image.img" + + mkdir -p "${data_dir}" + + install_dependencies + + create_ssh_key + + create_config_iso "${config_iso_file}" + + for i in $(seq 1 5); do + pull_fedora_cloud_image "${fedora_img}" + run_vm "${fedora_img}" "${config_iso_file}" + if wait_for_vm; then + break + fi + info "Couldn't connect to the VM. Stopping VM and starting a new one." 
+ kill_vms + done + + ssh_vm "/home/${USER}/run.sh" +} + +main $@ From e90027f38ca7676ea14ca9f699730366c979e209 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 23 Aug 2023 16:59:26 +0200 Subject: [PATCH 165/339] tests/vfio: Move dependency installation to gha-run.sh To match the flow of other github actions workflows. Signed-off-by: Jeremi Piotrowski (cherry picked from commit a92ddeea15e11078b095bd748df709da48e3ad52) --- tests/functional/vfio/gha-run.sh | 22 ++++++++++++++++ .../functional/vfio/vfio_fedora_vm_wrapper.sh | 26 ------------------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/tests/functional/vfio/gha-run.sh b/tests/functional/vfio/gha-run.sh index cfd808224..97c72f80b 100755 --- a/tests/functional/vfio/gha-run.sh +++ b/tests/functional/vfio/gha-run.sh @@ -15,6 +15,28 @@ source "${vfio_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the vfio tests" + ( + source /etc/os-release || source /usr/lib/os-release + case "${ID}" in + ubuntu) + # cloud image dependencies + deps=(xorriso curl qemu-utils openssh-client) + + sudo apt-get update + sudo apt-get install -y ${deps[@]} qemu-system-x86 + ;; + fedora) + # cloud image dependencies + deps=(xorriso curl qemu-img openssh) + + sudo dnf install -y ${deps[@]} qemu-system-x86-core + ;; + + "*") + die "Unsupported distro: ${ID}" + ;; + esac + ) } function run() { diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index bdebbeb80..e3fbf855e 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -244,30 +244,6 @@ run_vm() { -device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on } -install_dependencies() { - case "${ID}" in - ubuntu) - # cloud image dependencies - deps=(xorriso curl qemu-utils openssh-client) - - sudo apt-get update - sudo 
apt-get install -y ${deps[@]} - sudo apt-get install --reinstall -y qemu-system-x86 - ;; - fedora) - # cloud image dependencies - deps=(xorriso curl qemu-img openssh) - - sudo dnf install -y ${deps[@]} qemu-system-x86-core - sudo dnf reinstall -y qemu-system-x86-core - ;; - - "*") - die "Unsupported distro: ${ID}" - ;; - esac -} - ssh_vm() { cmd=$@ ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -i "${ssh_key_file}" -p "${vm_port}" "${USER}@${vm_ip}" "${cmd}" @@ -298,8 +274,6 @@ main() { mkdir -p "${data_dir}" - install_dependencies - create_ssh_key create_config_iso "${config_iso_file}" From a69d0d1772be5864a606db7e473335d27b706411 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Mon, 21 Aug 2023 18:03:58 +0200 Subject: [PATCH 166/339] tests/vfio: Change to get the test working in GHA - reduce memory and cpu usage to fit in a D4s_v5 - source correct lib - mount workspace from 9p - disable cpu mitigations for speed - drop unused commands and variables - install containerd - install kata from built artifacts Signed-off-by: Jeremi Piotrowski (cherry picked from commit df815087e774f4a0ae9a479ef01490d58e20187a) --- tests/functional/vfio/run.sh | 2 +- .../functional/vfio/vfio_fedora_vm_wrapper.sh | 60 ++++++++++++------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh index f60ac82c7..965329fcd 100755 --- a/tests/functional/vfio/run.sh +++ b/tests/functional/vfio/run.sh @@ -222,7 +222,7 @@ setup_configuration_file() { # enable debug sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \ -e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \ - -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug"/g' \ + -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 mitigations=off agent.log=debug"/g' \ "${kata_config_file}" # enable VFIO relevant hypervisor annotations diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh 
b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index e3fbf855e..3d666e022 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -14,13 +14,14 @@ set -o nounset set -o pipefail set -o errtrace -cidir=$(dirname "$0") +cidir=$(readlink -f $(dirname "$0")) source /etc/os-release || source /usr/lib/os-release # -#source "${cidir}/lib.sh" -export WORKSPACE="${WORKSPACE:-${cidir}/../../..}" +source "${cidir}/../../common.bash" +export WORKSPACE="${WORKSPACE:-${HOME}}" export GIT_URL="https://github.com/kata-containers/kata-containers.git" +export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" # http_proxy=${http_proxy:-} @@ -101,6 +102,16 @@ users: sudo: ALL=(ALL) NOPASSWD:ALL uid: "1000" write_files: +- content: | + [main] + fastestmirror=True + gpgcheck=1 + max_parallel_downloads=10 + installonly_limit=2 + clean_requirements_on_remove=True + keepcache=True + ip_resolve=4 + path: /etc/dnf/dnf.conf - content: | ${environment} path: /etc/environment @@ -135,31 +146,35 @@ ${environment} sleep 5; done - export FORCE_JENKINS_JOB_BUILD=1 export DEBUG=true - export CI_JOB="VFIO" export GOPATH=\${WORKSPACE}/go export PATH=\${GOPATH}/bin:/usr/local/go/bin:/usr/sbin:\${PATH} export GOROOT="/usr/local/go" - export KUBERNETES="no" - export USE_DOCKER="true" - export ghprbPullId - export ghprbTargetBranch # Make sure the packages were installed # Sometimes cloud-init is unable to install them - sudo dnf makecache - sudo dnf install -y git make pciutils driverctl + sudo dnf install -y git wget pciutils driverctl git config --global user.email "foo@bar" git config --global user.name "Foo Bar" - tests_repo="github.com/kata-containers/tests" - tests_repo_dir="\${GOPATH}/src/\${tests_repo}" + sudo mkdir -p /workspace + sudo mount -t 9p -o access=any,trans=virtio,version=9p2000.L workspace /workspace trap "cd \${tests_repo_dir}; sudo -E PATH=\$PATH .ci/teardown.sh ${artifacts_dir} || true; sudo chown -R \${USER} 
${artifacts_dir}" EXIT - curl -OL https://raw.githubusercontent.com/kata-containers/tests/\${ghprbTargetBranch}/.ci/ci_entry_point.sh - bash -x ci_entry_point.sh "${GIT_URL}" + pushd /workspace + source tests/common.bash + ensure_yq + cri_containerd=\$(get_from_kata_deps "externals.containerd.lts") + cri_tools=\$(get_from_kata_deps "externals.critools.latest") + install_cri_containerd \${cri_containerd} + install_cri_tools \${cri_tools} + + kata_tarball_dir="kata-artifacts" + install_kata + + sudo /workspace/tests/functional/vfio/run.sh -s false -p \${KATA_HYPERVISOR} -m q35 -i image + sudo /workspace/tests/functional/vfio/run.sh -s true -p \${KATA_HYPERVISOR} -m q35 -i image path: /home/${USER}/run.sh permissions: '0755' @@ -207,10 +222,10 @@ pull_fedora_cloud_image() { sudo mount "${loop}p2" /mnt # add intel_iommu=on to the guest kernel command line - kernelopts="intel_iommu=on iommu=pt selinux=0" + kernelopts="intel_iommu=on iommu=pt selinux=0 mitigations=off idle=poll kvm.tdp_mmu=0" entries=$(sudo ls /mnt/loader/entries/) for entry in ${entries}; do - sudo sed -i '/^options / s/$/ intel_iommu=on iommu=pt selinux=0 /g' /mnt/loader/entries/"${entry}" + sudo sed -i '/^options / s/$/ '"${kernelopts}"' /g' /mnt/loader/entries/"${entry}" done sudo sed -i 's|kernelopts="|kernelopts="'"${kernelopts}"'|g' /mnt/grub2/grub.cfg sudo sed -i 's|kernelopts=|kernelopts='"${kernelopts}"'|g' /mnt/grub2/grubenv @@ -227,11 +242,11 @@ run_vm() { config_iso="$2" disable_modern="off" hostname="$(hostname)" - memory="16384M" - cpus=$(nproc) + memory="8192M" + cpus=2 machine_type="q35" - /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \ + sudo /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \ -cpu host,host-phys-bits \ -machine ${machine_type},accel=kvm,kernel_irqchip=split \ -device intel-iommu,intremap=on,caching-mode=on,device-iotlb=on \ @@ -241,7 +256,10 @@ run_vm() { -netdev user,hostfwd=tcp:${vm_ip}:${vm_port}-:22,hostname="${hostname}",id=net0 \ 
-device virtio-net-pci,netdev=net0,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \ -netdev user,id=net1 \ - -device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on + -device virtio-net-pci,netdev=net1,disable-legacy=on,disable-modern="${disable_modern}",iommu_platform=on,ats=on \ + -fsdev local,path=${repo_root_dir},security_model=passthrough,id=fs0 \ + -device virtio-9p-pci,fsdev=fs0,mount_tag=workspace + } ssh_vm() { From 44f37f689adc5545bdaac4982f5d0cc40af373b2 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 23 Aug 2023 17:00:58 +0200 Subject: [PATCH 167/339] tests/vfio: Capture journal from vm For debugging (though this doesn't get exposed yet). Signed-off-by: Jeremi Piotrowski (cherry picked from commit 8fffdc81c5987301584ffefe43e5267dfff84f71) --- tests/functional/vfio/vfio_fedora_vm_wrapper.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index 3d666e022..07ef83874 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -160,7 +160,8 @@ ${environment} sudo mkdir -p /workspace sudo mount -t 9p -o access=any,trans=virtio,version=9p2000.L workspace /workspace - trap "cd \${tests_repo_dir}; sudo -E PATH=\$PATH .ci/teardown.sh ${artifacts_dir} || true; sudo chown -R \${USER} ${artifacts_dir}" EXIT + mkdir -p ${artifacts_dir} + trap "cd /workspace; sudo journalctl -b0 > ${artifacts_dir}/journal.log || true; sudo chown -R \${USER} ${artifacts_dir}" EXIT pushd /workspace source tests/common.bash From 01265fb2172352c93c0054fc8b97c07c9d9bb30d Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Thu, 24 Aug 2023 13:23:05 +0200 Subject: [PATCH 168/339] tests/vfio: Gather debug info and disable tdp_mmu tdp_mmu had some issues up until around Linux v6.3 that make it work particularly bad when 
running nested on Hyper-V. Reload the module at the start of the test and disable the tdp_mmu param. Gather debug info at the end of the test to make it easier to figure out what went wrong. This uses github actions group syntax so that each section can be collapsed. Signed-off-by: Jeremi Piotrowski (cherry picked from commit 5f6475a28a6329eaca693cc00fc4034f79e68b24) --- .../functional/vfio/vfio_fedora_vm_wrapper.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index 07ef83874..292845532 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -43,6 +43,14 @@ cleanup() { sudo chown -R ${USER} ${artifacts_dir} scp_vm ${artifacts_dir}/* ${artifacts_dir} || true kill_vms + + echo "::group::L2 journal" + cat "${artifacts_dir}/journal.log" + echo "::endgroup::" + + echo "::group::L1 dmesg" + sudo dmesg + echo "::endgroup::" } create_ssh_key() { @@ -238,6 +246,14 @@ pull_fedora_cloud_image() { qemu-img resize -f raw "${fedora_img}" +20G } +reload_kvm() { + # TDP_MMU is buggy on Hyper-V until v6.3/v6.4 + sudo rmmod kvm-intel kvm-amd kvm || true + sudo modprobe kvm tdp_mmu=0 + sudo modprobe kvm-intel || true + sudo modprobe kvm-amd || true +} + run_vm() { image="$1" config_iso="$2" @@ -247,6 +263,8 @@ run_vm() { cpus=2 machine_type="q35" + reload_kvm + sudo /usr/bin/qemu-system-${arch} -m "${memory}" -smp cpus="${cpus}" \ -cpu host,host-phys-bits \ -machine ${machine_type},accel=kvm,kernel_irqchip=split \ From 86201ace5a0d6800375e0ec243f31ce217ef4c45 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 15:29:38 +0200 Subject: [PATCH 169/339] runtime: clh: Add hot_plug_vfio entry to config hot_plug_vfio needs to be set to root-port, otherwise attaching vfio devices to CLH VMs fails. 
Either cold_plug_vfio or hot_plug_vfio is required, and we have not implemented support for cold_plug_vfio in CLH yet. Signed-off-by: Jeremi Piotrowski (cherry picked from commit 509771e6f531db731cd5a2ecbbb1c6d6507ed991) --- src/runtime/config/configuration-clh.toml.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index fd906ff0a..a7c203d20 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -194,6 +194,10 @@ block_device_driver = "virtio-blk" # Default false #enable_debug = true +# Enable hot-plugging of VFIO devices to a root-port. +# The default setting is "no-port" +#hot_plug_vfio = "root-port" + # Path to OCI hook binaries in the *guest rootfs*. # This does not affect host-side hooks which must instead be added to # the OCI spec passed to the runtime. From 0a918d0d20d0f2fea05fc94a0a59d7af3ebf6b67 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 15:32:14 +0200 Subject: [PATCH 170/339] runtime: Check config for supported CLH (cold|hot)_plug_vfio values The only supported options are hot_plug_vfio=root-port or no-port. cold_plug_vfio not supported yet. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit fc51e4b9eb2d0d99aa224e2cb6805be7af3e9d67) --- src/runtime/pkg/katautils/config.go | 12 ++++++++++-- src/runtime/virtcontainers/clh_test.go | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index f577df526..0a2c8022c 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1680,8 +1680,8 @@ func checkConfig(config oci.RuntimeConfig) error { // Only allow one of the following settings for cold-plug: // no-port, root-port, switch-port func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string, hypervisorType virtcontainers.HypervisorType) error { - if hypervisorType != virtcontainers.QemuHypervisor { - kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU hypervisor, ignoring hot(cold)_vfio_port setting") + if hypervisorType != virtcontainers.QemuHypervisor && hypervisorType != virtcontainers.ClhHypervisor { + kataUtilsLogger.Warn("Advanced PCIe Topology only available for QEMU/CLH hypervisor, ignoring hot(cold)_vfio_port setting") return nil } @@ -1696,6 +1696,14 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT if machineType != "q35" && machineType != "virt" { return nil } + if hypervisorType == virtcontainers.ClhHypervisor { + if coldPlug != config.NoPort { + return fmt.Errorf("cold-plug not supported on CLH") + } + if hotPlug != config.RootPort { + return fmt.Errorf("only hot-plug=%s supported on CLH", config.RootPort) + } + } var port config.PCIePort if coldPlug != config.NoPort { diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index d617ab4e1..c141adfd9 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -68,6 +68,7 @@ func newClhConfig() (HypervisorConfig, error) { NetRateLimiterBwOneTimeBurst: 
int64(0), NetRateLimiterOpsMaxRate: int64(0), NetRateLimiterOpsOneTimeBurst: int64(0), + HotPlugVFIO: config.NoPort, }, nil } From d86af5923f2dc154f38da70d1d2772628b03713a Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 8 Sep 2023 15:12:31 +0200 Subject: [PATCH 171/339] runtime: Add test cases for checkPCIeConfig These test cases show which options are valid for CLH/Qemu, and test that we correctly catch unsupported combinations. Signed-off-by: Jeremi Piotrowski (cherry picked from commit 7c4e73b609a6698fd7f589253b6fe90fe857e042) --- src/runtime/pkg/katautils/create_test.go | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 260800378..903e68d95 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -18,8 +18,10 @@ import ( "syscall" "testing" + config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock" @@ -419,3 +421,32 @@ func TestCreateContainer(t *testing.T) { assert.NoError(err) } } + +func TestVfioChecksClh(t *testing.T) { + assert := assert.New(t) + + // Check valid CLH vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.ClhHypervisor) + } + assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + 
assert.Error(f(config.RootPort, config.NoPort)) + assert.Error(f(config.NoPort, config.SwitchPort)) +} + +func TestVfioCheckQemu(t *testing.T) { + assert := assert.New(t) + + // Check valid Qemu vfio configs + f := func(coldPlug, hotPlug config.PCIePort) error { + return checkPCIeConfig(coldPlug, hotPlug, defaultMachineType, virtcontainers.QemuHypervisor) + } + + assert.NoError(f(config.NoPort, config.NoPort)) + assert.NoError(f(config.RootPort, config.NoPort)) + assert.NoError(f(config.NoPort, config.RootPort)) + assert.Error(f(config.RootPort, config.RootPort)) + assert.Error(f(config.SwitchPort, config.RootPort)) +} From 18a8b8df03f25e696a0400f2b4ccd0f8ef6ea478 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 8 Sep 2023 15:14:56 +0200 Subject: [PATCH 172/339] runtime: Remove redundant check in checkPCIeConfig There is no way for this branch to be hit, as port is only set when it is different than config.NoPort. Signed-off-by: Jeremi Piotrowski (cherry picked from commit bfc93927fbc4fcd13ec2f182091e14ded4be7760) --- src/runtime/pkg/katautils/config.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 0a2c8022c..a4e17b239 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -1712,10 +1712,6 @@ func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineT if hotPlug != config.NoPort { port = hotPlug } - if port == config.NoPort { - return fmt.Errorf("invalid vfio_port=%s setting, use on of %s, %s, %s", - port, config.BridgePort, config.RootPort, config.SwitchPort) - } if port == config.BridgePort || port == config.RootPort || port == config.SwitchPort { return nil } From 65943d5b77b4868488a5d87a6277dbfbb576f59a Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 15:50:48 +0200 Subject: [PATCH 173/339] tests/vfio: Configure a value for 'hot_plug_vfio' for both vmms This shouldn't be hiding 
behind only a qemu check, we need this for clh as well. Signed-off-by: Jeremi Piotrowski (cherry picked from commit b46b0ecf8b9ab2bd9eaac4128f88e93c162895b9) --- tests/functional/vfio/run.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh index 965329fcd..635e161c8 100755 --- a/tests/functional/vfio/run.sh +++ b/tests/functional/vfio/run.sh @@ -188,13 +188,18 @@ setup_configuration_file() { if [ -n "$MACHINE_TYPE" ]; then if [ "$HYPERVISOR" = "qemu" ]; then sed -i 's|^machine_type.*|machine_type = "'${MACHINE_TYPE}'"|g' "${kata_config_file}" - # Make sure we have set hot_plug_vfio to a reasonable value - sudo sed -i -e 's|^#hot_plug_vfio =.*$|hot_plug_vfio = "bridge-port"|' -e 's|^hot_plug_vfio = .*$|hot_plug_vfio = "bridge-port"|' "${kata_config_file}" else warn "Variable machine_type only applies to qemu. It will be ignored" fi fi + # Make sure we have set hot_plug_vfio to a reasonable value + if [ "$HYPERVISOR" = "qemu" ]; then + sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "bridge-port"|' "${kata_config_file}" + elif [ "$HYPERVISOR" = "clh" ]; then + sed -i -e 's|^#*.*hot_plug_vfio.*|hot_plug_vfio = "root-port"|' "${kata_config_file}" + fi + if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='${SANDBOX_CGROUP_ONLY}'|g' "${kata_config_file}" fi From a0f59829b21370a080ce5f3a8dc85d08a95234bf Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 17:35:22 +0200 Subject: [PATCH 174/339] tests/vfio: Give commands 30s to execute This is to catch the case of the guest getting stuck. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 9f1a42c6cc94462fb2e90553f5ce58de5a51702d) --- tests/functional/vfio/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh index 635e161c8..83cf16100 100755 --- a/tests/functional/vfio/run.sh +++ b/tests/functional/vfio/run.sh @@ -79,7 +79,7 @@ run_container() { get_ctr_cmd_output() { local container_id="$1" shift - sudo -E ctr t exec --exec-id 2 "${container_id}" "${@}" + timeout 30s sudo -E ctr t exec --exec-id 2 "${container_id}" "${@}" } check_guest_kernel() { From 3b5c5bcfa4eda9343e32d0967f1fc903c1f1458f Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 16:40:55 +0000 Subject: [PATCH 175/339] runtime: clh: Support enabling iommu by enabling IOMMU on the default PCI segment. For hotplug to work we need a virtualized iommu and clh exposes one if there is some device or PCI segment that requests it. I would have preferred to add a separate PCI segment for hotplugging vfio devices but unfortunately kata assumes there is only one segment all over the place. See create_pci_root_bus_path(), split_vfio_pci_option() and grep for '0000'. Enabling the IOMMU on the default PCI segment requires enabling IOMMU on every device that is attached to it, which is why it is sprinkled all over the place. CLH does not support IOMMU for VirtioFs, so I've added a non IOMMU segment for that device. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 3a1db7a86ba1ab0dd3a659f29612535087c0922e) --- src/runtime/config/configuration-clh.toml.in | 6 ++++++ src/runtime/virtcontainers/clh.go | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index a7c203d20..4531a4e65 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -188,6 +188,12 @@ block_device_driver = "virtio-blk" # Disable the 'seccomp' feature from Cloud Hypervisor, default false # disable_seccomp = true +# Enable vIOMMU, default false +# Enabling this will result in the VM having a vIOMMU device +# This will also add the following options to the kernel's +# command line: iommu=pt +#enable_iommu = true + # This option changes the default hypervisor and kernel parameters # to enable debug output where available. # diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 10a1fc7ab..f031811e9 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -490,6 +490,13 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } clh.vmconfig.Payload.SetKernel(kernelPath) + clh.vmconfig.Platform = chclient.NewPlatformConfig() + platform := clh.vmconfig.Platform + platform.SetNumPciSegments(2) + if clh.config.IOMMU { + platform.SetIommuSegments([]int32{0}) + } + if clh.config.ConfidentialGuest { if err := clh.enableProtection(); err != nil { return err @@ -528,6 +535,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // start the guest kernel with 'quiet' in non-debug mode params = append(params, Param{"quiet", ""}) } + if clh.config.IOMMU { + params = append(params, Param{"iommu", "pt"}) + } // Followed by extra kernel parameters defined in the configuration file params = append(params, clh.config.KernelParams...) 
@@ -536,6 +546,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // set random device generator to hypervisor clh.vmconfig.Rng = chclient.NewRngConfig(clh.config.EntropySource) + clh.vmconfig.Rng.SetIommu(clh.config.IOMMU) // set the initial root/boot disk of hypervisor imagePath, err := clh.config.ImageAssetPath() @@ -561,6 +572,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } else { pmem := chclient.NewPmemConfig(imagePath) *pmem.DiscardWrites = true + pmem.SetIommu(clh.config.IOMMU) if clh.vmconfig.Pmem != nil { *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) @@ -598,6 +610,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF) } + clh.vmconfig.Console.SetIommu(clh.config.IOMMU) cpu_topology := chclient.NewCpuTopology() cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1) @@ -840,6 +853,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro queueSize := int32(1024) clhDisk.NumQueues = &queues clhDisk.QueueSize = &queueSize + clhDisk.SetIommu(clh.config.IOMMU) diskRateLimiterConfig := clh.getDiskRateLimiterConfig() if diskRateLimiterConfig != nil { @@ -865,6 +879,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error { // Create the clh device config via the constructor to ensure default values are properly assigned clhDevice := *chclient.NewDeviceConfig(device.SysfsDev) + clhDevice.SetIommu(clh.config.IOMMU) pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice) if err != nil { return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) @@ -1538,6 +1553,7 @@ func (clh *cloudHypervisor) addVSock(cid int64, path string) { }).Info("Adding HybridVSock") clh.vmconfig.Vsock = chclient.NewVsockConfig(cid, path) + clh.vmconfig.Vsock.SetIommu(clh.config.IOMMU) } func (clh *cloudHypervisor) 
getRateLimiterConfig(bwSize, bwOneTimeBurst, opsSize, opsOneTimeBurst int64) *chclient.RateLimiterConfig { @@ -1607,6 +1623,7 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error { if netRateLimiterConfig != nil { net.SetRateLimiterConfig(*netRateLimiterConfig) } + net.SetIommu(clh.config.IOMMU) if clh.netDevices != nil { *clh.netDevices = append(*clh.netDevices, *net) @@ -1639,6 +1656,7 @@ func (clh *cloudHypervisor) addVolume(volume types.Volume) error { } fs := chclient.NewFsConfig(volume.MountTag, vfsdSockPath, numQueues, queueSize) + fs.SetPciSegment(1) clh.vmconfig.Fs = &[]chclient.FsConfig{*fs} clh.Logger().Debug("Adding share volume to hypervisor: ", volume.MountTag) From 32be55aa8a2abd70172f21c9c018ab7605d3f39b Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Wed, 30 Aug 2023 16:41:57 +0000 Subject: [PATCH 176/339] packaging: kernel: Enable VIRTIO_IOMMU on x86_64 Cloud Hypervisor exposes a VIRTIO_IOMMU device to the VM when IOMMU support is enabled. We need to add it to the whitelist because dragonball uses kernel v5.10 which restricted VIRTIO_IOMMU to ARM64 only. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 1b02f89e4f9c9e0afe012da99135d097029ad312) Conflicts: tools/packaging/kernel/kata_config_version --- tools/packaging/kernel/configs/fragments/whitelist.conf | 1 + tools/packaging/kernel/configs/fragments/x86_64/vfio.conf | 1 + tools/packaging/kernel/kata_config_version | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kernel/configs/fragments/whitelist.conf b/tools/packaging/kernel/configs/fragments/whitelist.conf index d6e04444f..a6396a582 100644 --- a/tools/packaging/kernel/configs/fragments/whitelist.conf +++ b/tools/packaging/kernel/configs/fragments/whitelist.conf @@ -18,3 +18,4 @@ CONFIG_ARM64_UAO CONFIG_VFIO_MDEV_DEVICE CONFIG_SPECULATION_MITIGATIONS CONFIG_X86_SGX +CONFIG_VIRTIO_IOMMU diff --git a/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf b/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf index 09a4bf02f..e052e10e6 100644 --- a/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf +++ b/tools/packaging/kernel/configs/fragments/x86_64/vfio.conf @@ -1,3 +1,4 @@ # x86 specific items we need in order to handle vfio_mode=vfio devices CONFIG_INTEL_IOMMU=y CONFIG_IRQ_REMAP=y +CONFIG_VIRTIO_IOMMU=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 194b81caa..9b252fd09 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -112 +113 From a1aed0c78e73f36759567045523986ab6cd84741 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Tue, 5 Sep 2023 16:57:38 +0200 Subject: [PATCH 177/339] gha: vfio: Set test timeout to 15m Sometimes the test gets stuck running commands in the container - need to investigate why later. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 7211c3dccc9aaee5fd3ad18ab6b013b502abc06e) --- .github/workflows/run-vfio-tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml index b5aa739ce..4542ec1a3 100644 --- a/.github/workflows/run-vfio-tests.yaml +++ b/.github/workflows/run-vfio-tests.yaml @@ -45,4 +45,5 @@ jobs: path: kata-artifacts - name: Run vfio tests + timeout-minutes: 15 run: bash tests/functional/vfio/gha-run.sh run From 82a02251592ea9d06c8921a7cbf7e97fc51a2217 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Tue, 5 Sep 2023 17:12:59 +0200 Subject: [PATCH 178/339] tests/vfio: Get rid of sync's It is fine to start a VM with the disk image without syncing it as we now run the test in an ephemeral Azure instance. Signed-off-by: Jeremi Piotrowski (cherry picked from commit df3dc1105c31a7e2ed301f5f90b4e6796d268bd3) --- tests/functional/vfio/vfio_fedora_vm_wrapper.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index 292845532..1bb18f1d4 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -219,11 +219,9 @@ pull_fedora_cloud_image() { if [ ! -f "${fedora_img_cache}" ]; then curl -sL ${fedora_img_url} -o "${fedora_img_cache}.xz" xz -f -d "${fedora_img_cache}.xz" - sync fi cp -a "${fedora_img_cache}" "${fedora_img}" - sync # setup cloud image sudo losetup -D From b40a42699d1125c541a246b29c0c51c82c5516c8 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Thu, 7 Sep 2023 14:25:20 +0200 Subject: [PATCH 179/339] tests/vfio: Accept single device in vfio group for CLH cloud hypervisor does not emulate pcie switches or pci bridges, so we need to accept a lonely device. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit faee59b520a7f916d5aab0d1ac254daf6294a178) --- tests/functional/vfio/run.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/functional/vfio/run.sh b/tests/functional/vfio/run.sh index 83cf16100..4f36709a8 100755 --- a/tests/functional/vfio/run.sh +++ b/tests/functional/vfio/run.sh @@ -113,9 +113,13 @@ check_vfio() { # There should be two devices in the IOMMU group: the ethernet # device we care about, plus the PCIe to PCI bridge device devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)" - if [ $(echo "${devs}" | wc -w) != "2" ] ; then + num_devices=$(echo "${devs}" | wc -w) + if [ "${HYPERVISOR}" = "qemu" ] && [ "${num_devices}" != "2" ] ; then die "Expected exactly two devices got: ${devs}" fi + if [ "${HYPERVISOR}" = "clh" ] && [ "${num_devices}" != "1" ] ; then + die "Expected exactly one device got: ${devs}" + fi # The bridge device will always sort first, because it is on # bus zero, whereas the NIC will be on a non-zero bus From 1d78871713ee45108102d18c4ebc6ae696fef7ae Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Thu, 7 Sep 2023 17:14:43 +0200 Subject: [PATCH 180/339] tests/vfio: Bump VM image to Fedora 38 We need a very recent L2 guest kernel to fix all the bugs that occur in nested virtualization. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 9d93036783812c3f74e05bb3b639670aa04bf2cc) --- tests/functional/vfio/vfio_fedora_vm_wrapper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh index 1bb18f1d4..bddd03445 100755 --- a/tests/functional/vfio/vfio_fedora_vm_wrapper.sh +++ b/tests/functional/vfio/vfio_fedora_vm_wrapper.sh @@ -209,12 +209,12 @@ create_config_iso() { pull_fedora_cloud_image() { fedora_img="$1" - fedora_version=37 + fedora_version=38 # Add a version to the image cache, otherwise the tests are going to # use always the same image without rebuilding it, regardless the version # set in fedora_version fedora_img_cache="${fedora_img}.cache.${fedora_version}" - fedora_img_url="https://download.fedoraproject.org/pub/fedora/linux/releases/${fedora_version}/Cloud/${arch}/images/Fedora-Cloud-Base-${fedora_version}-1.7.${arch}.raw.xz" + fedora_img_url="https://download.fedoraproject.org/pub/fedora/linux/releases/${fedora_version}/Cloud/${arch}/images/Fedora-Cloud-Base-${fedora_version}-1.6.${arch}.raw.xz" if [ ! -f "${fedora_img_cache}" ]; then curl -sL ${fedora_img_url} -o "${fedora_img_cache}.xz" From 1c32b31589c042e438997f773b9594d07a86a699 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 8 Sep 2023 16:44:22 +0200 Subject: [PATCH 181/339] tests: Apply timeout to 'ctr t kill' This task has been observed to hang at times. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit a96050a7ad6ea18b7a326810d2b4a2bd6a8ce510) --- tests/common.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common.bash b/tests/common.bash index 08b82601b..fc0ed0a7f 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -158,7 +158,7 @@ function clean_env_ctr() info "Wait until the containers gets removed" for task_id in "${running_tasks[@]}"; do - sudo ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1 + sudo timeout -s SIGKILL 30s ctr t kill -a -s SIGTERM ${task_id} >/dev/null 2>&1 || true sleep 0.5 done From 73e989c4b10d394261bd25ea491b7680da411765 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 13 Sep 2023 15:53:02 +0000 Subject: [PATCH 182/339] metrics: Add iperf value for cpu utilization This PR adds the iperf value for cpu utilization for kata metrics. Fixes #7936 Signed-off-by: Gabriela Cervantes (cherry picked from commit df5cd10ea0d2b0a9d76777eba93a0afba29a871d) --- .../checkmetrics-json-clh-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 7cd3a025e..9968a195d 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container cpu utilization using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" +checktype = "mean" +midval = 99.0 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From 
c78f7408544ab802cce75c7c9da4cb0b6fec4042 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 13 Sep 2023 15:55:39 +0000 Subject: [PATCH 183/339] metrics: Add iperf cpu utilization limit for qemu This PR adds the iperf cpu utilization limit for qemu for kata metrics. Signed-off-by: Gabriela Cervantes (cherry picked from commit cd4fd1292a9bdce724c1fbadcf098da423c9bcea) --- .../checkmetrics-json-clh-kata-metric8.toml | 2 +- .../checkmetrics-json-qemu-kata-metric8.toml | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 9968a195d..15e6f7545 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -107,7 +107,7 @@ description = "measure container cpu utilization using iperf3" # within (inclusive) checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" checktype = "mean" -midval = 99.0 +midval = 85.60 minpercent = 20.0 maxpercent = 20.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 7dc842688..0db751246 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container cpu utilization using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .cpu.Result" +checktype = "mean" +midval = 99.86 +minpercent = 20.0 +maxpercent 
= 20.0 + [[metric]] name = "network-iperf3" type = "json" From bda035449163920b34983ffddf4d795f2774e18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 17:58:40 +0200 Subject: [PATCH 184/339] ci: cache: Export env vars needed to use ORAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do the build of our artefacts inside a container image, and we need to expose some env vars to the container so ORAS can be used there to push the artefacts we want to cache to ghcr.io. The env vars we're exposing are: * ARTEFACT_REGISTRY: The registry where we're going to save the artefacts. * ARTEFACT_REGISTRY_USERNAME: The username to log in to the registry, as ORAS does not use the same json file used by docker. * ARTEFACT_REGISTRY_PASSWORD: The password to log in to the registry, as ORAS does not use the same json file used by docker. * TARGET_BRANCH: The target branch, which will be part of the tag of the artefact, as we may end up caching the artefacts for both main and stable branches. 
Fixes: #7834 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 6bd15a85d531fec5c088615011b507ab8bb006cd) --- .github/workflows/build-kata-static-tarball-amd64.yaml | 4 ++++ .github/workflows/build-kata-static-tarball-arm64.yaml | 4 ++++ .github/workflows/build-kata-static-tarball-s390x.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 67020ae60..20a59bb8c 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -86,6 +86,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index 689ec04c9..4225abee8 100644 --- a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -73,6 +73,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 diff --git a/.github/workflows/build-kata-static-tarball-s390x.yaml b/.github/workflows/build-kata-static-tarball-s390x.yaml index 75856e7e4..90be3aa77 100644 --- a/.github/workflows/build-kata-static-tarball-s390x.yaml +++ 
b/.github/workflows/build-kata-static-tarball-s390x.yaml @@ -70,6 +70,10 @@ jobs: KATA_ASSET: ${{ matrix.asset }} TAR_OUTPUT: ${{ matrix.asset }}.tar.gz PUSH_TO_REGISTRY: ${{ inputs.push-to-registry }} + ARTEFACT_REGISTRY: ghcr.io + ARTEFACT_REGISTRY_USERNAME: ${{ github.actor }} + ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@v3 From 5e22a3085bee4f75a3284f24acef3a851bd818ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 18:27:54 +0200 Subject: [PATCH 185/339] ci: cache: Pass the exposed env vars to the kata-deploy binaries in docker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the environment variables are now being passed down from the GitHub Actions, let's make sure they're exposed to the container used to build the kata-deploy binaries, and during the build process we'll be able to use those to log in and push the artefacts to the OCI registry, using ORAS. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit c7a851efd788379df5eb95bf56d3f5195a3e6303) --- .../local-build/kata-deploy-binaries-in-docker.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 5c336cc8b..0cb0c0c63 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -77,13 +77,22 @@ docker build -q -t build-kata-deploy \ --build-arg HOST_DOCKER_GID=${docker_gid} \ "${script_dir}/dockerbuild/" +ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-}" +ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" +ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" +TARGET_BRANCH="${TARGET_BRANCH:-}" + docker run \ -v $HOME/.docker:/root/.docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v "${kata_dir}:${kata_dir}" \ --env CI="${CI:-}" \ --env USER=${USER} \ - --env BUILDER_REGISTRY="${BUILDER_REGISTRY:-}" \ + --env ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY}" \ + --env ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME}" \ + --env ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD}" \ + --env TARGET_BRANCH="${TARGET_BRANCH}" \ + --env BUILDER_REGISTRY="${BUILDER_REGISTRY}" \ --env PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" \ --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" \ --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" \ From 054895fcdd72d2505b6d5038e25c9662b0901a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 19:01:18 +0200 Subject: [PATCH 186/339] ci: cache: For consistency, read all used env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of having some of them only being considered if explicitly passed to the 
script. Signed-off-by: Fabiano Fidêncio (cherry picked from commit adc18ecdb11056309afe9e38222f360944296025) --- .../kata-deploy-binaries-in-docker.sh | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 0cb0c0c63..47cf2dd1d 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -77,32 +77,44 @@ docker build -q -t build-kata-deploy \ --build-arg HOST_DOCKER_GID=${docker_gid} \ "${script_dir}/dockerbuild/" +CI="${CI:-}" ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-}" ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" TARGET_BRANCH="${TARGET_BRANCH:-}" +BUILDER_REGISTRY="${BUILDER_REGISTRY:-}" +PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" +INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" +KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" +OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" +QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER:-}" +SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER:-}" +TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER:-}" +VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER:-}" +MEASURED_ROOTFS="${MEASURED_ROOTFS:-}" +USE_CACHE="${USE_CACHE:-}" docker run \ -v $HOME/.docker:/root/.docker \ -v /var/run/docker.sock:/var/run/docker.sock \ -v "${kata_dir}:${kata_dir}" \ - --env CI="${CI:-}" \ + --env CI="${CI}" \ --env USER=${USER} \ --env ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY}" \ --env ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME}" \ --env ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD}" \ --env TARGET_BRANCH="${TARGET_BRANCH}" \ --env BUILDER_REGISTRY="${BUILDER_REGISTRY}" \ - --env 
PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" \ - --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" \ - --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" \ - --env OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" \ - --env QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER:-}" \ - --env SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER:-}" \ - --env TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER:-}" \ - --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER:-}" \ - --env MEASURED_ROOTFS="${MEASURED_ROOTFS:-}" \ - --env USE_CACHE="${USE_CACHE:-}" \ + --env PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY}" \ + --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER}" \ + --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER}" \ + --env OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER}" \ + --env QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER}" \ + --env SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER}" \ + --env TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER}" \ + --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER}" \ + --env MEASURED_ROOTFS="${MEASURED_ROOTFS}" \ + --env USE_CACHE="${USE_CACHE}" \ --env CROSS_BUILD="${CROSS_BUILD}" \ --env TARGET_ARCH="${TARGET_ARCH}" \ --env ARCH="${ARCH}" \ From 767ccb117f5fbd69c8c1ac567f0b4167dbe69226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 20:14:00 +0200 Subject: [PATCH 187/339] ci: Reduce the size of the AKS VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do **not** need a very powerful machine for our tests, as we're not building anything there. The instance we switched to (Standard_D2s_v5) still has nested virt available, as shown here[0], but has half of the amount of vCPUs / Memory, which should be fine only for running the tests, costing us basically half of the price[1]. 
[0]: https://learn.microsoft.com/en-us/azure/virtual-machines/dv5-dsv5-series [1]: https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/#pricing Fixes: #7955 Signed-off-by: Fabiano Fidêncio (cherry picked from commit faf98c0623d6846804f5e5f03b586bc5efb89cc8) --- tests/gha-run-k8s-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index bf0d202e5..40a0903b9 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -43,7 +43,7 @@ function create_cluster() { az aks create \ -g "${AZ_RG}" \ -n "$(_print_cluster_name ${test_type})" \ - -s "Standard_D4s_v5" \ + -s "Standard_D2s_v5" \ --node-count 1 \ --generate-ssh-keys \ $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") From fa9dd46041956d6fa6c85419c2edd7340ce99aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 22:01:11 +0200 Subject: [PATCH 188/339] ci: k8s: Don't set cpu limit request for k8s-inotofy test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without setting the cpu limit / request to 1, we can make this test run in a smaller VM instance without any issue. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 92fff129fdae4cf21ab367650f2b4c84634e7b2e) --- .../runtimeclass_workloads/inotify-configmap-pod.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml b/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml index c85240c94..2e23864bf 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/inotify-configmap-pod.yaml @@ -16,10 +16,8 @@ spec: args: ["-c", "inotifywait --timeout 120 -r /config/ && [[ -L /config/config.toml ]] && echo success" ] resources: requests: - cpu: 1 memory: 50Mi limits: - cpu: 1 memory: 1024Mi volumeMounts: - name: config From 594fcdce5620d0291d4b5857b25e68f527efcaea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 22:36:55 +0200 Subject: [PATCH 189/339] ci: cri-containerd: Use a smaller / cheaper VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to run on a D4s_v5. as those tests are not CPU / memory intense. With this is mind, let's use a smaller version of the instance, the D2s_v5 one. 
Fixes: #7958 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 32841827b8c54aad30f9448103e6becc866fc193) --- .github/workflows/run-cri-containerd-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml index 205561391..f42833609 100644 --- a/.github/workflows/run-cri-containerd-tests.yaml +++ b/.github/workflows/run-cri-containerd-tests.yaml @@ -23,7 +23,7 @@ jobs: matrix: containerd_version: ['lts', 'active'] vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204 + runs-on: garm-ubuntu-2204-smaller env: CONTAINERD_VERSION: ${{ matrix.containerd_version }} GOPATH: ${{ github.workspace }} From 301edcb92e75c90729a40d2cb830cd79fd9af494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 22:38:48 +0200 Subject: [PATCH 190/339] ci: docker: Use a smaller / cheaper VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to run on a D4s_v5. as those tests are not CPU / memory intense. With this is mind, let's use a smaller version of the instance, the D2s_v5 one. 
Fixes: #7958 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4db416997c2d1591b300ee973f1e99e39cca4bd1) --- .github/workflows/run-docker-tests-on-garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-docker-tests-on-garm.yaml b/.github/workflows/run-docker-tests-on-garm.yaml index abb6b0354..ea90759fa 100644 --- a/.github/workflows/run-docker-tests-on-garm.yaml +++ b/.github/workflows/run-docker-tests-on-garm.yaml @@ -24,7 +24,7 @@ jobs: vmm: - clh - qemu - runs-on: garm-ubuntu-2304 + runs-on: garm-ubuntu-2304-smaller env: KATA_HYPERVISOR: ${{ matrix.vmm }} steps: From 03857041e4477785458703e5f2674d161f6c9c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 22:39:13 +0200 Subject: [PATCH 191/339] ci: nerdctl: Use a smaller / cheaper VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to run on a D4s_v5. as those tests are not CPU / memory intense. With this is mind, let's use a smaller version of the instance, the D2s_v5 one. 
Fixes: #7958 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e60d81f554b804d21182407b5352c3974428e5f8) --- .github/workflows/run-nerdctl-tests-on-garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-nerdctl-tests-on-garm.yaml b/.github/workflows/run-nerdctl-tests-on-garm.yaml index a902d7cc5..eb4e04bc2 100644 --- a/.github/workflows/run-nerdctl-tests-on-garm.yaml +++ b/.github/workflows/run-nerdctl-tests-on-garm.yaml @@ -25,7 +25,7 @@ jobs: - clh - dragonball - qemu - runs-on: garm-ubuntu-2304 + runs-on: garm-ubuntu-2304-smaller env: KATA_HYPERVISOR: ${{ matrix.vmm }} steps: From 3b64c8d68719eeadb1841720f73803580e50c4a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 22:39:30 +0200 Subject: [PATCH 192/339] ci: nydus: Use a smaller / cheaper VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to run on a D4s_v5. as those tests are not CPU / memory intense. With this is mind, let's use a smaller version of the instance, the D2s_v5 one. 
Fixes: #7958 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 1daf02f5d4f6e26b21e66b5982594e16c0cde7db) --- .github/workflows/run-nydus-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml index 54c8cf864..71ee0fe86 100644 --- a/.github/workflows/run-nydus-tests.yaml +++ b/.github/workflows/run-nydus-tests.yaml @@ -23,7 +23,7 @@ jobs: matrix: containerd_version: ['lts', 'active'] vmm: ['clh', 'qemu', 'dragonball'] - runs-on: garm-ubuntu-2204 + runs-on: garm-ubuntu-2204-smaller env: CONTAINERD_VERSION: ${{ matrix.containerd_version }} GOPATH: ${{ github.workspace }} From 27dd77469dd7123357054c285a3dbc80ea4457ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 00:27:05 +0200 Subject: [PATCH 193/339] ci: k8s: devmapper: Use a smaller / cheaper VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to run on a D4s_v5. as those tests are not CPU / memory intense. With this is mind, let's use a smaller version of the instance, the D2s_v5 one. 
Fixes: #7958 Signed-off-by: Fabiano Fidêncio (cherry picked from commit fb24fb0dc1d5c4e8cf3639c9a2a5839d655d61bd) --- .github/workflows/run-k8s-tests-on-garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index 22e9925d3..3fdb8302d 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -35,7 +35,7 @@ jobs: - devmapper k8s: - k3s - runs-on: garm-ubuntu-2004 + runs-on: garm-ubuntu-2004-smaller env: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} From 0210db6e34b01044e8b50ed82abd76892fc379ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 14:36:02 +0200 Subject: [PATCH 194/339] ci: cache: Install ORAS in the kata-deploy binaries builder container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ORAS is the tool which will help us to deal with our artefacts being pushed to and pulled from a container registry. As both the push to and the pull from will be done inside the kata-deploy binaries builder container, we need it installed there. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit be2eb7b378ee1cc79fe7b5a3a71a2a1a12a21759) --- .../local-build/dockerbuild/Dockerfile | 4 +- .../local-build/dockerbuild/install_oras.sh | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100755 tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 51e7ba431..a29514968 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -7,10 +7,11 @@ ENV DEBIAN_FRONTEND=noninteractive ENV INSTALL_IN_GOPATH=false COPY install_yq.sh /usr/bin/install_yq.sh +COPY install_oras.sh /usr/bin/install_oras.sh SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# Install yq and docker +# Install yq, oras, and docker RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ @@ -18,6 +19,7 @@ RUN apt-get update && \ sudo && \ apt-get clean && rm -rf /var/lib/apt/lists/ && \ install_yq.sh && \ + install_oras.sh && \ curl -fsSL https://get.docker.com -o get-docker.sh && \ if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10" && \ sed -i 's/\//g' get-docker.sh; fi && \ diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh new file mode 100755 index 000000000..973a10205 --- /dev/null +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/install_oras.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +install_dest="/usr/local/bin" + +function get_installed_oras_version() { + oras version | grep Version | sed -e s/Version:// | tr -d [:blank:] +} + +oras_required_version="v1.1.0" +if command -v oras; 
then + if [[ "${oras_required_version}" == "v$(get_installed_oras_version)" ]]; then + echo "ORAS is already installed in the system" + exit 0 + fi + + echo "Proceeding to cleanup the previous installed version of ORAS, and install the version specified in the versions.yaml file" + oras_system_path=$(which oras) + sudo rm -f ${oras_system_path} +fi + +arch=$(uname -m) +if [ "${arch}" = "ppc64le" ]; then + echo "An ORAS release for ppc64le is not available yet." + exit 0 +fi +if [ "${arch}" = "x86_64" ]; then + arch="amd64" +fi +if [ "${arch}" = "aarch64" ]; then + arch="arm64" +fi +oras_tarball="oras_${oras_required_version#v}_linux_${arch}.tar.gz" + +echo "Downloading ORAS ${oras_required_version}" +sudo curl -OL https://github.com/oras-project/oras/releases/download/${oras_required_version}/${oras_tarball} + +echo "Installing ORAS to ${install_dest}" +sudo mkdir -p "${install_dest}" +sudo tar -C "${install_dest}" -xzf "${oras_tarball}" +sudo rm -f "${oras_tarball}" From 6f8ded36b6bef0db3d668423e00151b19e72847a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 14:08:36 +0200 Subject: [PATCH 195/339] kata-deploy: Generate latest_{artefact,image_builder} files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now this is not used, but it'll be used when we start caching the artefacts using ORAS. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 108f1b60ddf2627becf65e522abbc48525a8742b) --- .../local-build/kata-deploy-binaries.sh | 84 +++++++++++++------ 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index a13f9c51f..79b31b01e 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -161,11 +161,14 @@ install_image() { local libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")" local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" + latest_artefact="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${image_type}" + latest_builder_image="" + install_cached_tarball_component \ "${component}" \ "${jenkins}" \ - "${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${image_type}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -208,13 +211,16 @@ install_initrd() { local libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")" local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" + latest_artefact="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${initrd_type}" + latest_builder_image="" + [[ "${ARCH}" == "aarch64" && "${CROSS_BUILD}" == "true" ]] && echo "warning: Don't cross build initrd for aarch64 as it's too slow" && exit 0 install_cached_tarball_component \ "${component}" \ "${jenkins}" \ - 
"${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${initrd_type}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -247,11 +253,14 @@ install_cached_kernel_tarball_component() { local kernel_name=${1} local module_dir=${2:-""} + latest_artefact="${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" + latest_builder_image="$(get_kernel_image_name)" + install_cached_tarball_component \ "${kernel_name}" \ "${jenkins_url}/job/kata-containers-main-${kernel_name}-${ARCH}/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" \ - "$(get_kernel_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ || return 1 @@ -264,8 +273,8 @@ install_cached_kernel_tarball_component() { install_cached_tarball_component \ "${kernel_name}" \ "${jenkins_url}/job/kata-containers-main-${kernel_name}-$(uname -m)/${cached_artifacts_path}" \ - "${kernel_version}-${kernel_kata_config_version}-$(get_last_modification $(dirname $kernel_builder))" \ - "$(get_kernel_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "kata-static-kernel-sev-modules.tar.xz" \ "${workdir}/kata-static-kernel-sev-modules.tar.xz" \ || return 1 @@ -384,11 +393,14 @@ install_qemu_helper() { export qemu_repo="$(get_from_kata_deps ${qemu_repo_yaml_path})" export qemu_version="$(get_from_kata_deps ${qemu_version_yaml_path})" + latest_artefact="${qemu_version}-$(calc_qemu_files_sha256sum)" + latest_builder_image="$(get_qemu_image_name)" + install_cached_tarball_component \ "${qemu_name}" \ "${jenkins_url}/job/kata-containers-main-${qemu_name}-${ARCH}/${cached_artifacts_path}" \ - "${qemu_version}-$(calc_qemu_files_sha256sum)" \ - 
"$(get_qemu_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -433,11 +445,14 @@ install_qemu_snp_experimental() { install_firecracker() { local firecracker_version=$(get_from_kata_deps "assets.hypervisor.firecracker.version") + latest_artefact="${firecracker_version}" + latest_builder_image="" + install_cached_tarball_component \ "firecracker" \ "${jenkins_url}/job/kata-containers-main-firecracker-$(uname -m)/${cached_artifacts_path}" \ - "${firecracker_version}" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -455,11 +470,14 @@ install_clh_helper() { features="${2}" suffix="${3:-""}" + latest_artefact="$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")" + latest_builder_image="" + install_cached_tarball_component \ "cloud-hypervisor${suffix}" \ "${jenkins_url}/job/kata-containers-main-clh-$(uname -m)${suffix}/${cached_artifacts_path}" \ - "$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -495,11 +513,14 @@ install_clh_glibc() { # Install static virtiofsd asset install_virtiofsd() { + latest_artefact="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" + latest_builder_image="$(get_virtiofsd_image_name)" + install_cached_tarball_component \ "virtiofsd" \ "${jenkins_url}/job/kata-containers-main-virtiofsd-${ARCH}/${cached_artifacts_path}" \ - "$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" \ - "$(get_virtiofsd_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -515,11 +536,14 @@ install_virtiofsd() { install_nydus() { [ "${ARCH}" == 
"aarch64" ] && ARCH=arm64 + latest_artefact="$(get_from_kata_deps "externals.nydus.version")" + latest_builder_image="" + install_cached_tarball_component \ "nydus" \ "${jenkins_url}/job/kata-containers-main-nydus-$(uname -m)/${cached_artifacts_path}" \ - "$(get_from_kata_deps "externals.nydus.version")" \ - "" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -540,13 +564,15 @@ install_shimv2() { local protocols_last_commit="$(get_last_modification "${repo_root_dir}/src/libs/protocols")" local GO_VERSION="$(get_from_kata_deps "languages.golang.meta.newest-version")" local RUST_VERSION="$(get_from_kata_deps "languages.rust.meta.newest-version")" - local shim_v2_version="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${GO_VERSION}-${RUST_VERSION}" + + latest_artefact="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${GO_VERSION}-${RUST_VERSION}" + latest_builder_image="$(get_shim_v2_image_name)" install_cached_tarball_component \ "shim-v2" \ "${jenkins_url}/job/kata-containers-main-shim-v2-${ARCH}/${cached_artifacts_path}" \ - "${shim_v2_version}" \ - "$(get_shim_v2_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ "${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -573,13 +599,16 @@ install_ovmf() { tarball_name="${2:-edk2-x86_64.tar.gz}" local component_name="ovmf" - local component_version="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" [ "${ovmf_type}" == "tdx" ] && component_name="tdvf" + + latest_artefact="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" + latest_builder_image="$(get_ovmf_image_name)" + install_cached_tarball_component \ "${component_name}" \ "${jenkins_url}/job/kata-containers-main-ovmf-${ovmf_type}-$(uname -m)/${cached_artifacts_path}" \ - "${component_version}" \ - "$(get_ovmf_image_name)" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ 
"${final_tarball_name}" \ "${final_tarball_path}" \ && return 0 @@ -606,6 +635,10 @@ get_kata_version() { handle_build() { info "DESTDIR ${destdir}" + + latest_artefact="" + latest_builder_image="" + local build_target build_target="$1" @@ -693,6 +726,9 @@ handle_build() { sudo tar cvfJ "${final_tarball_path}" "." fi tar tvf "${final_tarball_path}" + + echo "${latest_artefact}" > ${workdir}/${build_target}-version + echo "${latest_builder_image}" > ${workdir}/${build_target}-builder-image-version } silent_mode_error_trap() { From f160effaeefde1198d591b68935350e3a47d278e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 19:33:35 +0200 Subject: [PATCH 196/339] ci: cache: Push cached artefacts to ghcr.io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's push the artefacts to ghcr.io and stop relying on jenkins for that. Fixes: #7834 -- part 1 Signed-off-by: Fabiano Fidêncio (cherry picked from commit d0c257b3a77f83f13886d15a5721440ca5647fd9) --- .../local-build/kata-deploy-binaries.sh | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 79b31b01e..d480658a3 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -41,6 +41,11 @@ readonly cached_artifacts_path="lastSuccessfulBuild/artifact/artifacts" ARCH=${ARCH:-$(uname -m)} MEASURED_ROOTFS=${MEASURED_ROOTFS:-no} USE_CACHE="${USE_CACHE:-"yes"}" +ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-}" +ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" +ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" +TARGET_BRANCH="${TARGET_BRANCH:=}" +PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-}" workdir="${WORKDIR:-$PWD}" @@ -729,6 +734,22 @@ handle_build() { echo "${latest_artefact}" > 
${workdir}/${build_target}-version echo "${latest_builder_image}" > ${workdir}/${build_target}-builder-image-version + + if [ "${PUSH_TO_REGISTRY}" = "yes" ]; then + if [ -z "${ARTEFACT_REGISTRY}" ] || + [ -z "${ARTEFACT_REGISTRY_USERNAME}" ] || + [ -z "${ARTEFACT_REGISTRY_PASSWORD}" ] || + [ -z "${TARGET_BRANCH}" ]; then + die "ARTEFACT_REGISTRY, ARTEFACT_REGISTRY_USERNAME, ARTEFACT_REGISTRY_PASSWORD and TARGET_BRANCH must be passed to the script when pushing the artefacts to the registry!" + fi + + pushd ${workdir} + echo "${ARTEFACT_REGISTRY_PASSWORD}" | oras login "${ARTEFACT_REGISTRY}" -u "${ARTEFACT_REGISTRY_USERNAME}" --password-stdin + + oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version + oras logout "${ARTEFACT_REGISTRY}" + popd + fi } silent_mode_error_trap() { From 2f280659b1cbb3f57014b1005a7498ec71a4180a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 14 Sep 2023 21:18:30 +0200 Subject: [PATCH 197/339] ci: k8s: Temporarily disable tests that require a bigger VM instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The list of tests which require a bigger VM instance is: * k8s-number-cpus.bats -- failing on all CIs * k8s-parallel.bats -- only failing on the cbl-mariner CI * k8s-scale-nginx.bats -- only failing on the cbl-mariner CI We'll keep those disabled while we re-work the logic to **only run those** in a bigger (and more expensive) VM instance. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 094b6b2cf8a2d45fe60073d02bc9a355e7761375) --- tests/integration/kubernetes/run_kubernetes_tests.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 3d963c6c3..aff825b47 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -37,18 +37,15 @@ else "k8s-liveness-probes.bats" \ "k8s-memory.bats" \ "k8s-nested-configmap-secret.bats" \ - "k8s-number-cpus.bats" \ "k8s-oom.bats" \ "k8s-optional-empty-configmap.bats" \ "k8s-optional-empty-secret.bats" \ - "k8s-parallel.bats" \ "k8s-pid-ns.bats" \ "k8s-pod-quota.bats" \ "k8s-port-forward.bats" \ "k8s-projected-volume.bats" \ "k8s-qos-pods.bats" \ "k8s-replication.bats" \ - "k8s-scale-nginx.bats" \ "k8s-seccomp.bats" \ "k8s-sysctls.bats" \ "k8s-security-context.bats" \ From b5da4ce0d8456b032f1b3dbc91379d8e31981e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 10:13:47 +0200 Subject: [PATCH 198/339] ci: cache: Use the cached artefacts from ORAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the previous series related to the artefacts we build, we've switching from storing the artefacts on Jenkins, to storing those in the ghcr.io/kata-containers/cached-artefacts/${artefact_name}. Now, let's take advantage of that and actually use the artefacts coming from that "package" (as GitHub calls it). NOTE: One thing that I've noticed that we're missing, is storing and checking the sha256sum of the artefact. The storing part will be done in a different commit, and the checking the sha256sum will be done in a different PR, as we need to ensure those were pushed to the registry before actually taking the bullet to check for them. 
Fixes: #7834 -- part 2 Signed-off-by: Fabiano Fidêncio (cherry picked from commit eccc76df635796ff2a2e9613f645e6a0888dfe70) --- .../local-build/kata-deploy-binaries.sh | 48 +++++-------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index d480658a3..ba4554d20 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -35,16 +35,13 @@ readonly nydus_builder="${static_build_dir}/nydus/build.sh" readonly rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" -readonly jenkins_url="http://jenkins.katacontainers.io" -readonly cached_artifacts_path="lastSuccessfulBuild/artifact/artifacts" - ARCH=${ARCH:-$(uname -m)} MEASURED_ROOTFS=${MEASURED_ROOTFS:-no} USE_CACHE="${USE_CACHE:-"yes"}" -ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-}" +ARTEFACT_REGISTRY="${ARTEFACT_REGISTRY:-ghcr.io}" ARTEFACT_REGISTRY_USERNAME="${ARTEFACT_REGISTRY_USERNAME:-}" ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" -TARGET_BRANCH="${TARGET_BRANCH:=}" +TARGET_BRANCH="${TARGET_BRANCH:-main}" PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-}" workdir="${WORKDIR:-$PWD}" @@ -114,35 +111,29 @@ EOF exit "${return_code}" } - -cleanup_and_fail() { - rm -f "${component_tarball_path}" - return 1 -} - install_cached_tarball_component() { if [ "${USE_CACHE}" != "yes" ]; then return 1 fi local component="${1}" - local jenkins_build_url="${2}" - local current_version="${3}" - local current_image_version="${4}" - local component_tarball_name="${5}" - local component_tarball_path="${6}" + local current_version="${2}" + local current_image_version="${3}" + local component_tarball_name="${4}" + local component_tarball_path="${5}" - local cached_version=$(curl -sfL "${jenkins_build_url}/latest" | awk '{print $1}') || cached_version="none" - local 
cached_image_version=$(curl -sfL "${jenkins_build_url}/latest_image" | awk '{print $1}') || cached_image_version="none" + oras pull ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) + + cached_version="$(cat ${component}-version)" + cached_image_version="$(cat ${component}-builder-image-version)" + + rm -f ${component}-version + rm -f ${component}-builder-image-version [ "${cached_image_version}" != "${current_image_version}" ] && return 1 [ "${cached_version}" != "${current_version}" ] && return 1 info "Using cached tarball of ${component}" - echo "Downloading tarball from: ${jenkins_build_url}/${component_tarball_name}" - wget "${jenkins_build_url}/${component_tarball_name}" || return $(cleanup_and_fail) - wget "${jenkins_build_url}/sha256sum-${component_tarball_name}" || return $(cleanup_and_fail) - sha256sum -c "sha256sum-${component_tarball_name}" || return $(cleanup_and_fail) mv "${component_tarball_name}" "${component_tarball_path}" } @@ -155,7 +146,6 @@ install_image() { image_type+="-${variant}" fi - local jenkins="${jenkins_url}/job/kata-containers-main-rootfs-${image_type}-${ARCH}/${cached_artifacts_path}" local component="rootfs-${image_type}" local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")" @@ -171,7 +161,6 @@ install_image() { install_cached_tarball_component \ "${component}" \ - "${jenkins}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -205,7 +194,6 @@ install_initrd() { initrd_type+="-${variant}" fi - local jenkins="${jenkins_url}/job/kata-containers-main-rootfs-${initrd_type}-${ARCH}/${cached_artifacts_path}" local component="rootfs-${initrd_type}" local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")" @@ -223,7 +211,6 @@ install_initrd() { install_cached_tarball_component \ "${component}" \ - "${jenkins}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" 
\ @@ -263,7 +250,6 @@ install_cached_kernel_tarball_component() { install_cached_tarball_component \ "${kernel_name}" \ - "${jenkins_url}/job/kata-containers-main-${kernel_name}-${ARCH}/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -277,7 +263,6 @@ install_cached_kernel_tarball_component() { # SEV specific code path install_cached_tarball_component \ "${kernel_name}" \ - "${jenkins_url}/job/kata-containers-main-${kernel_name}-$(uname -m)/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "kata-static-kernel-sev-modules.tar.xz" \ @@ -403,7 +388,6 @@ install_qemu_helper() { install_cached_tarball_component \ "${qemu_name}" \ - "${jenkins_url}/job/kata-containers-main-${qemu_name}-${ARCH}/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -455,7 +439,6 @@ install_firecracker() { install_cached_tarball_component \ "firecracker" \ - "${jenkins_url}/job/kata-containers-main-firecracker-$(uname -m)/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -480,7 +463,6 @@ install_clh_helper() { install_cached_tarball_component \ "cloud-hypervisor${suffix}" \ - "${jenkins_url}/job/kata-containers-main-clh-$(uname -m)${suffix}/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -523,7 +505,6 @@ install_virtiofsd() { install_cached_tarball_component \ "virtiofsd" \ - "${jenkins_url}/job/kata-containers-main-virtiofsd-${ARCH}/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -546,7 +527,6 @@ install_nydus() { install_cached_tarball_component \ "nydus" \ - "${jenkins_url}/job/kata-containers-main-nydus-$(uname -m)/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -575,7 +555,6 @@ install_shimv2() { 
install_cached_tarball_component \ "shim-v2" \ - "${jenkins_url}/job/kata-containers-main-shim-v2-${ARCH}/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ @@ -611,7 +590,6 @@ install_ovmf() { install_cached_tarball_component \ "${component_name}" \ - "${jenkins_url}/job/kata-containers-main-ovmf-${ovmf_type}-$(uname -m)/${cached_artifacts_path}" \ "${latest_artefact}" \ "${latest_builder_image}" \ "${final_tarball_name}" \ From e464bbfc9308803e4b06cae5932108d0e2ae709c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 10:25:26 +0200 Subject: [PATCH 199/339] ci: cache: Also store the ${component} sha256sum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is something that was done by our Jenkins jobs, but that I ended up missing when writing d0c257b3a77f83f13886d15a5721440ca5647fd9. Now, let's also add the sha256sum to the cached artefact, and in a coming up PR (after this one is merged) we will also start checking for that. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4533a7a4163dd1a4d8d206a2911ba96e1fd04cbf) --- .../local-build/kata-deploy-binaries.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index ba4554d20..fc23a4655 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -710,8 +710,10 @@ handle_build() { fi tar tvf "${final_tarball_path}" - echo "${latest_artefact}" > ${workdir}/${build_target}-version - echo "${latest_builder_image}" > ${workdir}/${build_target}-builder-image-version + pushd ${workdir} + echo "${latest_artefact}" > ${build_target}-version + echo "${latest_builder_image}" > ${build_target}-builder-image-version + sha256sum "${final_tarball_name}" > ${build_target}-sha256sum if [ "${PUSH_TO_REGISTRY}" = "yes" ]; then if [ -z "${ARTEFACT_REGISTRY}" ] || @@ -721,13 +723,13 @@ handle_build() { die "ARTEFACT_REGISTRY, ARTEFACT_REGISTRY_USERNAME, ARTEFACT_REGISTRY_PASSWORD and TARGET_BRANCH must be passed to the script when pushing the artefacts to the registry!" 
fi - pushd ${workdir} - echo "${ARTEFACT_REGISTRY_PASSWORD}" | oras login "${ARTEFACT_REGISTRY}" -u "${ARTEFACT_REGISTRY_USERNAME}" --password-stdin + echo "${ARTEFACT_REGISTRY_PASSWORD}" | oras login "${ARTEFACT_REGISTRY}" -u "${ARTEFACT_REGISTRY_USERNAME}" --password-stdin - oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version - oras logout "${ARTEFACT_REGISTRY}" - popd + oras push ${ARTEFACT_REGISTRY}/kata-containers/cached-artefacts/${build_target}:latest-${TARGET_BRANCH}-$(uname -m) ${final_tarball_name} ${build_target}-version ${build_target}-builder-image-version ${build_target}-sha256sum + oras logout "${ARTEFACT_REGISTRY}" fi + + popd } silent_mode_error_trap() { From a55c082fa1218090744a602dc9e46bf282299dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 10:27:55 +0200 Subject: [PATCH 200/339] ci: cache: Remove the script used to cache artefacts on Jenkins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That's not needed anymore, as we've switched to using ORAS and an OCI registry to cache the artefacts. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 460988c5f70eb2a4948da84c8bb4bcf9d16e25bf) --- .../static-build/cache_components_main.sh | 264 ------------------ 1 file changed, 264 deletions(-) delete mode 100755 tools/packaging/static-build/cache_components_main.sh diff --git a/tools/packaging/static-build/cache_components_main.sh b/tools/packaging/static-build/cache_components_main.sh deleted file mode 100755 index dbfa66f63..000000000 --- a/tools/packaging/static-build/cache_components_main.sh +++ /dev/null @@ -1,264 +0,0 @@ -#!/bin/bash -# Copyright (c) 2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail - -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -source "${script_dir}/../scripts/lib.sh" - -KERNEL_FLAVOUR="${KERNEL_FLAVOUR:-kernel}" # kernel | kernel-nvidia-gpu | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental | kernel-nvidia-gpu-tdx-experimental | kernel-nvidia-gpu-snp -OVMF_FLAVOUR="${OVMF_FLAVOUR:-x86_64}" # x86_64 | tdx -QEMU_FLAVOUR="${QEMU_FLAVOUR:-qemu}" # qemu | qemu-tdx-experimental | qemu-snp-experimental -ROOTFS_IMAGE_TYPE="${ROOTFS_IMAGE_TYPE:-image}" # image | initrd - -cache_clh_artifacts() { - local clh_tarball_name="kata-static-cloud-hypervisor.tar.xz" - local current_clh_version="$(get_from_kata_deps "assets.hypervisor.cloud_hypervisor.version")" - create_cache_asset "${clh_tarball_name}" "${current_clh_version}" "" -} - -cache_firecracker_artifacts() { - local fc_tarball_name="kata-static-firecracker.tar.xz" - local current_fc_version="$(get_from_kata_deps "assets.hypervisor.firecracker.version")" - create_cache_asset "${fc_tarball_name}" "${current_fc_version}" "" -} - -cache_kernel_artifacts() { - local kernel_tarball_name="kata-static-${KERNEL_FLAVOUR}.tar.xz" - local current_kernel_image="$(get_kernel_image_name)" - local current_kernel_kata_config_version="$(cat 
${repo_root_dir}/tools/packaging/kernel/kata_config_version)" - # Changes to tools/packaging/kernel are covered by the kata_config_version check - local kernel_last_commit="$(get_last_modification ${repo_root_dir}/tools/packaging/static-build/kernel)" - local kernel_modules_tarball_path="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/kata-static-kernel-sev-modules.tar.xz" - - # The ${vendor}-gpu kernels are based on an already existing entry, and does not require - # adding a new entry to the versions.yaml. - # - # With this in mind, let's just make sure we get the version from correct entry in the - # versions.yaml file. - case ${KERNEL_FLAVOUR} in - *"nvidia-gpu"*) - KERNEL_FLAVOUR=${KERNEL_FLAVOUR//"-nvidia-gpu"/} - ;; - *) - ;; - esac - - case ${KERNEL_FLAVOUR} in - "kernel-sev"|"kernel-snp") - # In these cases, like "kernel-foo", it must be set to "kernel.foo" when looking at - # the versions.yaml file - current_kernel_version="$(get_from_kata_deps "assets.${KERNEL_FLAVOUR/-/.}.version")" - ;; - *) - current_kernel_version="$(get_from_kata_deps "assets.${KERNEL_FLAVOUR}.version")" - ;; - esac - - local current_component_version="${current_kernel_version}-${current_kernel_kata_config_version}-${kernel_last_commit}" - create_cache_asset "${kernel_tarball_name}" "${current_component_version}" "${current_kernel_image}" - if [[ "${KERNEL_FLAVOUR}" == "kernel-sev" ]]; then - module_dir="${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/kernel-sev/builddir/kata-linux-${current_kernel_version#v}-${current_kernel_kata_config_version}/lib/modules/${current_kernel_version#v}" - if [ ! 
-f "${kernel_modules_tarball_path}" ]; then - tar cvfJ "${kernel_modules_tarball_path}" "${module_dir}/kernel/drivers/virt/coco/efi_secret/" - fi - create_cache_asset "kata-static-kernel-sev-modules.tar.xz" "${current_component_version}" "${current_kernel_image}" - fi -} - -cache_nydus_artifacts() { - local nydus_tarball_name="kata-static-nydus.tar.xz" - local current_nydus_version="$(get_from_kata_deps "externals.nydus.version")" - create_cache_asset "${nydus_tarball_name}" "${current_nydus_version}" "" -} - -cache_ovmf_artifacts() { - local current_ovmf_version="$(get_from_kata_deps "externals.ovmf.${OVMF_FLAVOUR}.version")" - case ${OVMF_FLAVOUR} in - "tdx") - ovmf_tarball_name="kata-static-tdvf.tar.xz" - ;; - "x86_64") - ovmf_tarball_name="kata-static-ovmf.tar.xz" - ;; - *) - ovmf_tarball_name="kata-static-ovmf-${OVMF_FLAVOUR}.tar.xz" - ;; - esac - - local current_ovmf_image="$(get_ovmf_image_name)" - create_cache_asset "${ovmf_tarball_name}" "${current_ovmf_version}" "${current_ovmf_image}" -} - -cache_qemu_artifacts() { - local qemu_tarball_name="kata-static-${QEMU_FLAVOUR}.tar.xz" - local current_qemu_version=$(get_from_kata_deps "assets.hypervisor.${QEMU_FLAVOUR}.version") - [ -z "${current_qemu_version}" ] && current_qemu_version=$(get_from_kata_deps "assets.hypervisor.${QEMU_FLAVOUR}.tag") - local qemu_sha=$(calc_qemu_files_sha256sum) - local current_qemu_image="$(get_qemu_image_name)" - create_cache_asset "${qemu_tarball_name}" "${current_qemu_version}-${qemu_sha}" "${current_qemu_image}" -} - -cache_rootfs_artifacts() { - local osbuilder_last_commit="$(get_last_modification "${repo_root_dir}/tools/osbuilder")" - local guest_image_last_commit="$(get_last_modification "${repo_root_dir}/tools/packaging/guest-image")" - local agent_last_commit="$(get_last_modification "${repo_root_dir}/src/agent")" - local libs_last_commit="$(get_last_modification "${repo_root_dir}/src/libs")" - local gperf_version="$(get_from_kata_deps "externals.gperf.version")" - local 
libseccomp_version="$(get_from_kata_deps "externals.libseccomp.version")" - local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" - local rootfs_tarball_name="kata-static-rootfs-${ROOTFS_IMAGE_TYPE}.tar.xz" - local current_rootfs_version="${osbuilder_last_commit}-${guest_image_last_commit}-${agent_last_commit}-${libs_last_commit}-${gperf_version}-${libseccomp_version}-${rust_version}-${ROOTFS_IMAGE_TYPE}" - create_cache_asset "${rootfs_tarball_name}" "${current_rootfs_version}" "" -} - -cache_shim_v2_artifacts() { - local shim_v2_tarball_name="kata-static-shim-v2.tar.xz" - local shim_v2_last_commit="$(get_last_modification "${repo_root_dir}/src/runtime")" - local protocols_last_commit="$(get_last_modification "${repo_root_dir}/src/libs/protocols")" - local runtime_rs_last_commit="$(get_last_modification "${repo_root_dir}/src/runtime-rs")" - local golang_version="$(get_from_kata_deps "languages.golang.meta.newest-version")" - local rust_version="$(get_from_kata_deps "languages.rust.meta.newest-version")" - local current_shim_v2_version="${shim_v2_last_commit}-${protocols_last_commit}-${runtime_rs_last_commit}-${golang_version}-${rust_version}" - local current_shim_v2_image="$(get_shim_v2_image_name)" - create_cache_asset "${shim_v2_tarball_name}" "${current_shim_v2_version}" "${current_shim_v2_image}" -} - -cache_virtiofsd_artifacts() { - local virtiofsd_tarball_name="kata-static-virtiofsd.tar.xz" - local current_virtiofsd_version="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" - local current_virtiofsd_image="$(get_virtiofsd_image_name)" - create_cache_asset "${virtiofsd_tarball_name}" "${current_virtiofsd_version}" "${current_virtiofsd_image}" -} - -create_cache_asset() { - local component_name="${1}" - local component_version="${2}" - local component_image="${3}" - - sudo cp "${repo_root_dir}/tools/packaging/kata-deploy/local-build/build/${component_name}" . 
- sudo chown -R "${USER}:${USER}" . - sha256sum "${component_name}" > "sha256sum-${component_name}" - cat "sha256sum-${component_name}" - echo "${component_version}" > "latest" - cat "latest" - echo "${component_image}" > "latest_image" - cat "latest_image" -} - -help() { -echo "$(cat << EOF -Usage: $0 "[options]" - Description: - Builds the cache of several kata components. - Options: - -c Cloud hypervisor cache - -F Firecracker cache - -k Kernel cache - * Export KERNEL_FLAVOUR="kernel | kernel-nvidia-gpu | kernel-experimental | kernel-arm-experimental | kernel-dragonball-experimental | kernel-tdx-experimental | kernel-nvidia-gpu-tdx-experimental | kernel-nvidia-gpu-snp" for a specific build - The default KERNEL_FLAVOUR value is "kernel" - -n Nydus cache - -q QEMU cache - * Export QEMU_FLAVOUR="qemu | qemu-tdx-experimental | qemu-snp-experimental" for a specific build - The default QEMU_FLAVOUR value is "qemu" - -r RootFS cache - * Export ROOTFS_IMAGE_TYPE="image|initrd" for one of those two types - The default ROOTFS_IMAGE_TYPE value is "image" - -s Shim v2 cache - -v VirtioFS cache - -h Shows help -EOF -)" -} - -main() { - local cloud_hypervisor_component="${cloud_hypervisor_component:-}" - local firecracker_component="${firecracker_component:-}" - local kernel_component="${kernel_component:-}" - local nydus_component="${nydus_component:-}" - local ovmf_component="${ovmf_component:-}" - local qemu_component="${qemu_component:-}" - local rootfs_component="${rootfs_component:-}" - local shim_v2_component="${shim_v2_component:-}" - local virtiofsd_component="${virtiofsd_component:-}" - local OPTIND - while getopts ":cFknoqrsvh:" opt - do - case "$opt" in - c) - cloud_hypervisor_component="1" - ;; - F) - firecracker_component="1" - ;; - k) - kernel_component="1" - ;; - n) - nydus_component="1" - ;; - o) - ovmf_component="1" - ;; - q) - qemu_component="1" - ;; - r) - rootfs_component="1" - ;; - s) - shim_v2_component="1" - ;; - v) - virtiofsd_component="1" - ;; - h) 
- help - exit 0; - ;; - :) - echo "Missing argument for -$OPTARG"; - help - exit 1; - ;; - esac - done - shift $((OPTIND-1)) - - [[ -z "${cloud_hypervisor_component}" ]] && \ - [[ -z "${firecracker_component}" ]] && \ - [[ -z "${kernel_component}" ]] && \ - [[ -z "${nydus_component}" ]] && \ - [[ -z "${ovmf_component}" ]] && \ - [[ -z "${qemu_component}" ]] && \ - [[ -z "${rootfs_component}" ]] && \ - [[ -z "${shim_v2_component}" ]] && \ - [[ -z "${virtiofsd_component}" ]] && \ - help && die "Must choose at least one option" - - mkdir -p "${WORKSPACE}/artifacts" - pushd "${WORKSPACE}/artifacts" - echo "Artifacts:" - - [ "${cloud_hypervisor_component}" == "1" ] && cache_clh_artifacts - [ "${firecracker_component}" == "1" ] && cache_firecracker_artifacts - [ "${kernel_component}" == "1" ] && cache_kernel_artifacts - [ "${nydus_component}" == "1" ] && cache_nydus_artifacts - [ "${ovmf_component}" == "1" ] && cache_ovmf_artifacts - [ "${qemu_component}" == "1" ] && cache_qemu_artifacts - [ "${rootfs_component}" == "1" ] && cache_rootfs_artifacts - [ "${shim_v2_component}" == "1" ] && cache_shim_v2_artifacts - [ "${virtiofsd_component}" == "1" ] && cache_virtiofsd_artifacts - - ls -la "${WORKSPACE}/artifacts/" - popd - sync -} - -main "$@" From dc9f2c24f14e636a6645769c488035f2c280534d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 12:34:30 +0200 Subject: [PATCH 201/339] ci: cache: Check the sha256sum of the component MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've removed this in the part 2 of this effort, as we were not caching the sha256sum of the component. Now that this part has been merged, let's get back to checking it. 
Fixes: #7834 -- part 3 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 86c41074b403d092a532ae539010c024c7368c55) --- .../kata-deploy/local-build/kata-deploy-binaries.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index fc23a4655..7bd938fdc 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -111,6 +111,11 @@ EOF exit "${return_code}" } +cleanup_and_fail() { + rm -f "${component_tarball_name}" + return 1 +} + install_cached_tarball_component() { if [ "${USE_CACHE}" != "yes" ]; then return 1 @@ -132,6 +137,7 @@ install_cached_tarball_component() { [ "${cached_image_version}" != "${current_image_version}" ] && return 1 [ "${cached_version}" != "${current_version}" ] && return 1 + sha256sum -c "${component}-sha256sum" || return $(cleanup_and_fail) info "Using cached tarball of ${component}" mv "${component_tarball_name}" "${component_tarball_path}" From 7019a25f2557b0389ba06097a38ebe82307d26a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 12:37:22 +0200 Subject: [PATCH 202/339] ci: cache: Fix ovmf-sev cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cached tarball is relying on the component name, thus it's important to set it correctly, otherwise we'll end up always building it. 
With this patch applied: ``` ≡ ⨯ make ovmf-sev-tarball make ovmf-sev-tarball-build make[1]: Entering directory '/home/ffidenci/src/upstream/kata-containers/kata-containers' /home/ffidenci/src/upstream/kata-containers/kata-containers/tools/packaging/kata-deploy/local-build//kata-deploy-binaries-in-docker.sh --build=ovmf-sev sha256:67cc94e393dc1d5bfc2b77a77e83c9b1c0833d0fbbebaa9e9e36f938bb841fcc Build kata version 3.2.0-rc0: ovmf-sev INFO: DESTDIR /home/ffidenci/src/upstream/kata-containers/kata-containers/tools/packaging/kata-deploy/local-build/build/ovmf-sev/destdir Downloading a76f5522493f ovmf-sev-builder-image-version Downloading 7e98c854bd94 kata-static-ovmf-sev.tar.xz Downloading 559311973ff8 ovmf-sev-version Downloaded a76f5522493f ovmf-sev-builder-image-version Downloading 353b655c2297 ovmf-sev-sha256sum Downloaded 559311973ff8 ovmf-sev-version Downloaded 353b655c2297 ovmf-sev-sha256sum Downloaded 7e98c854bd94 kata-static-ovmf-sev.tar.xz Pulled [registry] ghcr.io/kata-containers/cached-artefacts/ovmf-sev:latest-main-x86_64 Digest: sha256:933236c2c79e53be3ca7acc0b966d0ddac9c0335edcb1e8cad8b9bb3aaf508ce kata-static-ovmf-sev.tar.xz: OK INFO: Using cached tarball of ovmf-sev drwxr-xr-x runner/runner 0 2023-09-15 10:34 ./ drwxr-xr-x runner/runner 0 2023-09-15 10:34 ./opt/ drwxr-xr-x runner/runner 0 2023-09-15 10:34 ./opt/kata/ drwxr-xr-x runner/runner 0 2023-09-15 10:34 ./opt/kata/share/ drwxr-xr-x runner/runner 0 2023-09-15 10:34 ./opt/kata/share/ovmf/ -rwxr-xr-x runner/runner 4194304 2023-09-15 10:34 ./opt/kata/share/ovmf/AMDSEV.fd ~/src/upstream/kata-containers/kata-containers/tools/packaging/kata-deploy/local-build/build ~/src/upstream/kata-containers/kata-containers/tools/packaging/kata-deploy/local-build/build/ovmf-sev/builddir ~/src/upstream/kata-containers/kata-containers/tools/packaging/kata-deploy/local-build/build/ovmf-sev/builddir make[1]: Leaving directory '/home/ffidenci/src/upstream/kata-containers/kata-containers' ``` Signed-off-by: Fabiano 
Fidêncio (cherry picked from commit eecd5bf2aabfb015b37d8102e24c6b28e2f85149) --- tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 7bd938fdc..15159a769 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -589,6 +589,7 @@ install_ovmf() { tarball_name="${2:-edk2-x86_64.tar.gz}" local component_name="ovmf" + [ "${ovmf_type}" == "sev" ] && component_name="ovmf-sev" [ "${ovmf_type}" == "tdx" ] && component_name="tdvf" latest_artefact="$(get_from_kata_deps "externals.ovmf.${ovmf_type}.version")" From 7c4a0f7facabab2af2ada581a57efa9a555d0a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 11:47:14 +0200 Subject: [PATCH 203/339] ci: Use variable size of VMs depending on the tests running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let me start with a fair warning that this commit is hard to split into different parts that could be easily tested (or not tested, just ignored) without breaking pieces. Now, about the commit itself, as we're on the run to reduce costs related to our sponsorship on Azure, we can split the k8s tests we run in 2 simple groups: * Tests that can be run in the smaller Azure instance (D2s_v5) * Tests that required the normal Azure instance (D4s_v5) With this in mind, we're now passing to the tests which type of host we're using, which allows us to select to run either one of the two types of tests, or even both in case of running the tests on a baremetal system. 
Fixes: #7972 Signed-off-by: Fabiano Fidêncio (cherry picked from commit c69a1e33bde402ec095aa47cdd27f75940fc8cfa) --- .github/workflows/run-k8s-tests-on-aks.yaml | 4 +++ .github/workflows/run-k8s-tests-on-garm.yaml | 11 +++++++- .github/workflows/run-k8s-tests-on-sev.yaml | 1 + .github/workflows/run-k8s-tests-on-snp.yaml | 1 + .github/workflows/run-k8s-tests-on-tdx.yaml | 1 + tests/gha-run-k8s-common.sh | 13 +++++++++- tests/integration/kubernetes/gha-run.sh | 1 + .../kubernetes/run_kubernetes_tests.sh | 26 ++++++++++++++++++- 8 files changed, 55 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index c7b2f9081..23439e3f1 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -33,6 +33,9 @@ jobs: - clh - dragonball - qemu + instance-type: + - small + - normal include: - host_os: cbl-mariner vmm: clh @@ -46,6 +49,7 @@ jobs: KATA_HYPERVISOR: ${{ matrix.vmm }} KUBERNETES: "vanilla" USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/run-k8s-tests-on-garm.yaml b/.github/workflows/run-k8s-tests-on-garm.yaml index 3fdb8302d..1fd4b00ee 100644 --- a/.github/workflows/run-k8s-tests-on-garm.yaml +++ b/.github/workflows/run-k8s-tests-on-garm.yaml @@ -35,7 +35,15 @@ jobs: - devmapper k8s: - k3s - runs-on: garm-ubuntu-2004-smaller + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + runs-on: ${{ matrix.instance }} env: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} @@ -45,6 +53,7 @@ jobs: KUBERNETES: ${{ matrix.k8s }} SNAPSHOTTER: ${{ matrix.snapshotter }} USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} steps: - uses: actions/checkout@v3 with: diff --git 
a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml index df105974a..a720e2486 100644 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -39,6 +39,7 @@ jobs: KUBECONFIG: /home/kata/.kube/config KUBERNETES: "vanilla" USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml index 80e146795..33ae57d3a 100644 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -39,6 +39,7 @@ jobs: KUBECONFIG: /home/kata/.kube/config KUBERNETES: "vanilla" USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml index f9b14bc9d..940fd0a39 100644 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -38,6 +38,7 @@ jobs: KATA_HYPERVISOR: ${{ matrix.vmm }} KUBERNETES: "k3s" USING_NFD: "true" + K8S_TEST_HOST_TYPE: "baremetal" steps: - uses: actions/checkout@v3 with: diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 40a0903b9..fd9b98109 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -12,6 +12,7 @@ tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${tests_dir}/common.bash" AZ_RG="${AZ_RG:-kataCI}" +K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" function _print_cluster_name() { test_type="${1:-k8s}" @@ -40,10 +41,20 @@ function create_cluster() { # First, ensure that the cluster didn't fail to get cleaned up from a previous run. 
delete_cluster "${test_type}" || true + local instance_type="" + case ${K8S_TEST_HOST_TYPE} in + small) + instance_type="Standard_D2s_v5" + ;; + normal) + instance_type="Standard_D4s_v5" + ;; + esac + az aks create \ -g "${AZ_RG}" \ -n "$(_print_cluster_name ${test_type})" \ - -s "Standard_D2s_v5" \ + -s "${instance_type}" \ --node-count 1 \ --generate-ssh-keys \ $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index c53583aff..1cb11c456 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -233,6 +233,7 @@ function cleanup() { function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" + export K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-}" action="${1:-}" diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index aff825b47..0d4c40cfa 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -13,11 +13,12 @@ source "${kubernetes_dir}/../../common.bash" TARGET_ARCH="${TARGET_ARCH:-x86_64}" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" K8S_TEST_DEBUG="${K8S_TEST_DEBUG:-false}" +K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" if [ -n "${K8S_TEST_UNION:-}" ]; then K8S_TEST_UNION=($K8S_TEST_UNION) else - K8S_TEST_UNION=( \ + K8S_TEST_SMALL_HOST_UNION=( \ "k8s-confidential.bats" \ "k8s-attach-handlers.bats" \ "k8s-caps.bats" \ @@ -53,6 +54,29 @@ else "k8s-volume.bats" \ "k8s-nginx-connectivity.bats" \ ) + + K8S_TEST_NORMAL_HOST_UNION=( \ + "k8s-number-cpus.bats" \ + "k8s-parallel.bats" \ + "k8s-scale-nginx.bats" \ + ) + + case ${K8S_TEST_HOST_TYPE} in + small) + K8S_TEST_UNION=($K8S_TEST_SMALL_HOST_UNION) + ;; + normal) + K8S_TEST_UNION=($K8S_TEST_NORMAL_HOST_UNION) + ;; + baremetal) + K8S_TEST_UNION=(${K8S_TEST_SMALL_HOST_UNION[@]} 
${K8S_TEST_NORMAL_HOST_UNION[@]}) + + ;; + *) + echo "${K8S_TEST_HOST_TYPE} is an invalid K8S_TEST_HOST_TYPE option. Valid options are: small | normal | baremetal" + return 1 + ;; + esac fi # we may need to skip a few test cases when running on non-x86_64 arch From bb8d1be300da8a610864b48098e51a08a8491e18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 13:26:40 +0200 Subject: [PATCH 204/339] ci: static-checks: Move kernel config check to its own job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't make sense to run this for all the bits of the matrix, neither it's demanding enough to require running this in one of our Azure sponsored runners. Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 75c974c8024d931e085ffd214be4717e046b6640) --- .github/workflows/static-checks.yaml | 37 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index cb113bfb0..ff952623c 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -12,6 +12,28 @@ concurrency: name: Static checks jobs: + check-kernel-config-version: + runs-on: ubuntu-latest + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Ensure the kernel config version has been updated + run: | + kernel_dir="tools/packaging/kernel/" + kernel_version_file="${kernel_dir}kata_config_version" + modified_files=$(git diff --name-only origin/$GITHUB_BASE_REF..HEAD) + if git diff --name-only origin/$GITHUB_BASE_REF..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then + echo "Kernel directory has changed, checking if $kernel_version_file has been updated" + if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then + echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( 
echo "Please bump version in $kernel_version_file" && exit 1) + else + echo "Readme file changed, no need for kernel config version update." + fi + echo "Check passed" + fi + static-checks: runs-on: garm-ubuntu-2004 strategy: @@ -48,21 +70,6 @@ jobs: uses: actions/setup-go@v3 with: go-version: 1.19.3 - - name: Check kernel config version - run: | - cd "${{ github.workspace }}/src/github.com/${{ github.repository }}" - kernel_dir="tools/packaging/kernel/" - kernel_version_file="${kernel_dir}kata_config_version" - modified_files=$(git diff --name-only origin/main..HEAD) - if git diff --name-only origin/main..HEAD "${kernel_dir}" | grep "${kernel_dir}"; then - echo "Kernel directory has changed, checking if $kernel_version_file has been updated" - if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then - echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) - else - echo "Readme file changed, no need for kernel config version update." - fi - echo "Check passed" - fi - name: Set PATH if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | From c18c412db762eb9797ae7b06e57b33a4dc62bd82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 13:59:41 +0200 Subject: [PATCH 205/339] tests: Move functions from kata_arch script here MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use this a lot as part of our CI, but right now I'm just moving those here with the intent to use later on in this series. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 11dff731b74d46c5135098073bc47c0b6a43dfbd) --- tests/common.bash | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/common.bash b/tests/common.bash index fc0ed0a7f..fc5eac5cb 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -413,3 +413,42 @@ function install_cri_tools() { sudo tar -xvf "${tarball_name}" -C /usr/local/bin rm -f "${tarball_name}" } + +# Convert architecture to the name used by golang +function arch_to_golang() { + local arch="$(uname -m)" + + case "${arch}" in + aarch64) echo "arm64";; + ppc64le) echo "${arch}";; + x86_64) echo "amd64";; + s390x) echo "s390x";; + *) die "unsupported architecture: ${arch}";; + esac +} + +# Convert architecture to the name used by rust +function arch_to_rust() { + local -r arch="$(uname -m)" + + case "${arch}" in + aarch64) echo "${arch}";; + ppc64le) echo "powerpc64le";; + x86_64) echo "${arch}";; + s390x) echo "${arch}";; + *) die "unsupported architecture: ${arch}";; + esac +} + +# Convert architecture to the name used by the Linux kernel build system +function arch_to_kernel() { + local -r arch="$(uname -m)" + + case "${arch}" in + aarch64) echo "arm64";; + ppc64le) echo "powerpc";; + x86_64) echo "${arch}";; + s390x) echo "s390x";; + *) die "unsupported architecture: ${arch}";; + esac +} From d3a04b7b8f202508ddf902fda23e96040bed6dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 14:01:37 +0200 Subject: [PATCH 206/339] tests: install_go: Remove tests repo dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can rely on the functions that are now part of the common.bash. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e64508c3089eab09cfa8907f822fd2acd0e0acb1) --- tests/install_go.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/install_go.sh b/tests/install_go.sh index 3827bb7b3..dadaf6ca0 100755 --- a/tests/install_go.sh +++ b/tests/install_go.sh @@ -87,7 +87,7 @@ if command -v go; then fi fi -goarch=$("${repo_root_dir}/tests/kata-arch.sh" --golang) +goarch=$(arch_to_golang) info "Download go version ${go_version}" kernel_name=$(uname -s) From 755057c9ed9a3a933a92791f74fa34184158a0c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 14:06:18 +0200 Subject: [PATCH 207/339] tests: Move install_rust.sh from the tests repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll use it as part of the refactoring we're doing in the static check tests. I can see a lot of other uses of this, but changing all of them to this one is out of the scope for this PR. Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 6794d4c843e9b5bf91c1b1fdae88585cab0dd7c4) --- tests/install_rust.sh | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 tests/install_rust.sh diff --git a/tests/install_rust.sh b/tests/install_rust.sh new file mode 100755 index 000000000..6fc62ef02 --- /dev/null +++ b/tests/install_rust.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# +# Copyright (c) 2019 Ant Financial +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +script_name="$(basename "${BASH_SOURCE[0]}")" + +source "${script_dir}/common.bash" + +rustarch=$(arch_to_rust) + +version="${1:-""}" +if [ -z "${version}" ]; then + version=$(get_from_kata_deps "languages.rust.meta.newest-version") +fi + +echo "Install rust ${version}" + +if ! 
command -v rustup > /dev/null; then + curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain ${version} +fi + +export PATH="${PATH}:${HOME}/.cargo/bin" + +## Still try to install the target version of toolchain, +## in case that the rustup has been installed but +## with a different version toolchain. +## Even though the target version toolchain has been installed, +## this command will not take too long to run. +rustup toolchain install ${version} +rustup default ${version} +if [ "${rustarch}" == "powerpc64le" ] || [ "${rustarch}" == "s390x" ] ; then + rustup target add ${rustarch}-unknown-linux-gnu +else + rustup target add ${rustarch}-unknown-linux-musl + $([ "$(whoami)" != "root" ] && echo sudo) ln -sf /usr/bin/g++ /bin/musl-g++ +fi +rustup component add rustfmt From d288e1ab876947099db82e0119c84bf0e522bad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 14:23:07 +0200 Subject: [PATCH 208/339] ci: static-checks: Move vendor check to its own job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to the static-check jobs, those jobs can be run on the zero cost runners. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e2c61a152c0bfa7fc3ea34addbe67538fe3642e9) --- .github/workflows/static-checks.yaml | 62 +++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index ff952623c..393869174 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -34,6 +34,67 @@ jobs: echo "Check passed" fi + check-vendor: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + component: + - agent + - dragonball + - runtime + - runtime-rs + - agent-ctl + - kata-ctl + - log-parser-rs + - runk + - trace-forwarder + include: + - component: agent + component-path: src/agent + - component: dragonball + component-path: src/dragonball + - component: runtime + component-path: src/runtime + - component: runtime-rs + component-path: src/runtime-rs + - component: agent-ctl + component-path: src/tools/agent-ctl + - component: kata-ctl + component-path: src/tools/kata-ctl + - component: log-parser-rs + component-path: src/tools/log-parser-rs + - component: runk + component-path: src/tools/runk + - component: trace-forwarder + component-path: src/tools/trace-forwarder + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install yq + run: | + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + if: ${{ matrix.component == 'runtime' }} + run: | + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install rust + if: ${{ matrix.component != 'runtime' }} + run: | + ./tests/install_rust.sh + echo "${HOME}/.cargo/bin" >> $GITHUB_PATH + - name: Check ${{ matrix.component }} vendored code + run: | + cd ${{ matrix.component-path }} + make vendor + env: + RUST_BACKTRACE: "1" + static-checks: runs-on: garm-ubuntu-2004 strategy: @@ -43,7 +104,6 @@ jobs: fail-fast: false matrix: cmd: - - 
"make vendor" - "make static-checks" - "make check" - "make test" From cd6ab3cf07a7e5af87e71ba58295e0713599def3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:28:09 +0200 Subject: [PATCH 209/339] tests: install_rust: Also install clippy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clippy is used as part our tests, so it's useful to have it installed while we're already installing rust. In case of developers, they also better be using it. :-) Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit e1257758634c2d27ac8518f1cf0b23febcf74f04) --- tests/install_rust.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/install_rust.sh b/tests/install_rust.sh index 6fc62ef02..abb93cac6 100755 --- a/tests/install_rust.sh +++ b/tests/install_rust.sh @@ -42,3 +42,4 @@ else $([ "$(whoami)" != "root" ] && echo sudo) ln -sf /usr/bin/g++ /bin/musl-g++ fi rustup component add rustfmt +rustup component add clippy From a62e18b27f23fd81f967e9665dfc4630bd924edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:35:41 +0200 Subject: [PATCH 210/339] kata-ctl: Ensure GENERATED_CODE is a dep of `make check` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise `make check` would fail with: ``` Error writing files: failed to resolve mod `version`: /home/runner/work/kata-containers/kata-containers/src/tools/kata-ctl/src/ops/version.rs does not exist make: *** [../../../utils.mk:176: standard_rust_check] Error 1 ``` Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit ea19549a997fc53a56435a75c4f7dda5c3ee1613) --- src/tools/kata-ctl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/kata-ctl/Makefile b/src/tools/kata-ctl/Makefile index 23ae7ca1e..ab677525e 100644 --- a/src/tools/kata-ctl/Makefile +++ 
b/src/tools/kata-ctl/Makefile @@ -58,7 +58,7 @@ test: install: @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo install --locked --target $(TRIPLE) --path . --root $(INSTALL_PATH) -check: standard_rust_check +check: $(GENERATED_CODE) standard_rust_check .PHONY: \ build \ From d6996d01c0c6e0b8c423656ed2374e295ba74970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:36:51 +0200 Subject: [PATCH 211/339] kata-ctl: Add `kata-types` to the Cargo.lock file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit message covered everything. :-) Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 473ec8780675b7377655ef3a56b73552b9bbbcf5) --- src/tools/kata-ctl/Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/kata-ctl/Cargo.lock b/src/tools/kata-ctl/Cargo.lock index eda34fb17..2ddbcdbb9 100644 --- a/src/tools/kata-ctl/Cargo.lock +++ b/src/tools/kata-ctl/Cargo.lock @@ -1946,6 +1946,7 @@ dependencies = [ "anyhow", "hyper", "hyperlocal", + "kata-types", "tokio", ] From bb920178ada80138edd3a6163494c5f1d7b9429a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:21:35 +0200 Subject: [PATCH 212/339] ci: static-checks: Move "make check" to the new test matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're moving it out of the previous "static-checks" confusing matrix, and adding it to the matrix that was currently being used for the `make vendor` checks. This will allow us to have one job per component, and with that we can easily run those in parallel and on the zero cost runners. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit bf888b9a5eb954561d91c190e6fd352f0eb18914) --- .github/workflows/static-checks.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 393869174..b2de664ac 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -34,7 +34,7 @@ jobs: echo "Check passed" fi - check-vendor: + build-checks: runs-on: ubuntu-20.04 strategy: fail-fast: false @@ -49,6 +49,9 @@ jobs: - log-parser-rs - runk - trace-forwarder + command: + - "make vendor" + - "make check" include: - component: agent component-path: src/agent @@ -88,10 +91,10 @@ jobs: run: | ./tests/install_rust.sh echo "${HOME}/.cargo/bin" >> $GITHUB_PATH - - name: Check ${{ matrix.component }} vendored code + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} run: | cd ${{ matrix.component-path }} - make vendor + ${{ matrix.command }} env: RUST_BACKTRACE: "1" @@ -105,7 +108,6 @@ jobs: matrix: cmd: - "make static-checks" - - "make check" - "make test" - "sudo -E PATH=\"$PATH\" make test" env: From d269f09a6641d7c6a3eb1dbdc956098a0fc9e2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 16:08:34 +0200 Subject: [PATCH 213/339] ci: install_libseccomp: Do not depend on the tests repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes things way simpler, waaaaay simpler. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 1d32410a832c929b595e230ebe3e531f3d7d41a3) --- ci/install_libseccomp.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ci/install_libseccomp.sh b/ci/install_libseccomp.sh index 683d0f65b..5d53be733 100755 --- a/ci/install_libseccomp.sh +++ b/ci/install_libseccomp.sh @@ -7,12 +7,10 @@ set -o errexit -cidir=$(dirname "$0") -source "${cidir}/lib.sh" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +script_name="$(basename "${BASH_SOURCE[0]}")" -clone_tests_repo - -source "${tests_repo_dir}/.ci/lib.sh" +source "${script_dir}/../tests/common.bash" # The following variables if set on the environment will change the behavior # of gperf and libseccomp configure scripts, that may lead this script to @@ -25,11 +23,11 @@ workdir="$(mktemp -d --tmpdir build-libseccomp.XXXXX)" # Variables for libseccomp libseccomp_version="${LIBSECCOMP_VERSION:-""}" if [ -z "${libseccomp_version}" ]; then - libseccomp_version=$(get_version "externals.libseccomp.version") + libseccomp_version=$(get_from_kata_deps "externals.libseccomp.version") fi libseccomp_url="${LIBSECCOMP_URL:-""}" if [ -z "${libseccomp_url}" ]; then - libseccomp_url=$(get_version "externals.libseccomp.url") + libseccomp_url=$(get_from_kata_deps "externals.libseccomp.url") fi libseccomp_tarball="libseccomp-${libseccomp_version}.tar.gz" libseccomp_tarball_url="${libseccomp_url}/releases/download/v${libseccomp_version}/${libseccomp_tarball}" @@ -38,11 +36,11 @@ cflags="-O2" # Variables for gperf gperf_version="${GPERF_VERSION:-""}" if [ -z "${gperf_version}" ]; then - gperf_version=$(get_version "externals.gperf.version") + gperf_version=$(get_from_kata_deps "externals.gperf.version") fi gperf_url="${GPERF_URL:-""}" if [ -z "${gperf_url}" ]; then - gperf_url=$(get_version "externals.gperf.url") + gperf_url=$(get_from_kata_deps "externals.gperf.url") fi gperf_tarball="gperf-${gperf_version}.tar.gz" 
gperf_tarball_url="${gperf_url}/${gperf_tarball}" From 93440dc141f613ce16561b449d3986ba035c7679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 17:08:34 +0200 Subject: [PATCH 214/339] agent: Ensure GENERATED_CODE is a dep of `make test` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise `make test` will fail with: ``` error[E0583]: file not found for module `version` ``` Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit ec826f328f88a1ff665c37381ada069eec337e65) --- src/agent/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/Makefile b/src/agent/Makefile index ba065b4d0..a47e69911 100644 --- a/src/agent/Makefile +++ b/src/agent/Makefile @@ -140,7 +140,7 @@ vendor: #TARGET test: run cargo tests -test: +test: $(GENERATED_FILES) @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture ##TARGET check: run test From 93577381a5dce38c98f7f3e1a2ee5298cd67dc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 17:25:55 +0200 Subject: [PATCH 215/339] kata-ctl: Ensure GENERATED_CODE is a dep of `make test` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise `make test` will simply fail with: ``` error[E0583]: file not found for module `version` ``` Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 46daddc5005c95718e26a2841515ee40823031ce) --- src/tools/kata-ctl/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/kata-ctl/Makefile b/src/tools/kata-ctl/Makefile index ab677525e..546f0783a 100644 --- a/src/tools/kata-ctl/Makefile +++ b/src/tools/kata-ctl/Makefile @@ -52,7 +52,7 @@ clean: vendor: cargo vendor -test: +test: $(GENERATED_CODE) @RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo test --target $(TRIPLE) $(if $(findstring 
release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) -- --nocapture install: From d5d21f4cb40ff6529e5e39c58327b5c870d96698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 16 Sep 2023 10:06:59 +0200 Subject: [PATCH 216/339] kata-ctl: Use `loop` instead of `kvm` module in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it pssible to run the tests in the cost free runners, which are not KVM capable. Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2bc3a616aec17aa145aaa7fbb839a8c3bb1104ab) --- src/tools/kata-ctl/src/check.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tools/kata-ctl/src/check.rs b/src/tools/kata-ctl/src/check.rs index 3bf899ad2..f23418680 100644 --- a/src/tools/kata-ctl/src/check.rs +++ b/src/tools/kata-ctl/src/check.rs @@ -539,10 +539,10 @@ mod tests { }, // Success scenarios TestData { - module_name: "kvm", + module_name: "loop", param_name: "", kernel_module: &KernelModule { - name: "kvm", + name: "loop", params: &[KernelParam { name: "nonexistantparam", value: KernelParamType::Simple("Y"), @@ -552,16 +552,16 @@ mod tests { result: Ok(()), }, TestData { - module_name: "kvm", - param_name: "kvmclock_periodic_sync", + module_name: "loop", + param_name: "hw_queue_depth", kernel_module: &KernelModule { - name: "kvm", + name: "loop", params: &[KernelParam { - name: "kvmclock_periodic_sync", - value: KernelParamType::Simple("Y"), + name: "hw_queue_depth", + value: KernelParamType::Simple("128"), }], }, - param_value: "Y", + param_value: "128", result: Ok(()), }, ]; From 9e614ce466e7a9b3bc3050b378701b6a464fcdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 18:15:53 +0200 Subject: [PATCH 217/339] runtime-rs: Ensure static-checks-build is a dep of `make test` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Otherwise `make test` will simply fail with: ``` error[E0583]: file not found for module `config` ``` Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 08f2e5ae0bbdc2e6c08b2ec29f29404207b2c2fb) --- src/runtime-rs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 1981a37d9..6fb3f20ed 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -49,7 +49,7 @@ else ##TARGET default: build code default: runtime show-header ##TARGET test: run cargo tests -test: +test: static-checks-build @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture install: install-runtime install-configs endif From 4b660a4991d7a58e53f48e655d5623979508bade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:51:45 +0200 Subject: [PATCH 218/339] ci: static-checks: Move "make test" to the new test matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're moving it out of the previous "static-checks" confusing matrix, and adding it to the matrix that was currently being used for the `make vendor` and `make check` checks. This will allow us to have one job per component, and with that we can easily run those in parallel and on the zero cost runners. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4e963cedf4f8fbddd112fb6461b0d191e54c3407) --- .github/workflows/static-checks.yaml | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index b2de664ac..7be939a4b 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -52,6 +52,7 @@ jobs: command: - "make vendor" - "make check" + - "make test" include: - component: agent component-path: src/agent @@ -71,6 +72,11 @@ jobs: component-path: src/tools/runk - component: trace-forwarder component-path: src/tools/trace-forwarder + - install-libseccomp: no + - component: agent + install-libseccomp: yes + - component: runk + install-libseccomp: yes steps: - name: Checkout the code uses: actions/checkout@v4 @@ -91,6 +97,23 @@ jobs: run: | ./tests/install_rust.sh echo "${HOME}/.cargo/bin" >> $GITHUB_PATH + - name: Install musl-tools + if: ${{ matrix.component != 'runtime' }} + run: sudo apt-get -y install musl-tools + - name: Install libseccomp + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.install-libseccomp == 'yes' }} + run: | + libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) + gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) + ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" + echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" + echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV + echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV + - name: Setup XDG_RUNTIME_DIR for the `runtime` tests + if: ${{ matrix.command != 'make vendor' && matrix.command != 'make check' && matrix.component == 'runtime' }} + run: | + XDG_RUNTIME_DIR=$(mktemp -d /tmp/kata-tests-$USER.XXX | tee >(xargs chmod 0700)) + echo "XDG_RUNTIME_DIR=${XDG_RUNTIME_DIR}" >> 
$GITHUB_ENV - name: Running `${{ matrix.command }}` for ${{ matrix.component }} run: | cd ${{ matrix.component-path }} @@ -108,7 +131,6 @@ jobs: matrix: cmd: - "make static-checks" - - "make test" - "sudo -E PATH=\"$PATH\" make test" env: RUST_BACKTRACE: "1" From 668b7effb4135914a815c5c5f01f85b05d1ed900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 17:59:49 +0200 Subject: [PATCH 219/339] ci: static-checks: Move "sudo make test" to the new test matrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're moving it out of the previous "static-checks" confusing matrix, and adding it to the matrix that was currently being used for the `make vendor` and `make check` checks. This will allow us to have one job per component, and with that we can easily run those in parallel and on the zero cost runners. Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 509c309ab22096cd7c340908cf6c71ab15d3a197) --- .github/workflows/static-checks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 7be939a4b..796330129 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -53,6 +53,7 @@ jobs: - "make vendor" - "make check" - "make test" + - "sudo -E PATH=\"$PATH\" make test" include: - component: agent component-path: src/agent @@ -131,7 +132,6 @@ jobs: matrix: cmd: - "make static-checks" - - "sudo -E PATH=\"$PATH\" make test" env: RUST_BACKTRACE: "1" target_branch: ${{ github.base_ref }} From 9784ded336b3a7c17432d6d96a764a2711ede290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 15:00:05 +0200 Subject: [PATCH 220/339] ci: static-checks: Run tests depending on KVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this we're removing the dragonball 
static-checks CI, as the test is running here now. :-) Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2c5ca2eaf816799a7dad3d47d5d52abac63c76ee) --- .../workflows/static-checks-dragonball.yaml | 41 ------------------- .github/workflows/static-checks.yaml | 37 +++++++++++++++++ 2 files changed, 37 insertions(+), 41 deletions(-) delete mode 100644 .github/workflows/static-checks-dragonball.yaml diff --git a/.github/workflows/static-checks-dragonball.yaml b/.github/workflows/static-checks-dragonball.yaml deleted file mode 100644 index 2c99210a1..000000000 --- a/.github/workflows/static-checks-dragonball.yaml +++ /dev/null @@ -1,41 +0,0 @@ -on: - pull_request: - types: - - opened - - edited - - reopened - - synchronize - paths-ignore: [ '**.md', '**.png', '**.jpg', '**.jpeg', '**.svg', '/docs/**' ] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -name: Static checks dragonball -jobs: - test-dragonball: - runs-on: garm-ubuntu-2004 - env: - RUST_BACKTRACE: "1" - steps: - - uses: actions/checkout@v3 - - name: Set env - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends build-essential haveged - - name: Install Rust - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - ./ci/install_rust.sh - echo PATH="$HOME/.cargo/bin:$PATH" >> $GITHUB_ENV - - name: Run Unit Test - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd src/dragonball - cargo version - rustc --version - sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 796330129..a6e27c7e7 100644 --- 
a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -122,6 +122,43 @@ jobs: env: RUST_BACKTRACE: "1" + build-checks-depending-on-kvm: + runs-on: garm-ubuntu-2004-smaller + strategy: + fail-fast: false + matrix: + component: + - runtime-rs + include: + - component: runtime-rs + command: "sudo -E env PATH=$PATH LIBC=gnu SUPPORT_VIRTUALIZATION=true make test" + - component: runtime-rs + component-path: src/dragonball + steps: + - name: Checkout the code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install system deps + run: | + sudo apt-get install -y build-essential musl-tools + - name: Install yq + run: | + sudo -E ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install rust + run: | + export PATH="$PATH:/usr/local/bin" + ./tests/install_rust.sh + - name: Running `${{ matrix.command }}` for ${{ matrix.component }} + run: | + export PATH="$PATH:${HOME}/.cargo/bin" + cd ${{ matrix.component-path }} + ${{ matrix.command }} + env: + RUST_BACKTRACE: "1" + static-checks: runs-on: garm-ubuntu-2004 strategy: From 625a05aa2a3f01b0065222bee219c5279806a255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 15 Sep 2023 18:48:53 +0200 Subject: [PATCH 221/339] ci: static-checks: Clean up static-checks job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the static-checks job only takes care of running the static-checks, let's clean it up, remove all the unneeded steps, make sure that we're using the actions in their latest version, and have it running in a cost free runner. At some point I'd like to see those tests done in parallel, in the same way that I've organised the build-checks, but that's something for someone else, at some other time. 
Fixes: #7974 -- part 0 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8b1e9b0c758b947107fe7d73fec2e263ac01f2af) --- .github/workflows/static-checks.yaml | 77 +++++++++------------------- 1 file changed, 24 insertions(+), 53 deletions(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index a6e27c7e7..ad2d2b7a4 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -160,64 +160,35 @@ jobs: RUST_BACKTRACE: "1" static-checks: - runs-on: garm-ubuntu-2004 + runs-on: ubuntu-20.04 strategy: - # We can set this to true whenever we're 100% sure that - # the all the tests are not flaky, otherwise we'll fail - # all the tests due to a single flaky instance. fail-fast: false matrix: cmd: - "make static-checks" env: - RUST_BACKTRACE: "1" - target_branch: ${{ github.base_ref }} GOPATH: ${{ github.workspace }} steps: - - name: Free disk space - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - name: Checkout code - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: ./src/github.com/${{ github.repository }} - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends build-essential haveged - - name: Install Go - uses: actions/setup-go@v3 - with: - go-version: 1.19.3 - - name: Set PATH - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - echo "${{ github.workspace }}/bin" >> $GITHUB_PATH - - name: Setup - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/setup.sh - - name: Installing rust - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_rust.sh - PATH=$PATH:"$HOME/.cargo/bin" - rustup target add x86_64-unknown-linux-musl - rustup component add rustfmt 
clippy - - name: Setup seccomp - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - libseccomp_install_dir=$(mktemp -d -t libseccomp.XXXXXXXXXX) - gperf_install_dir=$(mktemp -d -t gperf.XXXXXXXXXX) - cd ${GOPATH}/src/github.com/${{ github.repository }} && ./ci/install_libseccomp.sh "${libseccomp_install_dir}" "${gperf_install_dir}" - echo "Set environment variables for the libseccomp crate to link the libseccomp library statically" - echo "LIBSECCOMP_LINK_TYPE=static" >> $GITHUB_ENV - echo "LIBSECCOMP_LIB_PATH=${libseccomp_install_dir}/lib" >> $GITHUB_ENV - - name: Run check - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} - run: | - export PATH=$PATH:"$HOME/.cargo/bin" - export XDG_RUNTIME_DIR=$(mktemp -d /tmp/kata-tests-$USER.XXX | tee >(xargs chmod 0700)) - cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + path: ./src/github.com/${{ github.repository }} + - name: Install yq + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./ci/install_yq.sh + env: + INSTALL_IN_GOPATH: false + - name: Install golang + run: | + cd ${GOPATH}/src/github.com/${{ github.repository }} + ./tests/install_go.sh -f -p + echo "/usr/local/go/bin" >> $GITHUB_PATH + - name: Install system dependencies + run: | + sudo apt-get -y install moreutils + - name: Run check + run: | + export PATH=${PATH}:${GOPATH}/bin + cd ${GOPATH}/src/github.com/${{ github.repository }} && ${{ matrix.cmd }} From 8e4180f697adcdcf34c5d4a7287144bcf31cc6bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 08:56:26 +0200 Subject: [PATCH 222/339] tests: common: Add install_nydus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function will be used to download and install nydus, and it follows the same pattern we already have introduced for 
downloading and installing another dependencies from GitHub. Signed-off-by: Fabiano Fidêncio (cherry picked from commit b6563783e2782a95226bcb6b25f56cd794e4c009) --- tests/common.bash | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/common.bash b/tests/common.bash index fc5eac5cb..bffafacb1 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -414,6 +414,17 @@ function install_cri_tools() { rm -f "${tarball_name}" } +function install_nydus() { + version="${1}" + + project="dragonflyoss/image-service" + tarball_name="nydus-static-${version}-linux-$(${repo_root_dir}/tests/kata-arch.sh -g).tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo tar xfz "${tarball_name}" -C /usr/local/bin --strip-components=1 + rm -f "${tarball_name}" +} + # Convert architecture to the name used by golang function arch_to_golang() { local arch="$(uname -m)" From 4915605b20e5ddf3d23f03fc5e797ea5ee4de52c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 08:59:36 +0200 Subject: [PATCH 223/339] tests: common: Add install_nydus_snapshotter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function will be used to download and install the nydus-snapshotter, and it follows the same pattern we already have introduced for downloading and installing another dependencies from GitHub. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 56a14b3950961bced5aebdbc876419df87dde192) --- tests/common.bash | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/common.bash b/tests/common.bash index bffafacb1..c6fbf71a7 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -425,6 +425,17 @@ function install_nydus() { rm -f "${tarball_name}" } +function install_nydus_snapshotter() { + version="${1}" + + project="containerd/nydus-snapshotter" + tarball_name="nydus-snapshotter-${version}-$(${repo_root_dir}/tests/kata-arch.sh).tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo tar xfz "${tarball_name}" -C /usr/local/bin --strip-components=1 + rm -f "${tarball_name}" +} + # Convert architecture to the name used by golang function arch_to_golang() { local arch="$(uname -m)" From 82c531978fca716371c1b241770080fecaa1de5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 09:19:20 +0200 Subject: [PATCH 224/339] tests: nydus: Adapt check to "clh" instead "cloud-hypervisor" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As that's what we've been using as part of the GHA. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit a84efa3e875c9323006d41e7c40dd3677de9b1ae) --- tests/integration/nydus/nydus_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index fe49580da..52cc579c3 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -30,7 +30,7 @@ containerd_config_backup="/tmp/containerd.config.toml" # test image for container IMAGE="${IMAGE:-ghcr.io/dragonflyoss/image-service/alpine:nydus-latest}" -if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "cloud-hypervisor" ] && [ "$KATA_HYPERVISOR" != "dragonball" ]; then +if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "clh" ] && [ "$KATA_HYPERVISOR" != "dragonball" ]; then echo "Skip nydus test for $KATA_HYPERVISOR, it only works for QEMU/CLH/DB now." exit 0 fi From 4b58777eecc207045aa37e4822478b3551ac1739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 09:36:08 +0200 Subject: [PATCH 225/339] tests: nydus: Adapt "source ..." to GHA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "source ..." we've been doing was not changed since those tests were part of the Jenkins tests, and we need to adapt them, either setting the correct path or entirely removing the ones that are not relevant to us anymore. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4290fd4b67d173626631c31603c715ce7d09cfda) --- tests/integration/nydus/nydus_tests.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 52cc579c3..87cf22917 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -12,8 +12,7 @@ set -o pipefail set -o errtrace dir_path=$(dirname "$0") -source "${dir_path}/../../lib/common.bash" -source "${dir_path}/../../.ci/lib.sh" +source "${dir_path}/../../common.bash" source "/etc/os-release" || source "/usr/lib/os-release" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" From fd5935da9d645d32898042235618bd1735813f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 11:33:33 +0200 Subject: [PATCH 226/339] tests: nydus: Decorate some calls with `sudo` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we canoot properly start the nydus snapshotter, nor properly kill it after it's been started. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 376574a16c650482106597416463ecab96f515e2) --- tests/integration/nydus/nydus_tests.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 87cf22917..15512b32d 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -68,7 +68,7 @@ function setup_nydus() { sudo -E cp "$dir_path/nydusd-config.json" /etc/ # start nydus-snapshotter - nohup /usr/local/bin/containerd-nydus-grpc \ + sudo nohup /usr/local/bin/containerd-nydus-grpc \ --config-path /etc/nydusd-config.json \ --shared-daemon \ --log-level debug \ @@ -163,7 +163,7 @@ function run_test() { state=$(sudo -E crictl inspect $cnt | jq .status.state | tr -d '"') [ $state == "CONTAINER_RUNNING" ] || die "Container is not running($state)" # run a command in container - crictl exec $cnt ls + sudo -E crictl exec $cnt ls # cleanup containers sudo -E crictl stop $cnt @@ -176,11 +176,11 @@ function teardown() { # kill nydus-snapshotter bin=containerd-nydus-grpc - kill -9 $(pidof $bin) || true + sudo -E kill -9 $(pidof $bin) || true [ "$(pidof $bin)" == "" ] || die "$bin is running" bin=nydusd - kill -9 $(pidof $bin) || true + sudo -E kill -9 $(pidof $bin) || true [ "$(pidof $bin)" == "" ] || die "$bin is running" # restore kata configuratiom.toml if needed From 6d9b8e2437434d4e63a83254952c45f6211cdbbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 12:00:37 +0200 Subject: [PATCH 227/339] tests: nydus: Add uid / namespace to the nydus container / sandbox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we may face errors like: ``` getting sandbox status of pod "d3af2db414ce8": metadata.Name, metadata.Namespace or metadata.Uid is not in metadata "&PodSandboxMetadata{Name:nydus-sandbox,Uid:,Namespace:default,Attempt:1,}" getting 
sandbox status of pod "-A": rpc error: code = NotFound desc = an error occurred when try to find sandbox: not found ``` Signed-off-by: Fabiano Fidêncio (cherry picked from commit 5ac3b76eb12a5c4bedbf990b2d55b9856db3c83c) --- tests/integration/nydus/nydus-container.yaml | 2 ++ tests/integration/nydus/nydus-sandbox.yaml | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/integration/nydus/nydus-container.yaml b/tests/integration/nydus/nydus-container.yaml index a30327ade..a8b0248dd 100644 --- a/tests/integration/nydus/nydus-container.yaml +++ b/tests/integration/nydus/nydus-container.yaml @@ -1,5 +1,7 @@ metadata: name: nydus-container + namespace: default + uid: nydus-containerd-uid image: image: ghcr.io/dragonflyoss/image-service/alpine:nydus-latest command: diff --git a/tests/integration/nydus/nydus-sandbox.yaml b/tests/integration/nydus/nydus-sandbox.yaml index 9f039d726..7a3e32c93 100644 --- a/tests/integration/nydus/nydus-sandbox.yaml +++ b/tests/integration/nydus/nydus-sandbox.yaml @@ -2,4 +2,5 @@ metadata: attempt: 1 name: nydus-sandbox namespace: default + uid: nydus-sandbox-uid log_directory: /tmp From 9ad60006769625830d86f735fffa9926cb2da573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 12:58:54 +0200 Subject: [PATCH 228/339] tests: nydus: Add timeout to the crictl calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to what's been done for the cri-containerd tests, as part of 84dd02e0f9baf942181d2808fc3f31c1077a0ebd, we need to add the timeout here for the crictl calls. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 568439c77b5abda309c0b9dfa67cf80b51e1df20) --- tests/integration/nydus/nydus_tests.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 15512b32d..749a29c6d 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -151,24 +151,24 @@ function setup() { } function run_test() { - sudo -E crictl pull "${IMAGE}" - pod=$(sudo -E crictl runp -r kata $dir_path/nydus-sandbox.yaml) + sudo -E crictl --timeout=20s pull "${IMAGE}" + pod=$(sudo -E crictl --timeout=20s runp -r kata $dir_path/nydus-sandbox.yaml) echo "Pod $pod created" - cnt=$(sudo -E crictl create $pod $dir_path/nydus-container.yaml $dir_path/nydus-sandbox.yaml) + cnt=$(sudo -E crictl --timeout=20s create $pod $dir_path/nydus-container.yaml $dir_path/nydus-sandbox.yaml) echo "Container $cnt created" - sudo -E crictl start $cnt + sudo -E crictl --timeout=20s start $cnt echo "Container $cnt started" # ensure container is running - state=$(sudo -E crictl inspect $cnt | jq .status.state | tr -d '"') + state=$(sudo -E crictl --timeout=20s inspect $cnt | jq .status.state | tr -d '"') [ $state == "CONTAINER_RUNNING" ] || die "Container is not running($state)" # run a command in container - sudo -E crictl exec $cnt ls + sudo -E crictl --timeout=20s exec $cnt ls # cleanup containers - sudo -E crictl stop $cnt - sudo -E crictl stopp $pod - sudo -E crictl rmp $pod + sudo -E crictl --timeout=20s stop $cnt + sudo -E crictl --timeout=20s stopp $pod + sudo -E crictl --timeout=20s rmp $pod } function teardown() { From 30efa3e5632b8171ed16929d9fa460c8274922e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 09:04:44 +0200 Subject: [PATCH 229/339] gha: nydus: Get rid of nydus{,-snapshotter} install from nydus_test.sh MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit As we've added install_nydus() and install_nydus_snapshotter(), which do conform with the pattern we're following on GHA, let's rely on them rather than relying on the bits coming from nydus_test.sh. Later on we'll have install_nydus() and install_nydus_snapshotter() as part of the dependencies install in our `gha-run.sh`. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 0ec00ad42e0a02617828644ad9c0722d5ab51045) --- tests/integration/nydus/nydus_tests.sh | 29 -------------------------- 1 file changed, 29 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 749a29c6d..3c9e30540 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -34,36 +34,7 @@ if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "clh" ] && [ "$KA exit 0 fi -arch="$(uname -m)" -if [ "$arch" != "x86_64" ]; then - echo "Skip nydus test for $arch, it only works for x86_64 now. 
See https://github.com/kata-containers/tests/issues/4445" - exit 0 -fi - -function install_from_tarball() { - local package_name="$1" - local binary_name="$2" - [ -n "$package_name" ] || die "need package_name" - [ -n "$binary_name" ] || die "need package release binary_name" - - local url=$(get_version "externals.${package_name}.url") - local version=$(get_version "externals.${package_name}.version") - local tarball_url="${url}/releases/download/${version}/${binary_name}-${version}-$arch.tgz" - if [ "${package_name}" == "nydus" ]; then - local goarch="$(${dir_path}/../../.ci/kata-arch.sh --golang)" - tarball_url="${url}/releases/download/${version}/${binary_name}-${version}-linux-$goarch.tgz" - fi - echo "Download tarball from ${tarball_url}" - curl -Ls "$tarball_url" | sudo tar xfz - -C /usr/local/bin --strip-components=1 -} - function setup_nydus() { - # install nydus - install_from_tarball "nydus" "nydus-static" - - # install nydus-snapshotter - install_from_tarball "nydus-snapshotter" "nydus-snapshotter" - # Config nydus snapshotter sudo -E cp "$dir_path/nydusd-config.json" /etc/ From 70c1c7d8685e507acf3fb8c0e54c4b8c30a44714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 10:09:27 +0200 Subject: [PATCH 230/339] gha: nydus: Actually install kata when `install-kata` is called MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've been simply doing nothing whenever `install-kata` was called, and that was the intent when we added the placeholder calls. Now, let's install kata, as expected. 
:-) Signed-off-by: Fabiano Fidêncio (cherry picked from commit d2b3b67f5de23eeceb6d866d3cd6306fe0cc4de9) --- tests/integration/nydus/gha-run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/nydus/gha-run.sh b/tests/integration/nydus/gha-run.sh index 86707d504..a5f856a29 100755 --- a/tests/integration/nydus/gha-run.sh +++ b/tests/integration/nydus/gha-run.sh @@ -29,7 +29,7 @@ function main() { action="${1:-}" case "${action}" in install-dependencies) install_dependencies ;; - install-kata) return 0 ;; + install-kata) install_kata ;; run) run ;; *) >&2 die "Invalid argument" ;; esac From 33a44278456a41ba0822837e39f6d1a4db659755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 09:08:18 +0200 Subject: [PATCH 231/339] gha: nydus: Populate install_dependencies() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's have all the dependencies needed for running the nydus tests installed. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit b3904a1a308e4eb7116f684552d1c8e06966f280) --- tests/integration/nydus/gha-run.sh | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/integration/nydus/gha-run.sh b/tests/integration/nydus/gha-run.sh index a5f856a29..966b69ce9 100755 --- a/tests/integration/nydus/gha-run.sh +++ b/tests/integration/nydus/gha-run.sh @@ -16,7 +16,34 @@ source "${nydus_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the nydus tests" - return 0 + # Dependency list of projects that we can rely on the system packages + # - jq + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + # Dependency list of projects that we can install them + # directly from their releases on GitHub: + # - containerd + # - cri-container-cni release tarball already includes CNI plugins + # - cri-tools + # - nydus + # - nydus-snapshotter + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + github_deps[1]="cri_tools:$(get_from_kata_deps "externals.critools.latest")" + github_deps[2]="nydus:$(get_from_kata_deps "externals.nydus.version")" + github_deps[3]="nydus_snapshotter:$(get_from_kata_deps "externals.nydus-snapshotter.version")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done } function run() { From d9e910702bd49444ea131bfac5e2d326f88bccc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 16 Aug 2023 09:21:27 +0200 Subject: [PATCH 232/339] gha: nydus: Populate run() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And with this we finally enable the nydus tests to run as part of our GHA CI. 
Fixes: #6543 Signed-off-by: Fabiano Fidêncio (cherry picked from commit b73bde320d653790d61556584c42216942950e07) --- tests/integration/nydus/gha-run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/nydus/gha-run.sh b/tests/integration/nydus/gha-run.sh index 966b69ce9..357fe6fcf 100755 --- a/tests/integration/nydus/gha-run.sh +++ b/tests/integration/nydus/gha-run.sh @@ -49,7 +49,8 @@ function install_dependencies() { function run() { info "Running nydus tests using ${KATA_HYPERVISOR} hypervisor" - return 0 + enabling_hypervisor + bash -c "${nydus_dir}/nydus_tests.sh" } function main() { From 079ab1e0acf2b0a38069f9c9e1228ab698d11d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 13 Sep 2023 21:16:28 +0200 Subject: [PATCH 233/339] versions: Bump nydus and nydus-snapshotter to its latest release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we need https://github.com/containerd/nydus-snapshotter/pull/530 in. Fixes #7984 Signed-off-by: Fabiano Fidêncio Signed-off-by: ChengyuZhu6 (cherry picked from commit c9a4e7e46de5e2fb4cf05388fbda84e0fedd6996) --- versions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/versions.yaml b/versions.yaml index f316be865..3aff0b999 100644 --- a/versions.yaml +++ b/versions.yaml @@ -277,12 +277,12 @@ externals: nydus: description: "Nydus image acceleration service" url: "https://github.com/dragonflyoss/image-service" - version: "v2.2.1" + version: "v2.2.3" nydus-snapshotter: description: "Snapshotter for Nydus image acceleration service" url: "https://github.com/containerd/nydus-snapshotter" - version: "v0.3.3" + version: "v0.12.0" ovmf: description: "Firmware, implementation of UEFI for virtual machines." 
From 65e5bfe9eb1693ae401a1b6ba744ed16d2cdbefc Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Fri, 15 Sep 2023 01:05:31 +0800 Subject: [PATCH 234/339] tests: nydus: Update nydus tests To support the v0.12.0 nydus-snapshotter, we need to update the config files and the commandline to start nydus-snapshotter. Signed-off-by: ChengyuZhu6 (cherry picked from commit 2f9c9e2e63c74196c89d8f65bd56d6039b6dcf58) --- tests/integration/nydus/nydus_tests.sh | 24 ++-- tests/integration/nydus/nydusd-config.json | 12 +- .../integration/nydus/snapshotter-config.toml | 128 ++++++++++++++++++ 3 files changed, 147 insertions(+), 17 deletions(-) create mode 100644 tests/integration/nydus/snapshotter-config.toml diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 3c9e30540..e90b2498c 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -37,19 +37,12 @@ fi function setup_nydus() { # Config nydus snapshotter sudo -E cp "$dir_path/nydusd-config.json" /etc/ + sudo -E cp "$dir_path/snapshotter-config.toml" /etc/ # start nydus-snapshotter sudo nohup /usr/local/bin/containerd-nydus-grpc \ - --config-path /etc/nydusd-config.json \ - --shared-daemon \ - --log-level debug \ - --root /var/lib/containerd/io.containerd.snapshotter.v1.nydus \ - --cache-dir /var/lib/nydus/cache \ - --nydusd-path /usr/local/bin/nydusd \ - --nydusimg-path /usr/local/bin/nydus-image \ - --disable-cache-manager true \ - --enable-nydus-overlayfs true \ - --log-to-stdout >/dev/null 2>&1 & + --config /etc/snapshotter-config.toml \ + --nydusd-config /etc/nydusd-config.json & } function config_kata() { @@ -112,12 +105,23 @@ function config_containerd() { EOF } +function check_nydus_snapshotter_exist() { + echo "check_nydus_snapshotter_exist" + bin="containerd-nydus-grpc" + if pgrep -f "$bin" >/dev/null; then + echo "nydus-snapshotter is running" + else + die "nydus-snapshotter is not running" + fi +} + function setup() { setup_nydus 
config_kata config_containerd restart_containerd_service check_processes + check_nydus_snapshotter_exist extract_kata_env } diff --git a/tests/integration/nydus/nydusd-config.json b/tests/integration/nydus/nydusd-config.json index 654b39353..4b75426d7 100644 --- a/tests/integration/nydus/nydusd-config.json +++ b/tests/integration/nydus/nydusd-config.json @@ -3,17 +3,13 @@ "backend": { "type": "registry", "config": { - "scheme": "https", "timeout": 5, "connect_timeout": 5, "retry_limit": 2 } }, "cache": { - "type": "blobcache", - "config": { - "work_dir": "/var/lib/nydus/cache" - } + "type": "blobcache" } }, "mode": "direct", @@ -22,6 +18,8 @@ "enable_xattr": true, "fs_prefetch": { "enable": true, - "threads_count": 2 + "threads_count": 8, + "merging_size": 1048576, + "prefetch_all": true } -} +} \ No newline at end of file diff --git a/tests/integration/nydus/snapshotter-config.toml b/tests/integration/nydus/snapshotter-config.toml new file mode 100644 index 000000000..7b668236c --- /dev/null +++ b/tests/integration/nydus/snapshotter-config.toml @@ -0,0 +1,128 @@ +version = 1 +# Snapshotter's own home directory where it stores and creates necessary resources +root = "/var/lib/containerd-nydus" +# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket +address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +daemon_mode = "dedicated" +# Whether snapshotter should try to clean up resources when it is closed +cleanup_on_close = false + +[system] +# Snapshotter's debug and trace HTTP server interface +enable = true +# Unix domain socket path where system controller is listening on +address = "/run/containerd-nydus/system.sock" + +[system.debug] +# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. +# This option specifies the profile duration when nydusd is downloading and uncomproessing data. 
+daemon_cpu_profile_duration_secs = 5 +# Enable by assigning an address, empty indicates pprof server is disabled +pprof_address = "" + +[daemon] +# Specify a configuration file for nydusd +nydusd_config = "/etc/nydusd-config.json" +nydusd_path = "/usr/local/bin/nydusd" +nydusimage_path = "/usr/local/bin/nydus-image" +# fusedev or fscache +fs_driver = "fusedev" +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "restart" +# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. +# Setting to 0 will use the default configuration of nydusd. +threads_number = 4 +# Log rotation size for nydusd, in unit MB(megabytes) +log_rotation_size = 100 + + +[cgroup] +# Whether to use separate cgroup for nydusd. +enable = true +# The memory limit for nydusd cgroup, which contains all nydusd processes. +# Percentage is supported as well, please ensure it is end with "%". +# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". +memory_limit = "" + +[log] +# Print logs to stdout rather than logging files +log_to_stdout = false +# Snapshotter's log level +level = "info" +log_rotation_compress = true +log_rotation_local_time = true +# Max number of days to retain logs +log_rotation_max_age = 7 +log_rotation_max_backups = 5 +# In unit MB(megabytes) +log_rotation_max_size = 100 + +[metrics] +# Enable by assigning an address, empty indicates metrics server is disabled +address = ":9110" + +[remote] +convert_vpc_registry = false + +[remote.mirrors_config] +# Snapshotter will overwrite daemon's mirrors configuration +# if the values loaded from this driectory are not null before starting a daemon. +# Set to "" or an empty directory to disable it. 
+#dir = "/etc/nydus/certs.d" + +[remote.auth] +# Fetch the private registry auth by listening to K8s API server +enable_kubeconfig_keychain = false +# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) +kubeconfig_path = "" +# Fetch the private registry auth as CRI image service proxy +enable_cri_keychain = false +# the target image service when using image proxy +#image_service_address = "/run/containerd/containerd.sock" + +[snapshot] +# Let containerd use nydus-overlayfs mount helper +enable_nydus_overlayfs = true +# Insert Kata Virtual Volume option to `Mount.Options` +enable_kata_volume = false +# Whether to remove resources when a snapshot is removed +sync_remove = false + +[cache_manager] +disable = false +gc_period = "24h" +# Directory to host cached files +cache_dir = "" + +[image] +public_key_file = "" +validate_signature = false + +# The configuraions for features that are not production ready +[experimental] +# Whether to enable stargz support +enable_stargz = false +# Whether to enable referrers support +# The option enables trying to fetch the Nydus image associated with the OCI image and run it. +# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers +enable_referrer_detect = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode. 
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Mount rafs on host by loopdev and EROFS +mount_tarfs_on_host = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" \ No newline at end of file From ae55c0b5109ab74468cff2d2bf7c4a76dc9021e5 Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Sun, 17 Sep 2023 23:29:40 +0800 Subject: [PATCH 235/339] static-build: Fix arch error on nydus build Fix the arch error when downloading the nydus tarball. 
Signed-off-by: ChengyuZhu6 Signed-off-by: Steven Horsman (cherry picked from commit f6df3d6efb62f482e57d891f4f96ea9b99511982) --- tools/packaging/static-build/nydus/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/packaging/static-build/nydus/build.sh b/tools/packaging/static-build/nydus/build.sh index 97686642a..908f46af5 100755 --- a/tools/packaging/static-build/nydus/build.sh +++ b/tools/packaging/static-build/nydus/build.sh @@ -12,7 +12,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${script_dir}/../../scripts/lib.sh" -ARCH=${ARCH:-$(arch_to_golang "$(uname -m)")} +arch="$(uname -m)" nydus_url="${nydus_url:-}" nydus_version="${nydus_version:-}" @@ -25,7 +25,7 @@ info "Get nydus information from runtime versions.yaml" nydus_tarball_url="${nydus_url}/releases/download" -file_name="nydus-static-${nydus_version}-linux-${ARCH}.tgz" +file_name="nydus-static-${nydus_version}-linux-$(arch_to_golang $arch).tgz" download_url="${nydus_tarball_url}/${nydus_version}/${file_name}" info "Download nydus version: ${nydus_version} from ${download_url}" From 49c1a373300e4f880ae5bddfc591921a786ab911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 18 Sep 2023 11:48:05 +0200 Subject: [PATCH 236/339] nydus: Use `kata-${KATA_HYPERVISOR}` instead of `kata` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will ensure we're testing with the correct runtime, instead of using the `default` one. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit b8a8dfcd15aa6a894b48e8eb61fa8d0f09a030ce) --- tests/integration/nydus/nydus_tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index e90b2498c..9e3f655b9 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -99,8 +99,8 @@ function config_containerd() { [plugins.cri.containerd.runtimes.runc.options] BinaryName = "${runc_path}" Root = "" - [plugins.cri.containerd.runtimes.kata] - runtime_type = "io.containerd.kata.v2" + [plugins.cri.containerd.runtimes.kata-${KATA_HYPERVISOR}] + runtime_type = "io.containerd.kata-${KATA_HYPERVISOR}.v2" privileged_without_host_devices = true EOF } @@ -127,7 +127,7 @@ function setup() { function run_test() { sudo -E crictl --timeout=20s pull "${IMAGE}" - pod=$(sudo -E crictl --timeout=20s runp -r kata $dir_path/nydus-sandbox.yaml) + pod=$(sudo -E crictl --timeout=20s runp -r kata-${KATA_HYPERVISOR} $dir_path/nydus-sandbox.yaml) echo "Pod $pod created" cnt=$(sudo -E crictl --timeout=20s create $pod $dir_path/nydus-container.yaml $dir_path/nydus-sandbox.yaml) echo "Container $cnt created" From f3c42ff5febfe3476b7979c3723a4eb879c244c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 18 Sep 2023 11:51:28 +0200 Subject: [PATCH 237/339] nydus: Temporarily skip tests on dragonball MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're hitting a specific issue after updating, which will require some work on dragonball before it can be re-added here. The issue: ``` ... 3: failed to do rafs mount\\n 4: fail to attach rafs \\\"/var/lib/containerd-nydus/snapshots/2/fs/image/image.boot\\\"\\n 5: add share fs mount\\n 6: Mount rafs at /rafs/197ef3db03c86b91bf3045ff59183ce8b5750941ad1d3484f4a8301a70f5109f/rootfs_lower error: Failed to Mount backend ... 
Caused by: vmm action error: FsDevice(AttachBackendFailed(\\\"attach/detach a backend filesystem failed:: missing field `version` at line 1 column 489\\\"))\"): unknown" ``` Signed-off-by: Fabiano Fidêncio (cherry picked from commit aba36ab188fd1da773b407460d322fb1f63e5516) --- tests/integration/nydus/nydus_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/nydus/nydus_tests.sh b/tests/integration/nydus/nydus_tests.sh index 9e3f655b9..e4e70139a 100755 --- a/tests/integration/nydus/nydus_tests.sh +++ b/tests/integration/nydus/nydus_tests.sh @@ -29,8 +29,8 @@ containerd_config_backup="/tmp/containerd.config.toml" # test image for container IMAGE="${IMAGE:-ghcr.io/dragonflyoss/image-service/alpine:nydus-latest}" -if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "clh" ] && [ "$KATA_HYPERVISOR" != "dragonball" ]; then - echo "Skip nydus test for $KATA_HYPERVISOR, it only works for QEMU/CLH/DB now." +if [ "$KATA_HYPERVISOR" != "qemu" ] && [ "$KATA_HYPERVISOR" != "clh" ]; then + echo "Skip nydus test for $KATA_HYPERVISOR, it only works for QEMU/CLH now." exit 0 fi From 63e8c38a7a73e78463e9a40b55bffcc1817e4e48 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 18 Sep 2023 16:32:11 +0000 Subject: [PATCH 238/339] metrics: Enable parallel bandwidth iperf limit This PR enables the parallel bandwidth iperf limit for kata metrics. 
Fixes #7989 Signed-off-by: Gabriela Cervantes (cherry picked from commit af59d4bf4a3fe42881e364707ee1b2fb123ee2d5) --- .../checkmetrics-json-clh-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 15e6f7545..6e0dff6da 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -124,6 +124,19 @@ midval = 61176889941.19 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container parallel bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" +checktype = "mean" +midval = 61176889941.0 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From b09a3f8f8e4b8b8981c38e118313fbb695e173f6 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 18 Sep 2023 16:35:36 +0000 Subject: [PATCH 239/339] metrics: Add parallel bandwidth limit for qemu This PR adds the parallel bandwidth limit for qemu for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 9aa8d1c9170c70ea956c3fd88dce8fffba85a11e) --- .../checkmetrics-json-clh-kata-metric8.toml | 2 +- .../checkmetrics-json-qemu-kata-metric8.toml | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 6e0dff6da..cf5042dbc 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -133,7 +133,7 @@ description = "measure container parallel bandwidth using iperf3" # within (inclusive) checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" checktype = "mean" -midval = 61176889941.0 +midval = 47734838389.0 minpercent = 20.0 maxpercent = 20.0 diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 0db751246..8281ed71a 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -120,7 +120,20 @@ description = "measure container bandwidth using iperf3" # within (inclusive) checkvar = ".\"network-iperf3\".Results | .[] | .bandwidth.Result" checktype = "mean" -midval = 52644229340.91 +midval = 55344417086.81 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "network-iperf3" +type = "json" +description = "measure container parallel bandwidth using iperf3" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result" +checktype = "mean" +midval = 52644229340.0 minpercent = 20.0 maxpercent = 20.0 From 
882d7d7d894a51b34cba6bec12c37ffb63353ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Mon, 18 Sep 2023 08:13:52 -0700 Subject: [PATCH 240/339] ci: Create clusters in individual resource groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it so that each AKS cluster is created in its own individual resource group, rather than using the "kataCI" resource group for all test clusters. This is to accommodate a tool that we recently introduced in our Azure subscription which automatically deletes resource groups after a set amount of time, in order to keep spending under control. The tool will automatically delete any resource group, unless it has a tag SkipAutoDeleteTill = YYYY-MM-DD. When this tag is present, the resource group will be retained until the specified date. Note that I tagged all current resource groups in our subscription with SkipAutoDeleteTill = 2043-01-01 so that we don't lose any existing resources. 
Fixes: #7982 Signed-off-by: Aurélien Bombo (cherry picked from commit 68267a399620dc8ef5f62bdbc2d980915006c1f4) --- tests/gha-run-k8s-common.sh | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index fd9b98109..cbd79c01c 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -11,7 +11,6 @@ set -o pipefail tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${tests_dir}/common.bash" -AZ_RG="${AZ_RG:-kataCI}" K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" function _print_cluster_name() { @@ -21,6 +20,12 @@ function _print_cluster_name() { echo "${test_type}-${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" } +function _print_rg_name() { + test_type="${1:-k8s}" + + echo "${AZ_RG:-"kataCI-$(_print_cluster_name ${test_type})"}" +} + function install_azure_cli() { curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash # The aks-preview extension is required while the Mariner Kata host is in preview. @@ -38,9 +43,15 @@ function login_azure() { function create_cluster() { test_type="${1:-k8s}" - # First, ensure that the cluster didn't fail to get cleaned up from a previous run. + # First ensure it didn't fail to get cleaned up from a previous run. 
delete_cluster "${test_type}" || true + local rg="$(_print_rg_name ${test_type})" + + az group create \ + -l eastus2 \ + -n "${rg}" + local instance_type="" case ${K8S_TEST_HOST_TYPE} in small) @@ -52,7 +63,8 @@ function create_cluster() { esac az aks create \ - -g "${AZ_RG}" \ + -g "${rg}" \ + --node-resource-group "node-${rg}" \ -n "$(_print_cluster_name ${test_type})" \ -s "${instance_type}" \ --node-count 1 \ @@ -79,16 +91,15 @@ function get_cluster_credentials() { test_type="${1:-k8s}" az aks get-credentials \ - -g "${AZ_RG}" \ + -g "$(_print_rg_name ${test_type})" \ -n "$(_print_cluster_name ${test_type})" } function delete_cluster() { test_type="${1:-k8s}" - az aks delete \ - -g "${AZ_RG}" \ - -n "$(_print_cluster_name ${test_type})" \ + az group delete \ + -g "$(_print_rg_name ${test_type})" \ --yes } From 7892e04dd1e1918bcfc1d674dfbede1c67be3bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Mon, 18 Sep 2023 09:23:30 -0700 Subject: [PATCH 241/339] ci: Add first letter of the K8S_TEST_HOST_TYPE to resource group name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ideally we'd add the instance_type or the full K8S_TEST_HOST_TYPE but that exceeds the maximum amount of characteres allowed for the cluster name. With this in mind, let's use the first letter of K8S_TEST_HOST_TYPE instead. 
Signed-off-by: Aurélien Bombo (cherry picked from commit d9ef1352af139030d760bedbaa860080a5f1d7aa) --- tests/gha-run-k8s-common.sh | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index cbd79c01c..5b8193c88 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -13,11 +13,25 @@ source "${tests_dir}/common.bash" K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" +function _print_instance_type() { + case ${K8S_TEST_HOST_TYPE} in + small) + echo "Standard_D2s_v5" + ;; + normal) + echo "Standard_D4s_v5" + ;; + *) + echo "Unknown instance type '${K8S_TEST_HOST_TYPE}'" >&2 + exit 1 + esac +} + function _print_cluster_name() { test_type="${1:-k8s}" short_sha="$(git rev-parse --short=12 HEAD)" - echo "${test_type}-${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64" + echo "${test_type}-${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64-${K8S_TEST_HOST_TYPE:0:1}" } function _print_rg_name() { @@ -52,21 +66,11 @@ function create_cluster() { -l eastus2 \ -n "${rg}" - local instance_type="" - case ${K8S_TEST_HOST_TYPE} in - small) - instance_type="Standard_D2s_v5" - ;; - normal) - instance_type="Standard_D4s_v5" - ;; - esac - az aks create \ -g "${rg}" \ --node-resource-group "node-${rg}" \ -n "$(_print_cluster_name ${test_type})" \ - -s "${instance_type}" \ + -s "$(_print_instance_type)" \ --node-count 1 \ --generate-ssh-keys \ $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation") From ea6489653edede338e1d1edc0ed09df59e355158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 18 Sep 2023 11:10:27 -0700 Subject: [PATCH 242/339] ci: Properly set K8S_TEST_UNION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise only the first test will be executed Signed-off-by: Aurélien Bombo 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 486fe14c99c437a302be2810fb6361c91122e2a4) --- tests/integration/kubernetes/run_kubernetes_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 0d4c40cfa..e0cd401d1 100644 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -63,10 +63,10 @@ else case ${K8S_TEST_HOST_TYPE} in small) - K8S_TEST_UNION=($K8S_TEST_SMALL_HOST_UNION) + K8S_TEST_UNION=(${K8S_TEST_SMALL_HOST_UNION[@]}) ;; normal) - K8S_TEST_UNION=($K8S_TEST_NORMAL_HOST_UNION) + K8S_TEST_UNION=(${K8S_TEST_NORMAL_HOST_UNION[@]}) ;; baremetal) K8S_TEST_UNION=(${K8S_TEST_SMALL_HOST_UNION[@]} ${K8S_TEST_NORMAL_HOST_UNION[@]}) From abe9dc9904da61d8637fffdabfcb2f4bb3dcee3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 15:35:11 +0200 Subject: [PATCH 243/339] ci: Move deploy_k8s() to gha-run-k8s-common.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow us to re-use the function in the kata-deploy tests, which will come soon. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 09cc0ed43853de13f0d0c21356f68dbb4d1d1d5a) --- tests/gha-run-k8s-common.sh | 39 +++++++++++++++++++++++++ tests/integration/kubernetes/gha-run.sh | 39 ------------------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 5b8193c88..7f381c54a 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -111,3 +111,42 @@ function get_nodes_and_pods_info() { kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true } + +function deploy_k3s() { + curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl + # + # We need to do this to avoid hitting issues like: + # ```sh + # error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied + # ``` + # Which happens basically because k3s links `/usr/local/bin/kubectl` + # to `/usr/local/bin/k3s`, and that does extra stuff that vanilla + # `kubectl` doesn't do. 
+ ARCH=$(uname -m) + if [ "${ARCH}" = "x86_64" ]; then + ARCH=amd64 + fi + kubectl_version=$(/usr/local/bin/k3s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/\+k3s1//') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl + sudo chmod +x /usr/bin/kubectl + sudo rm -rf /usr/local/bin/kubectl + + mkdir -p ~/.kube + cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function deploy_k8s() { + echo "::group::Deploying ${KUBERNETES}" + + case ${KUBERNETES} in + k3s) deploy_k3s ;; + *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; + esac + + echo "::endgroup::" +} diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 1cb11c456..fa754ada8 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -137,45 +137,6 @@ function deploy_kata() { echo "::endgroup::" } -function deploy_k3s() { - curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 - - # This is an arbitrary value that came up from local tests - sleep 120s - - # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl - # - # We need to do this to avoid hitting issues like: - # ```sh - # error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied - # ``` - # Which happens basically because k3s links `/usr/local/bin/kubectl` - # to `/usr/local/bin/k3s`, and that does extra stuff that vanilla - # `kubectl` doesn't do. 
- ARCH=$(uname -m) - if [ "${ARCH}" = "x86_64" ]; then - ARCH=amd64 - fi - kubectl_version=$(/usr/local/bin/k3s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/\+k3s1//') - sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl - sudo chmod +x /usr/bin/kubectl - sudo rm -rf /usr/local/bin/kubectl - - mkdir -p ~/.kube - cp /etc/rancher/k3s/k3s.yaml ~/.kube/config -} - -function deploy_k8s() { - echo "::group::Deploying ${KUBERNETES}" - - case ${KUBERNETES} in - k3s) deploy_k3s ;; - *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; - esac - - echo "::endgroup::" -} - function run_tests() { # Delete any spurious tests namespace that was left behind kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true From f09c255766cf62396763f4b45d3c06dc1e999ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 16:14:42 +0200 Subject: [PATCH 244/339] ci: kata-deploy: Export KUBERNETES env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we have a better control on which flavour of kubernetes kata-deploy is expected to be targetting. This was also done as part of fa62a4c01b9ffac59bfe8740ee9c82e4c4f2dc81, for the k8s tests. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 9e1fb8a96660d8c87a8c2e3effd52ff50603bbaa) --- .github/workflows/run-kata-deploy-tests-on-aks.yaml | 1 + .github/workflows/run-kata-deploy-tests-on-tdx.yaml | 1 + tests/functional/kata-deploy/kata-deploy.bats | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-kata-deploy-tests-on-aks.yaml b/.github/workflows/run-kata-deploy-tests-on-aks.yaml index 8962fcf32..74fcb0ea5 100644 --- a/.github/workflows/run-kata-deploy-tests-on-aks.yaml +++ b/.github/workflows/run-kata-deploy-tests-on-aks.yaml @@ -44,6 +44,7 @@ jobs: GH_PR_NUMBER: ${{ inputs.pr-number }} KATA_HOST_OS: ${{ matrix.host_os }} KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "vanilla" USING_NFD: "false" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml index b9e7e0588..6b439cea8 100644 --- a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml +++ b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml @@ -36,6 +36,7 @@ jobs: DOCKER_TAG: ${{ inputs.tag }} PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" USING_NFD: "true" steps: - uses: actions/checkout@v3 diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats index 84faf8fe1..95d32e2b9 100644 --- a/tests/functional/kata-deploy/kata-deploy.bats +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -48,7 +48,7 @@ setup() { echo "::endgroup::" kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s" else kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" @@ -74,7 +74,7 @@ setup() { teardown() { kubectl get runtimeclasses -o 
name | grep -v "kata-mshv-vm-isolation" - if [ "${platform}" = "tdx" ]; then + if [ "${KUBERNETES}" = "k3s" ]; then deploy_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s"" cleanup_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k3s"" else From 03a8bed32bcf764dfb671673092406629e4b3ca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 16:27:53 +0200 Subject: [PATCH 245/339] ci: kata-deploy: Add placeholder for tests on GARM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll be testing kata-deploy with different kubernetes flavours as part of our GARM tests, and this is a place-holder for this. Once enabled, we'll do nothing, just `return 0`, so we can then properly add the tests after this commit gets merged. Signed-off-by: Fabiano Fidêncio (cherry picked from commit b12b9e1886fae09b12c758bde6ed6860c4c28bf2) --- .../run-kata-deploy-tests-on-garm.yaml | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .github/workflows/run-kata-deploy-tests-on-garm.yaml diff --git a/.github/workflows/run-kata-deploy-tests-on-garm.yaml b/.github/workflows/run-kata-deploy-tests-on-garm.yaml new file mode 100644 index 000000000..016708896 --- /dev/null +++ b/.github/workflows/run-kata-deploy-tests-on-garm.yaml @@ -0,0 +1,65 @@ +name: CI | Run kata-deploy tests on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests: + strategy: + fail-fast: false + matrix: + vmm: + - clh + - qemu + k8s: + - k0s + - k3s + - rke2 + runs-on: garm-ubuntu-2004-small + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} 
+ DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + USING_NFD: "false" + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/functional/kata-deploy/gha-run.sh deploy-k8s + + - name: Install `bats` + run: bash tests/functional/kata-deploy/gha-run.sh install-bats + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests From 19ee6c9fd7ffb1660c8da42989866b3bcbd84a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 16:34:38 +0200 Subject: [PATCH 246/339] ci: kata-deploy: Expand tests to run on k0s / rke2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We just need to make sure the correct overlay is applied, following what we already have been doing for k3s. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit bf2cb02283062de062ac4e4ecab4c6fc7fe05507) --- tests/functional/kata-deploy/kata-deploy.bats | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/functional/kata-deploy/kata-deploy.bats b/tests/functional/kata-deploy/kata-deploy.bats index 95d32e2b9..d4f957d05 100644 --- a/tests/functional/kata-deploy/kata-deploy.bats +++ b/tests/functional/kata-deploy/kata-deploy.bats @@ -48,8 +48,12 @@ setup() { echo "::endgroup::" kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" - if [ "${KUBERNETES}" = "k3s" ]; then + if [ "${KUBERNETES}" = "k0s" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k0s" + elif [ "${KUBERNETES}" = "k3s" ]; then kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s" + elif [ "${KUBERNETES}" = "rke2" ]; then + kubectl apply -k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/rke2" else kubectl apply -f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" fi @@ -74,12 +78,18 @@ setup() { teardown() { kubectl get runtimeclasses -o name | grep -v "kata-mshv-vm-isolation" - if [ "${KUBERNETES}" = "k3s" ]; then - deploy_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s"" - cleanup_spec="-k "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k3s"" + if [ "${KUBERNETES}" = "k0s" ]; then + deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k0s\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k0s\"" + elif [ "${KUBERNETES}" = "k3s" ]; then + deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/k3s\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/k3s\"" + elif [ "${KUBERNETES}" = "rke2" ]; then + 
deploy_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/overlays/rke2\"" + cleanup_spec="-k \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/overlays/rke2\"" else - deploy_spec="-f "${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"" - cleanup_spec="-f "${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"" + deploy_spec="-f \"${repo_root_dir}/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml\"" + cleanup_spec="-f \"${repo_root_dir}/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml\"" fi kubectl delete ${deploy_spec} From 1a605c33add4be41abca60fa8f4a3d73b8ccc5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 16:36:18 +0200 Subject: [PATCH 247/339] ci: kata-deploy: Add deploy-k8s argument to gha-run.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll be using exactly the same code used for the k8s tests, which are already deploying k3s on GARM. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 001525763606d19d7a891ff7715b212c65a1db02) --- tests/functional/kata-deploy/gha-run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh index 8e1c9a1d0..744e73f1c 100755 --- a/tests/functional/kata-deploy/gha-run.sh +++ b/tests/functional/kata-deploy/gha-run.sh @@ -56,6 +56,7 @@ function main() { install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; create-cluster) create_cluster "kata-deploy" ;; + deploy-k8s) deploy_k8s ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; get-cluster-credentials) get_cluster_credentials "kata-deploy" ;; From 59a4b00d29622d10e9af671936ba0cd93ba8b466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 15:44:10 +0200 Subject: [PATCH 248/339] ci: kata-deploy: Add the ability to deploy k0s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be very useful in the near future, when we start testing kata-deploy with k0s as well. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit eaf61649163a3dc7d60df48a42152fd163bac6e2) --- tests/gha-run-k8s-common.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 7f381c54a..ba9a96de0 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -112,6 +112,31 @@ function get_nodes_and_pods_info() { kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true } +function deploy_k0s() { + curl -sSLf https://get.k0s.sh | sudo sh + + sudo k0s install controller --single + + sudo k0s start + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Download the kubectl binary into /usr/bin so we can avoid depending + # on `k0s kubectl` command + ARCH=$(uname -m) + if [ "${ARCH}" = "x86_64" ]; then + ARCH=amd64 + fi + kubectl_version=$(sudo k0s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl + sudo chmod +x /usr/bin/kubectl + + mkdir -p ~/.kube + sudo cp /var/lib/k0s/pki/admin.conf ~/.kube/config + sudo chown ${USER}:${USER} ~/.kube/config +} + function deploy_k3s() { curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 @@ -144,6 +169,7 @@ function deploy_k8s() { echo "::group::Deploying ${KUBERNETES}" case ${KUBERNETES} in + k0s) deploy_k0s ;; k3s) deploy_k3s ;; *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; esac From 7fc2f7d003eabdfabcb2780f8870c82cfd0bd398 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 16:05:14 +0200 Subject: [PATCH 249/339] ci: kata-deploy: Add the ability to deploy rke2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be very useful in the near future, when we start 
testing kata-deploy with rke2 as well. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2c908b598cbb636732c82960ef240e3a3d3302fa) --- tests/gha-run-k8s-common.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index ba9a96de0..b4631614f 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -165,12 +165,29 @@ function deploy_k3s() { cp /etc/rancher/k3s/k3s.yaml ~/.kube/config } +function deploy_rke2() { + curl -sfL https://get.rke2.io | sudo sh - + + systemctl enable --now rke2-server.service + + # This is an arbitrary value that came up from local tests + sleep 120s + + # Link the kubectl binary into /usr/bin + sudo ln -sf /var/lib/rancher/rke2/bin/kubectl /usr/local/bin/kubectl + + mkdir -p ~/.kube + sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config + sudo chown ${USER}:${USER} ~/.kube/config +} + function deploy_k8s() { echo "::group::Deploying ${KUBERNETES}" case ${KUBERNETES} in k0s) deploy_k0s ;; k3s) deploy_k3s ;; + rke2) deploy_rke2 ;; *) >&2 echo "${KUBERNETES} flavour is not supported"; exit 2 ;; esac From 7a8d848a92e0cf0d266bb13dde54cdb1f9017e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 11 Sep 2023 18:41:35 +0200 Subject: [PATCH 250/339] ci: Enable kata-deploy tests for all the supported k8s flavours MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's ensure we test kata-deploy on RKE2 and k0s as well. 
Fixes: #7890 Signed-off-by: Fabiano Fidêncio (cherry picked from commit f7fa7f602aa434d945adeeae9364b6d3709c8fd6) --- .github/workflows/ci.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8b8ad8ab4..791db56c0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -101,6 +101,18 @@ jobs: target-branch: ${{ inputs.target-branch }} secrets: inherit + run-kata-deploy-tests-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-kata-deploy-tests-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + run-kata-deploy-tests-on-tdx: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-kata-deploy-tests-on-tdx.yaml From 2c3f130c856480ef91fcae10e79d6ae296f173f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 19 Sep 2023 22:34:37 +0200 Subject: [PATCH 251/339] ci: kata-deploy: Fix runner name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should be garm-ubuntu-2004-smaller instead of garm-ubuntu-2004-small. 
Fixes: #7890 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 3a2c83d69b64c3553683e68cdb952502019cbdc0) --- .github/workflows/run-kata-deploy-tests-on-garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-kata-deploy-tests-on-garm.yaml b/.github/workflows/run-kata-deploy-tests-on-garm.yaml index 016708896..7514a27b6 100644 --- a/.github/workflows/run-kata-deploy-tests-on-garm.yaml +++ b/.github/workflows/run-kata-deploy-tests-on-garm.yaml @@ -34,7 +34,7 @@ jobs: - k0s - k3s - rke2 - runs-on: garm-ubuntu-2004-small + runs-on: garm-ubuntu-2004-smaller env: DOCKER_REGISTRY: ${{ inputs.registry }} DOCKER_REPO: ${{ inputs.repo }} From 67fef9d5c641558161d1c1a449691d88b37fd94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 19 Sep 2023 19:45:20 +0200 Subject: [PATCH 252/339] ci: k8s: k0s: Allow passing parameters to the k0s installer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll need this in order to setup k0s with a different container engine. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 54c0a471b1faedd427535ff9727a3cc02be9327e) --- tests/gha-run-k8s-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index b4631614f..6c6622b2b 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -115,7 +115,7 @@ function get_nodes_and_pods_info() { function deploy_k0s() { curl -sSLf https://get.k0s.sh | sudo sh - sudo k0s install controller --single + sudo k0s install controller --single ${KUBERNETES_EXTRA_PARAMS:-} sudo k0s start From b41fa6d9467c996dc605c7fde3b9828bcc2fdafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 19 Sep 2023 19:46:10 +0200 Subject: [PATCH 253/339] ci: k8s: Add a method to install CRI-O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is based on official CRI-O documentation[0] and right now we're making this specific to Ubuntu as that's what we have as runners. We may want to expand this in the future, but we're good for now. 
[0]: https://github.com/cri-o/cri-o/blob/main/install.md#apt-based-operating-systems Signed-off-by: Fabiano Fidêncio (cherry picked from commit d7105cf7a498bdac1b057007e4977f8cf0e14f48) --- tests/gha-run-k8s-common.sh | 47 +++++++++++++++++++++++++ tests/integration/kubernetes/gha-run.sh | 1 + 2 files changed, 48 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 6c6622b2b..f706db5eb 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -181,6 +181,53 @@ function deploy_rke2() { sudo chown ${USER}:${USER} ~/.kube/config } +function _get_k0s_kubernetes_version_for_crio() { + # k0s version will look like: + # v1.27.5+k0s.0 + # + # The CRI-O repo for such version of Kubernetes expects something like: + # 1.27 + k0s_version=$(curl -sSLf "https://docs.k0sproject.io/stable.txt") + + # Remove everything after the second '.' + crio_version=${k0s_version%\.*+*} + # Remove the 'v' + crio_version=${crio_version#v} + + echo ${crio_version} +} + +function _get_os_for_crio() { + source /etc/os-release + + if [ "${NAME}" != "Ubuntu" ]; then + echo "Only Ubuntu is supported for now" + exit 2 + fi + + echo "x${NAME}_${VERSION_ID}" +} + +function setup_crio() { + # Get the CRI-O version to be installed depending on the version of the + # "k8s distro" that we are using + case ${KUBERNETES} in + k0s) crio_version=$(_get_k0s_kubernetes_version_for_crio) ;; + *) >&2 echo "${KUBERNETES} flavour is not supported with CRI-O"; exit 2 ;; + + esac + + os=$(_get_os_for_crio) + + echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + echo "deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${crio_version}/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:${crio_version}.list + curl -L 
https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${crio_version}/${os}/Release.key | sudo apt-key add - + curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - + sudo apt update + sudo apt install cri-o cri-o-runc + sudo systemctl enable --now crio +} + function deploy_k8s() { echo "::group::Deploying ${KUBERNETES}" diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index fa754ada8..360628a6b 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -203,6 +203,7 @@ function main() { login-azure) login_azure ;; create-cluster) create_cluster ;; configure-snapshotter) configure_snapshotter ;; + setup-crio) setup_crio ;; deploy-k8s) deploy_k8s ;; install-bats) install_bats ;; install-kubectl) install_kubectl ;; From 08bdb6b5da16bd11fce3ba4131998dfb3808e86f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 19 Sep 2023 19:44:14 +0200 Subject: [PATCH 254/339] ci: k8s: Add a CRI-O test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's make sure we'll also be testing k8s using CRI-O. For now, we'll only be running the CRI-O test with QEMU. Once it becomes stable we can expand this to other Hypervisors as well. 
Fixes: #8005 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 03b82e84840d2101e334484338b522c487b25d8a) --- .../run-k8s-tests-with-crio-on-garm.yaml | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/run-k8s-tests-with-crio-on-garm.yaml diff --git a/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml new file mode 100644 index 000000000..14000dc63 --- /dev/null +++ b/.github/workflows/run-k8s-tests-with-crio-on-garm.yaml @@ -0,0 +1,86 @@ +name: CI | Run kubernetes tests, using CRI-O, on GARM +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-k8s-tests: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + k8s: + - k0s + instance: + - garm-ubuntu-2004 + - garm-ubuntu-2004-smaller + include: + - instance: garm-ubuntu-2004 + instance-type: normal + - instance: garm-ubuntu-2004-smaller + instance-type: small + - k8s: k0s + k8s-extra-params: '--cri-socket remote:unix:///var/run/crio/crio.sock --kubelet-extra-args --cgroup-driver="systemd"' + runs-on: ${{ matrix.instance }} + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + KUBERNETES_EXTRA_PARAMS: ${{ matrix.k8s-extra-params }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: ${{ matrix.instance-type }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ 
inputs.target-branch }} + + - name: Configure CRI-O + run: bash tests/integration/kubernetes/gha-run.sh setup-crio + + - name: Deploy ${{ matrix.k8s }} + run: bash tests/integration/kubernetes/gha-run.sh deploy-k8s + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-garm + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-garm From f2953e644882664bdde47e521102d00fb7cc3163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 20 Sep 2023 08:48:29 +0200 Subject: [PATCH 255/339] ci: k8s: rke2: Use sudo to call systemd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we'll face the following error: ``` Failed to enable unit: Interactive authentication required. 
``` Signed-off-by: Fabiano Fidêncio (cherry picked from commit 07a6e63a6bd96ba72ac99b35cd182a0cf56c397d) --- tests/gha-run-k8s-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index f706db5eb..01d825c28 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -168,7 +168,7 @@ function deploy_k3s() { function deploy_rke2() { curl -sfL https://get.rke2.io | sudo sh - - systemctl enable --now rke2-server.service + sudo systemctl enable --now rke2-server.service # This is an arbitrary value that came up from local tests sleep 120s From 390bde3182a3c86a856a36c9bedb6796d45463a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 20 Sep 2023 18:00:00 +0200 Subject: [PATCH 256/339] ci: Actually enable the CRI-O tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test has been added to the repo, but we have to also add it to the list of jobs to be executed. 
Fixes: #8005 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8c3c50ca8ae6450059dad3c3669cbb8d74f40ffd) --- .github/workflows/ci.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 791db56c0..7479e6777 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -148,6 +148,18 @@ jobs: target-branch: ${{ inputs.target-branch }} secrets: inherit + run-k8s-tests-with-crio-on-garm: + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-with-crio-on-garm.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: inherit + run-k8s-tests-on-sev: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] uses: ./.github/workflows/run-k8s-tests-on-sev.yaml From a11e8867afa8f7609315e902975b049b0dd71ad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 21 Sep 2023 09:24:13 +0200 Subject: [PATCH 257/339] ci: Trigger payload-after-push on workflow_dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow us to easily test failures and fixes on that workflows. 
Fixes: #8031 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 0c95697cc4d8ec35c313d706c3ba6f260e0b5222) --- .github/workflows/payload-after-push.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index 0f1ab742f..bcc2aa7a0 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -4,6 +4,7 @@ on: branches: - main - stable-* + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 6a9384ed4069c4e4000d88c71ece24a83f424308 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 20 Sep 2023 16:21:39 +0000 Subject: [PATCH 258/339] gha: Install hunspell for static checks Seems like the static checks are failing due the missing of the hunspell package this PR fixes that. Fixes #8019 Signed-off-by: Gabriela Cervantes (cherry picked from commit 87a8616488461ea7a14f7d75a8711bb38ba8a583) --- .github/workflows/static-checks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index ad2d2b7a4..386d3e41e 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -187,7 +187,7 @@ jobs: echo "/usr/local/go/bin" >> $GITHUB_PATH - name: Install system dependencies run: | - sudo apt-get -y install moreutils + sudo apt-get -y install moreutils hunspell - name: Run check run: | export PATH=${PATH}:${GOPATH}/bin From 8b607ff79a745619038bb4b83819ccc06cadc6f2 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 21 Sep 2023 20:14:41 +0000 Subject: [PATCH 259/339] gha: Add pandoc as a dependency for static checks To avoid the failure of not finding pandoc command this PR adds that package as a dependency for static checks. 
Fixes #8041 Signed-off-by: Gabriela Cervantes (cherry picked from commit 13ca7d9f97149b89f94e6a73c09fd94ec0a856af) --- .github/workflows/static-checks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 386d3e41e..c55adf4c7 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -187,7 +187,7 @@ jobs: echo "/usr/local/go/bin" >> $GITHUB_PATH - name: Install system dependencies run: | - sudo apt-get -y install moreutils hunspell + sudo apt-get -y install moreutils hunspell pandoc - name: Run check run: | export PATH=${PATH}:${GOPATH}/bin From cf3abd308f827b84179b7305ffe6b6c8ad1c244a Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 22 Sep 2023 13:38:49 +0200 Subject: [PATCH 260/339] local-build: Fix .docker ownership before build-payload The permissions on .docker/buildx/activity/default are regularly broken by us passing docker.sock + $HOME/.docker to a container running as root and then using buildx inside. Fixup ownership before executing docker commands. Fixes: #8027 Signed-off-by: Jeremi Piotrowski (cherry picked from commit 15425a2b80ab8dc2cdffb74473a0e40dfb9ae35c) --- .../local-build/kata-deploy-build-and-upload-payload.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh index be460ffa7..af746e79c 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-build-and-upload-payload.sh @@ -25,6 +25,8 @@ arch=$(uname -m) [ "$arch" = "x86_64" ] && arch="amd64" IMAGE_TAG="${REGISTRY}:kata-containers-$(git rev-parse HEAD)-${arch}" +sudo chown -R $USER $HOME/.docker + echo "Building the image" docker build --tag ${IMAGE_TAG} . 
From 73a084a7d4e8695c9baf69094421c98732f47f24 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 21 Sep 2023 16:11:58 +0000 Subject: [PATCH 261/339] metrics: Enable latency test in gha run script This PR enables the latency test for gha run script for kata metrics. Fixes #8037 Signed-off-by: Gabriela Cervantes (cherry picked from commit 6776b55d7e2aaa27827df83e3b15d47da2181901) --- tests/metrics/gha-run.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 4076ad7ff..4f0848821 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -97,6 +97,12 @@ function run_test_iperf() { check_metrics } +function run_test_latency() { + info "Running Latency test using ${KATA_HYPERVISOR} hypervisor" + + bash tests/metrics/network/latency_kubernetes/latency-network.sh +} + function main() { action="${1:-}" case "${action}" in @@ -110,6 +116,7 @@ function main() { run-test-tensorflow) run_test_tensorflow ;; run-test-fio) run_test_fio ;; run-test-iperf) run_test_iperf ;; + run-test-latency) run_test_latency ;; *) >&2 die "Invalid argument" ;; esac } From 3f2780fca6573bf92dc5018e3d836fbcc0d13dab Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 21 Sep 2023 16:14:39 +0000 Subject: [PATCH 262/339] metrics: Add latency benchmark for gha This PR adds the latency benchmark for gha for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 08bc8e4db4e6dafd9ad927aca9a972d4102bac70) --- .github/workflows/run-metrics.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml index 08cbef904..c0e43c360 100644 --- a/.github/workflows/run-metrics.yaml +++ b/.github/workflows/run-metrics.yaml @@ -79,6 +79,9 @@ jobs: - name: run iperf test run: bash tests/metrics/gha-run.sh run-test-iperf + - name: run latency test + run: bash tests/metrics/gha-run.sh run-test-latency + - name: make metrics tarball ${{ matrix.vmm }} run: bash tests/metrics/gha-run.sh make-tarball-results From 468a3218f53762d10b27e1ad34508f18ca4549d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 21 Sep 2023 08:53:05 +0200 Subject: [PATCH 263/339] ci: crio: Pass `-y` to apt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was something overlooked during my tests. 
:-/ Fixes: #8005 Signed-off-by: Fabiano Fidêncio (cherry picked from commit ebaa4fa4c10e2fe4ded5b67373e079d5e68c61c3) --- tests/gha-run-k8s-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 01d825c28..44b6601e3 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -224,7 +224,7 @@ function setup_crio() { curl -L https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${crio_version}/${os}/Release.key | sudo apt-key add - curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - sudo apt update - sudo apt install cri-o cri-o-runc + sudo apt install -y cri-o cri-o-runc sudo systemctl enable --now crio } From 0de3216b088f7725ea5afb2a68dfcdf64004fdaf Mon Sep 17 00:00:00 2001 From: Steve Fan <29133953+stevefan1999-personal@users.noreply.github.com> Date: Thu, 3 Nov 2022 00:06:56 +0800 Subject: [PATCH 264/339] kata-deploy: Add k0s support Add k0s support to kata-deploy, in the very same way kata-containers already supports k3s, and rke2. k0s support requires v1.27.1, which is noted as part of the kata-deploy documentation, as it's the way to use dynamic configuration on containerd CRI runtimes. This support will only be part of the `main` branch, as it's not a bug fix that can be backported to the `stable-3.2` branch, and this is also noted as part of the documentation. 
Fixes: #7548 Signed-off-by: Steve Fan <29133953+stevefan1999-personal@users.noreply.github.com> (cherry picked from commit 72cbcf040bb179a2723b7e270da9762cbbc17cb3) --- tools/packaging/kata-deploy/README.md | 30 +++++++++++ .../overlays/k0s/kustomization.yaml | 5 ++ .../overlays/k0s/mount_k0s_conf.yaml | 12 +++++ .../kata-deploy/scripts/kata-deploy.sh | 54 ++++++++++++++----- 4 files changed, 87 insertions(+), 14 deletions(-) create mode 100644 tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml create mode 100644 tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml diff --git a/tools/packaging/kata-deploy/README.md b/tools/packaging/kata-deploy/README.md index 9e546e4f5..6ef7cd6a9 100644 --- a/tools/packaging/kata-deploy/README.md +++ b/tools/packaging/kata-deploy/README.md @@ -43,6 +43,36 @@ $ kubectl apply -f kata-rbac/base/kata-rbac.yaml $ kubectl apply -k kata-deploy/overlays/rke2 ``` +#### [k0s] cluster + +For your [k0s](https://k0sproject.io/) cluster, run: + +```sh +$ git clone https://github.com/kata-containers/kata-containers.git +``` + +Check and switch to "main", and then run: + +```bash +$ cd kata-containers/tools/packaging/kata-deploy +$ kubectl apply -f kata-rbac/base/kata-rbac.yaml +$ kubectl apply -k kata-deploy/overlays/k0s +``` + +##### Note + +The supported version of k0s is **v1.27.1+k0s** and above, since the k0s support leverages a special dynamic containerd configuration mode: + +> From 1.27.1 onwards k0s enables dynamic configuration on containerd CRI runtimes. This works by k0s creating a special directory in /etc/k0s/containerd.d/ where user can drop-in partial containerd configuration snippets. +> +> k0s will automatically pick up these files and adds these in containerd configuration imports list. If k0s sees the configuration drop-ins are CRI related configurations k0s will automatically collect all these into a single file and adds that as a single import file. 
This is to overcome some hard limitation on containerd 1.X versions. Read more at containerd#8056 + +However, this would also require a magic string set in the beginning of the line for `/etc/k0s/containerd.toml`: + +``` +# k0s_managed=true +``` + #### Vanilla Kubernetes cluster ##### Installing the latest image diff --git a/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml new file mode 100644 index 000000000..14904f560 --- /dev/null +++ b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/kustomization.yaml @@ -0,0 +1,5 @@ +bases: +- ../../base + +patchesStrategicMerge: +- mount_k0s_conf.yaml diff --git a/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml new file mode 100644 index 000000000..c10f938f8 --- /dev/null +++ b/tools/packaging/kata-deploy/kata-deploy/overlays/k0s/mount_k0s_conf.yaml @@ -0,0 +1,12 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kata-deploy + namespace: kube-system +spec: + template: + spec: + volumes: + - name: containerd-conf + hostPath: + path: /etc/k0s/containerd.d/ \ No newline at end of file diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index e01a8a9a6..5a19befea 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -74,6 +74,7 @@ function get_container_runtime() { if [ "$?" 
-ne 0 ]; then die "invalid node name" fi + if echo "$runtime" | grep -qE 'containerd.*-k3s'; then if host_systemctl is-active --quiet rke2-agent; then echo "rke2-agent" @@ -84,6 +85,12 @@ function get_container_runtime() { else echo "k3s" fi + # Note: we assumed you used a conventional k0s setup and k0s will generate a systemd entry k0scontroller.service and k0sworker.service respectively + # and it is impossible to run this script without a kubelet, so this k0s controller must also have worker mode enabled + elif host_systemctl is-active --quiet k0scontroller; then + echo "k0s-controller" + elif host_systemctl is-active --quiet k0sworker; then + echo "k0s-worker" else echo "$runtime" | awk -F '[:]' '{print $1}' fi @@ -136,12 +143,17 @@ function configure_cri_runtime() { crio) configure_crio ;; - containerd | k3s | k3s-agent | rke2-agent | rke2-server) - configure_containerd + containerd | k3s | k3s-agent | rke2-agent | rke2-server | k0s-controller | k0s-worker) + configure_containerd "$1" ;; esac - host_systemctl daemon-reload - host_systemctl restart "$1" + if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then + # do nothing, k0s will automatically load the config on the fly + : + else + host_systemctl daemon-reload + host_systemctl restart "$1" + fi wait_till_node_is_ready } @@ -274,12 +286,15 @@ EOF function configure_containerd_runtime() { local runtime="kata" local configuration="configuration" - if [ -n "${1-}" ]; then - runtime+="-$1" - configuration+="-$1" + if [ -n "${2-}" ]; then + runtime+="-$2" + configuration+="-$2" fi local pluginid=cri - if grep -q "version = 2\>" $containerd_conf_file; then + + # if we are running k0s auto containerd.toml generation, the base template is by default version 2 + # we can safely assume to reference the older version of cri + if grep -q "version = 2\>" $containerd_conf_file || [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then pluginid=\"io.containerd.grpc.v1.cri\" fi local 
runtime_table="plugins.${pluginid}.containerd.runtimes.$runtime" @@ -333,10 +348,10 @@ function configure_containerd() { fi # Add default Kata runtime configuration - configure_containerd_runtime + configure_containerd_runtime "$1" for shim in "${shims[@]}"; do - configure_containerd_runtime $shim + configure_containerd_runtime "$1" $shim done } @@ -352,7 +367,7 @@ function cleanup_cri_runtime() { crio) cleanup_crio ;; - containerd | k3s | k3s-agent | rke2-agent | rke2-server) + containerd | k3s | k3s-agent | rke2-agent | rke2-server | k0s-controller | k0s-worker) cleanup_containerd ;; esac @@ -375,8 +390,14 @@ function cleanup_containerd() { function reset_runtime() { kubectl label node "$NODE_NAME" katacontainers.io/kata-runtime- - host_systemctl daemon-reload - host_systemctl restart "$1" + if [ "$1" == "k0s-worker" ] || [ "$1" == "k0s-controller" ]; then + # do nothing, k0s will auto restart + : + else + host_systemctl daemon-reload + host_systemctl restart "$1" + fi + if [ "$1" == "crio" ] || [ "$1" == "containerd" ]; then host_systemctl restart kubelet fi @@ -412,6 +433,11 @@ function main() { containerd_conf_file="${containerd_conf_tmpl_file}" containerd_conf_file_backup="${containerd_conf_file}.bak" + elif [ "$runtime" == "k0s-worker" ] || [ "$runtime" == "k0s-controller" ]; then + # From 1.27.1 onwards k0s enables dynamic configuration on containerd CRI runtimes. + # This works by k0s creating a special directory in /etc/k0s/containerd.d/ where user can drop-in partial containerd configuration snippets. + # k0s will automatically pick up these files and adds these in containerd configuration imports list. + containerd_conf_file="/etc/containerd/kata-containers.toml" else # runtime == containerd if [ ! 
-f "$containerd_conf_file" ] && [ -d $(dirname "$containerd_conf_file") ] && \ @@ -427,7 +453,7 @@ function main() { fi # only install / remove / update if we are dealing with CRIO or containerd - if [[ "$runtime" =~ ^(crio|containerd|k3s|k3s-agent|rke2-agent|rke2-server)$ ]]; then + if [[ "$runtime" =~ ^(crio|containerd|k3s|k3s-agent|rke2-agent|rke2-server|k0s-worker|k0s-controller)$ ]]; then case "$action" in install) From d1d3c7cbdaee47e04554d4f6c65c2f843e1b07ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 21 Sep 2023 10:15:17 +0200 Subject: [PATCH 265/339] kata-deploy: Fix CRI-O detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the "k8s distros" allow using CRI-O in a non-official way, and if that's done we cannot simply assume they're on containerd, otherwise kata-deploy will simply not work. In order to avoid such issue, let's check for `cri-o` as the container engine as the first place and only proceed with the checks for the "k8s distros" after we rule out that CRI-O is not being used. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 358dc2f5695e185df1a8fdaf44dc447438060efa) --- tools/packaging/kata-deploy/scripts/kata-deploy.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/scripts/kata-deploy.sh b/tools/packaging/kata-deploy/scripts/kata-deploy.sh index 5a19befea..9d60a7ad0 100755 --- a/tools/packaging/kata-deploy/scripts/kata-deploy.sh +++ b/tools/packaging/kata-deploy/scripts/kata-deploy.sh @@ -75,7 +75,9 @@ function get_container_runtime() { die "invalid node name" fi - if echo "$runtime" | grep -qE 'containerd.*-k3s'; then + if echo "$runtime" | grep -qE "cri-o"; then + echo "cri-o" + elif echo "$runtime" | grep -qE 'containerd.*-k3s'; then if host_systemctl is-active --quiet rke2-agent; then echo "rke2-agent" elif host_systemctl is-active --quiet rke2-server; then From 2de1c8bac27d0636c75519edc47967e7a7b3798d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 25 Sep 2023 13:35:41 +0200 Subject: [PATCH 266/339] ci: crio: Enable default capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need the default capabilities to be enabled, especially `SYS_CHROOT`, in order to have tests accessing the host to pass. A huge thanks to Greg Kurz for spotting this and suggesting the fix. 
Signed-off-by: Fabiano Fidêncio Signed-off-by: Greg Kurz (cherry picked from commit 74c12b29270d6ed5bbad560d020cdebd73ef2a3c) --- tests/gha-run-k8s-common.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 44b6601e3..9c1d9f9be 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -225,6 +225,26 @@ function setup_crio() { curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - sudo apt update sudo apt install -y cri-o cri-o-runc + + # We need to set the default capabilities to ensure our tests will pass + # See: https://github.com/kata-containers/kata-containers/issues/8034 + sudo mkdir -p /etc/crio/crio.conf.d/ + cat < Date: Mon, 25 Sep 2023 14:54:12 +0200 Subject: [PATCH 267/339] ci: crio: Trail '\r' from exec_host() output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've faced this as part of the CI, only happening with the CRI-O tests: ``` not ok 1 Test readonly volume for pods # (from function `exec_host' in file tests_common.sh, line 51, # in test file k8s-file-volume.bats, line 25) # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127 # [bats-exec-test:38] INFO: k8s configured to use runtimeclass # bash: line 1: $'\r': command not found # # Error from server (NotFound): pods "test-file-volume" not found ``` I must say I didn't dig into figuring out why this is happening, but we may be safe enough to just trail the '\r', as long as all the tests keep passing on containerd. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ef63d67c411eb7aa0610a6c796cf7e75feda0da9) --- tests/integration/kubernetes/tests_common.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index b6d448cac..f932bd656 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -44,7 +44,16 @@ exec_host() { # `kubectl debug` always returns 0, so we hack it to return the right exit code. command="$@" command+='; echo -en \\n$?' - output="$(kubectl debug -qit "${node}" --image=alpine:latest -- chroot /host bash -c "${command}")" + # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051 + # tl;dr: When testing with CRI-O we're facing the foillowing error: + # ``` + # (from function `exec_host' in file tests_common.sh, line 51, + # in test file k8s-file-volume.bats, line 25) + # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127 + # [bats-exec-test:38] INFO: k8s configured to use runtimeclass + # bash: line 1: $'\r': command not found + # ``` + output="$(kubectl debug -qit "${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')" kubectl get pods -o name | grep node-debugger | xargs kubectl delete > /dev/null exit_code="$(echo "${output}" | tail -1)" echo "$(echo "${output}" | head -n -1)" From c28a0a03f0092e780bc65869adfedfd9bedc142b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 25 Sep 2023 15:24:15 +0000 Subject: [PATCH 268/339] metrics: Fix C-Ray documentation This PR fixes the C-Ray documentation for kata metrics. 
Fixes #8052 Signed-off-by: Gabriela Cervantes (cherry picked from commit 48157368207f118e662f3b8740935a155583bdb0) --- tests/metrics/cpu/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/metrics/cpu/README.md b/tests/metrics/cpu/README.md index 3dd3a2928..339cd8a10 100644 --- a/tests/metrics/cpu/README.md +++ b/tests/metrics/cpu/README.md @@ -5,5 +5,6 @@ This is a test of C-Ray which is a simple raytracer designed to test the floatin Individual test can be run by hand, for example: ``` -$ cd metrics/disk/c-ray $ ./cray.sh +$ cd metrics/cpu/c-ray +$ ./cray.sh ``` From b0c9b4254bbeab843bd32fdd889721bacacda7d2 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 20 Sep 2023 15:58:06 +0000 Subject: [PATCH 269/339] metrics: Fix metrics README This PR fixes the network metrics section at the README by leaving the current tests that we have in our kata metrics. Fixes #8017 Signed-off-by: Gabriela Cervantes (cherry picked from commit 36c8cd6f1fb031b385292b0d7b46718213c36d4e) --- tests/metrics/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/metrics/README.md b/tests/metrics/README.md index ab55e1052..68d6d535d 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -62,7 +62,6 @@ Tests relating to networking. General items could include: - latency - jitter - parallel bandwidth -- write and read percentiles For further details see the [network tests documentation](network). From 665805c81cd6b15799030ae070173371480700ba Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 22 Sep 2023 15:40:27 +0000 Subject: [PATCH 270/339] metrics: Fix spelling warnings This PR fixes general spelling warnings detected by the spelling check. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 97e73b22344ab15158080d1078d8c8a78ddc9f78) --- tests/metrics/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/metrics/README.md b/tests/metrics/README.md index 68d6d535d..56af31318 100644 --- a/tests/metrics/README.md +++ b/tests/metrics/README.md @@ -33,7 +33,7 @@ regression checking, we try to define and stick to some "quality measures" for o ## Categories -Kata Container metrics tend to fall into a set of categories, and we organise the tests +Kata Container metrics tend to fall into a set of categories, and we organize the tests within this folder as such. Each sub-folder contains its own `README` detailing its own tests. @@ -60,7 +60,7 @@ For further details see the [density tests documentation](density). Tests relating to networking. General items could include: - bandwidth - latency -- jitter +- `jitter` - parallel bandwidth For further details see the [network tests documentation](network). @@ -80,7 +80,7 @@ For further details see the [disk tests documentation](disk). ### Machine Learning Tests relating with TensorFlow and Pytorch implementations of several popular -convolutional models. +`convolutional` models. For further details see the [machine learning tests documentation](machine_learning). @@ -113,7 +113,7 @@ to do some JSON handling themselves before injecting their JSON into the API. #### `metrics_json_init()` -Initialise the API. Must be called before all other JSON API calls. +Initialize the API. Must be called before all other JSON API calls. Should be matched by a final call to `metrics_json_save`. Relies upon the `TEST_NAME` variable to derive the file name the final JSON @@ -147,7 +147,7 @@ Add a JSON formatted fragment at the top level. 
#### `metrics_json_start_array()` -Initialise the JSON array API subsystem, ready to accept JSON fragments via +Initialize the JSON array API subsystem, ready to accept JSON fragments via `metrics_json_add_array_element`. This JSON array API subset allows accumulation of multiple entries into a From e106ecd1e4e648d9ad81899d7dff1f7e12b12125 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 25 Sep 2023 15:52:24 +0000 Subject: [PATCH 271/339] metrics: Fix latency yamls path This PR fixes the latency yamls path for the latency test for kata metrics. Fixes #8055 Signed-off-by: Gabriela Cervantes (cherry picked from commit 81c8babca9e0ae2a90796f22bba1fd14ea9e26b1) --- tests/metrics/network/latency_kubernetes/latency-network.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/network/latency_kubernetes/latency-network.sh b/tests/metrics/network/latency_kubernetes/latency-network.sh index 19c84ef14..508e86f9b 100755 --- a/tests/metrics/network/latency_kubernetes/latency-network.sh +++ b/tests/metrics/network/latency_kubernetes/latency-network.sh @@ -30,7 +30,7 @@ function main() { sleep_time=2 # Create server - kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/latency-server.yaml" + kubectl create -f "${SCRIPT_PATH}/latency-server.yaml" # Get the names of the server pod export server_pod_name="latency-server" @@ -40,7 +40,7 @@ function main() { waitForProcess "$wait_time" "$sleep_time" "$cmd" # Create client - kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/latency-client.yaml" + kubectl create -f "${SCRIPT_PATH}/latency-client.yaml" # Get the names of the client pod export client_pod_name="latency-client" From 7c4617cfac57211ba419ec08ec779532ef25da97 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 25 Sep 2023 17:01:42 +0000 Subject: [PATCH 272/339] metrics: Add init_env function to latency test This Pr adds the init_env function to latency test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 9ac29b8d38f0433ce3a48b79399d94c92fadd7a5) --- tests/metrics/network/latency_kubernetes/latency-network.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/metrics/network/latency_kubernetes/latency-network.sh b/tests/metrics/network/latency_kubernetes/latency-network.sh index 508e86f9b..fedd18135 100755 --- a/tests/metrics/network/latency_kubernetes/latency-network.sh +++ b/tests/metrics/network/latency_kubernetes/latency-network.sh @@ -23,6 +23,8 @@ function main() { cmds=("bc" "jq") check_cmds "${cmds[@]}" + init_env + # Check no processes are left behind check_processes @@ -80,6 +82,7 @@ EOF metrics_json_save kubectl delete pod "$client_pod_name" "$server_pod_name" + kubectl get pods -A check_processes } main "$@" From c61b488b66f14d50e44761896535802d56bf4e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 Sep 2023 22:44:46 +0200 Subject: [PATCH 273/339] ci: Modify containerd default config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's ensure we have runc running with `SystemdCgroups = false`, otherwise we'll face failures when running tests depending on runc on Ubuntu 22.04, woth LTS containerd. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 8132fe15c9398bfc2ee7092c1f2d98878e5b73e6) --- tests/common.bash | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index c6fbf71a7..20330a981 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -263,14 +263,34 @@ function overwrite_containerd_config() { sudo rm -f "${containerd_config}" sudo tee "${containerd_config}" << EOF version = 2 -[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] - SystemdCgroup = true [plugins] [plugins."io.containerd.grpc.v1.cri"] [plugins."io.containerd.grpc.v1.cri".containerd] - default_runtime_name = "kata" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + base_runtime_spec = "" + cni_conf_dir = "" + cni_max_conf_num = 0 + container_annotations = [] + pod_annotations = [] + privileged_without_host_devices = false + runtime_engine = "" + runtime_path = "" + runtime_root = "" + runtime_type = "io.containerd.runc.v2" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + BinaryName = "" + CriuImagePath = "" + CriuPath = "" + CriuWorkPath = "" + IoGid = 0 + IoUid = 0 + NoNewKeyring = false + NoPivotRoot = false + Root = "" + ShimCgroup = "" + SystemdCgroup = false [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata] runtime_type = "io.containerd.kata.v2" EOF From 98e9434be46f39a3a33719d08454e4fcffe491ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 11:04:14 +0200 Subject: [PATCH 274/339] ci: Add install_cni_plugins helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will become handy when doing tests with CRI-O, as CRI-O doesn't install the CNI plugins for us. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 64a20008590b8935a9c42c8d54b3cf71641fb0ea) --- tests/common.bash | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/common.bash b/tests/common.bash index 20330a981..19f57ab0c 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -403,6 +403,19 @@ function download_github_project_tarball() { wget https://github.com/${project}/releases/download/${version}/${tarball_name} } +# version: The version to be intalled +function install_cni_plugins() { + version="${1}" + + project="containernetworking/plugins" + tarball_name="cni-plugins-linux-$(${repo_root_dir}/tests/kata-arch.sh -g)-${version}.tgz" + + download_github_project_tarball "${project}" "${version}" "${tarball_name}" + sudo mkdir -p /opt/cni/bin + sudo tar -xvf "${tarball_name}" -C /opt/cni/bin + rm -f "${tarball_name}" +} + # base_version: The version to be intalled in the ${major}.${minor} format function install_cri_containerd() { base_version="${1}" From 5017435734be0579ae5bc406d4e55fadc03f0ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 Sep 2023 22:52:14 +0200 Subject: [PATCH 275/339] ci: Create a generic install_crio function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will serve us quite will in the upcoming tests addition, which will also have to be executed using CRi-O. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit de1eeee334a58d2c49c0b095004fd9ead820e367) --- tests/common.bash | 46 +++++++++++++++++++++++++++++++++++++ tests/gha-run-k8s-common.sh | 41 +-------------------------------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index 19f57ab0c..e114a2121 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -469,6 +469,52 @@ function install_nydus_snapshotter() { rm -f "${tarball_name}" } +function _get_os_for_crio() { + source /etc/os-release + + if [ "${NAME}" != "Ubuntu" ]; then + echo "Only Ubuntu is supported for now" + exit 2 + fi + + echo "x${NAME}_${VERSION_ID}" +} + +# version: the CRI-O version to be installe +function install_crio() { + local version=${1} + + os=$(_get_os_for_crio) + + echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + echo "deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${version}/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:${version}.list + curl -L https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${version}/${os}/Release.key | sudo apt-key add - + curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - + sudo apt update + sudo apt install -y cri-o cri-o-runc + + # We need to set the default capabilities to ensure our tests will pass + # See: https://github.com/kata-containers/kata-containers/issues/8034 + sudo mkdir -p /etc/crio/crio.conf.d/ + cat < Date: Wed, 27 Sep 2023 09:23:28 +0200 Subject: [PATCH 276/339] ci: Make install_kata aware of container engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will help us when running tests using CRI-O. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 57cb4ce204c90ab35a08e4f058e2f40c3651838e) --- tests/common.bash | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index e114a2121..a11144574 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -257,6 +257,10 @@ function restart_containerd_service() { return 0 } +function restart_crio_service() { + sudo systemctl restart crio +} + # Configures containerd function overwrite_containerd_config() { containerd_config="/etc/containerd/config.toml" @@ -296,6 +300,28 @@ version = 2 EOF } +# Configures CRI-O +function overwrite_crio_config() { + crio_conf_d="/etc/crio/crio.conf.d" + sudo mkdir -p ${crio_conf_d} + + kata_config="${crio_conf_d}/99-kata-containers" + sudo tee "${kata_config}" << EOF +[crio.runtime.runtimes.kata] +runtime_path = "/usr/local/bin/containerd-shim-kata-v2" +runtime_type = "vm" +runtime_root = "/run/vc" +runtime_config_path = "/opt/kata/share/defaults/kata-containers/configuration.toml" +privileged_without_host_devices = true +EOF + + debug_config="${crio_conf_d}/100-debug" + sudo tee "${debug_config}" << EOF +[crio] +log_level = "debug" +EOF +} + function install_kata() { local kata_tarball="kata-static.tar.xz" declare -r katadir="/opt/kata" @@ -314,8 +340,14 @@ function install_kata() { sudo ln -sf "${b}" "${local_bin_dir}/$(basename $b)" done - check_containerd_config_for_kata - restart_containerd_service + if [ "${CONTAINER_ENGINE:=containerd}" = "containerd" ]; then + check_containerd_config_for_kata + restart_containerd_service + else + overwrite_crio_config + restart_crio_service + fi + } # creates a new kata configuration.toml hard link that From a50c7f1972cc4281a400ad4daa726f4519efb128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 Sep 2023 14:09:06 +0200 Subject: [PATCH 277/339] ci: Add placeholder for kata-monitor tests MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kata-monitor tests is currently running as part of the Jenkins CI with the following setups: * Container Engines: CRI-O | containerd * VMMs: QEMU When using containerd, we're testing it with: * Snapshotter: overlayfs | devmapper We will stop running those tests on devmapper / overlayfs as that hardly would get us a functionality issue. Also, we're restricting this to run with the LTS version of containerd, when containerd is used. As it's known due to our GHA limitation, this is just a placeholder and the tests will actually be added in the next iterations. Signed-off-by: Fabiano Fidêncio (cherry picked from commit a3fb067f1bccde0cbd3fd4d5de12dfb3d8c28b60) --- .github/workflows/ci.yaml | 8 +++ .github/workflows/run-kata-monitor-tests.yaml | 59 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 .github/workflows/run-kata-monitor-tests.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7479e6777..35317c528 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -124,6 +124,14 @@ jobs: pr-number: ${{ inputs.pr-number }} target-branch: ${{ inputs.target-branch }} + run-kata-monitor-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-kata-monitor-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-k8s-tests-on-aks: needs: publish-kata-deploy-payload-amd64 uses: ./.github/workflows/run-k8s-tests-on-aks.yaml diff --git a/.github/workflows/run-kata-monitor-tests.yaml b/.github/workflows/run-kata-monitor-tests.yaml new file mode 100644 index 000000000..98e2a2276 --- /dev/null +++ b/.github/workflows/run-kata-monitor-tests.yaml @@ -0,0 +1,59 @@ +name: CI | Run kata-monitor tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + 
required: false + type: string + default: "" + +jobs: + run-monitor: + strategy: + fail-fast: false + matrix: + vmm: + - qemu + container_engine: + - crio + - containerd + include: + - container_engine: containerd + containerd_version: lts + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINER_ENGINE: ${{ matrix.container_engine }} + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/kata-monitor/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/functional/kata-monitor/gha-run.sh install-kata kata-artifacts + + - name: Run kata-monitor tests + run: bash tests/functional/kata-monitor/gha-run.sh run From 1709f99975a7c2eb35526b7650c407f34d9244f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 Sep 2023 12:57:32 +0000 Subject: [PATCH 278/339] ci: kata-monitor: Move tests over MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's move, adapt, and use the kata-monitor tests from the tests repo. In this PR I'm keeping the SoB from every single contributor from who touched those tests in the past. Fixes: #8074 Signed-off-by: Fabiano Fidêncio Signed-off-by: yaoyinnan Signed-off-by: Gabriela Cervantes Signed-off-by: James O. D. 
Hunt (cherry picked from commit 489caf1ad0fae27cfd00ba3c9ed40e3d512fa492) --- tests/functional/kata-monitor/gha-run.sh | 78 +++++ .../kata-monitor/kata-monitor-tests.sh | 294 ++++++++++++++++++ 2 files changed, 372 insertions(+) create mode 100755 tests/functional/kata-monitor/gha-run.sh create mode 100755 tests/functional/kata-monitor/kata-monitor-tests.sh diff --git a/tests/functional/kata-monitor/gha-run.sh b/tests/functional/kata-monitor/gha-run.sh new file mode 100755 index 000000000..93ac3f87d --- /dev/null +++ b/tests/functional/kata-monitor/gha-run.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +kata_monitor_dir="$(dirname "$(readlink -f "$0")")" +source "${kata_monitor_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the cri-containerd tests" + + # Dependency list of projects that we can rely on the system packages + # - build-essential + # - Theoretically we only need `make`, but doesn't hurt to install + # the whole build-essential group + # - jq + declare -a system_deps=( + build-essential + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + # Dependency list of projects that we can install them + # directly from their releases on GitHub: + # - cri-tools + # - containerd + # - cri-container-cni release tarball already includes CNI plugins + cri_tools_version=$(get_from_kata_deps "externals.critools.latest") + declare -a github_deps + github_deps[0]="cri_tools:${cri_tools_version}" + case "${CONTAINER_ENGINE}" in + containerd) + github_deps[1]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + ;; + crio) + github_deps[1]="cni_plugins:$(get_from_kata_deps "externals.cni-plugins.version")" + ;; + esac + + for github_dep in "${github_deps[@]}"; do + 
IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done + + if [ "${CONTAINER_ENGINE}" = "crio" ]; then + install_crio ${cri_tools_version#v} + fi +} + +function run() { + info "Running cri-containerd tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + bash -c ${kata_monitor_dir}/kata-monitor-tests.sh +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/functional/kata-monitor/kata-monitor-tests.sh b/tests/functional/kata-monitor/kata-monitor-tests.sh new file mode 100755 index 000000000..fe95cb867 --- /dev/null +++ b/tests/functional/kata-monitor/kata-monitor-tests.sh @@ -0,0 +1,294 @@ +#!/bin/bash +# +# Copyright (c) 2022 Red Hat +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test file will test kata-monitor for basic functionality (retrieve kata sandboxes) +# It will assume an environment where: +# - a CRI container manager (container engine) will be up and running +# - crictl is installed and configured +# - the kata-monitor binary is available on the host +# + +set -o errexit +set -o nounset +set -o pipefail + +source "/etc/os-release" || source "/usr/lib/os-release" + +[ -n "${BASH_VERSION:-}" ] && set -o errtrace +[ -n "${DEBUG:-}" ] && set -o xtrace + +readonly MONITOR_HTTP_ENDPOINT="127.0.0.1:8090" +# we should collect few hundred metrics, let's put a reasonable minimum +readonly MONITOR_MIN_METRICS_NUM=200 +CONTAINER_ENGINE=${CONTAINER_ENGINE:-"containerd"} +CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"} +KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}" +KATA_MONITOR_PID="" +TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX) +METRICS_FILE="${TMPATH}/metrics.txt" +MONITOR_LOG_FILE="${TMPATH}/kata-monitor.log" +CACHE_UPD_TIMEOUT_SEC=${CACHE_UPD_TIMEOUT_SEC:-20} +POD_ID="" +CID="" +RUNC_POD_ID="" +RUNC_CID="" 
+CURRENT_TASK="" + +FALSE=1 +TRUE=0 + +trap error_with_msg ERR + +title() { + local step="$1" + echo -e "\n* STEP: $step" +} + +echo_ok() { + local msg="$1" + + echo "OK: $msg" +} + +# quiet crictrl +qcrictl() { + sudo crictl "$@" > /dev/null +} + +# this is just an hash of current date (+ nanoseconds) +gen_unique_id() { + date +%T:%N | md5sum | cut -d ' ' -f 1 +} + +error_with_msg() { + local msg=${1:-"cannot $CURRENT_TASK"} + + trap - ERR + echo -e "\nERROR: $msg" + if [ -f "$MONITOR_LOG_FILE" ]; then + echo -e "\nkata-monitor logs:\n----------------" + cat "$MONITOR_LOG_FILE" + fi + echo -e "\nkata-monitor testing: FAILED!" + cleanup + exit 1 +} + +cleanup() { + stop_workload + stop_workload "$RUNC_CID" "$RUNC_POD_ID" + + [ -n "$KATA_MONITOR_PID" ] \ + && [ -d "/proc/$KATA_MONITOR_PID" ] \ + && kill -9 "$KATA_MONITOR_PID" + + rm -rf "$TMPATH" +} + +create_sandbox_json() { + local uid_name_suffix="$(gen_unique_id)" + local sbfile="$TMPATH/sandbox-$uid_name_suffix.json" + + cat <$sbfile +{ + "metadata": { + "name": "nginx-$uid_name_suffix", + "namespace": "default", + "uid": "nginx-container-uid", + "attempt": 1 + }, + "logDirectory": "/tmp", + "linux": { + } +} +EOF + echo "$sbfile" +} + +create_container_json() { + local uid_name_suffix="$(gen_unique_id)" + local cntfile="$TMPATH/container-$uid_name_suffix.json" + + cat <$cntfile +{ + "metadata": { + "name": "busybox", + "namespace": "default", + "uid": "busybox-container-uid" + }, + "image":{ + "image": "busybox" + }, + "command": [ + "top" + ], + "log_path":"busybox.log", + "linux": { + } +} +EOF + echo "$cntfile" +} + +start_workload() { + local runtime=${1:-} + local args="" + local sbfile="" + local cntfile="" + + [ -n "$runtime" ] && args="-r $runtime" + + sbfile="$(create_sandbox_json)" + cntfile="$(create_container_json)" + + POD_ID=$(sudo crictl runp $args $sbfile) + CID=$(sudo crictl create $POD_ID $cntfile $sbfile) + qcrictl start $CID +} + +stop_workload() { + local cid="${1:-$CID}" + local 
pod_id="${2:-$POD_ID}" + local check + + [ -z "$pod_id" ] && return + check=$(sudo crictl pods -q -id $pod_id) + [ -z "$check" ] && return + + qcrictl stop $cid + qcrictl rm $cid + + qcrictl stopp $pod_id + qcrictl rmp $pod_id +} + +is_sandbox_there() { + local podid=${1} + local sbs s + + sbs=$(sudo curl -s ${MONITOR_HTTP_ENDPOINT}/sandboxes) + if [ -n "$sbs" ]; then + for s in $sbs; do + if [ "$s" = "$podid" ]; then + return $TRUE + break + fi + done + fi + return $FALSE +} + +is_sandbox_there_iterate() { + local podid=${1} + + for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do + is_sandbox_there "$podid" && return $TRUE + echo -n "." + sleep 1 + continue + done + + return $FALSE +} + +is_sandbox_missing_iterate() { + local podid=${1} + + for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do + is_sandbox_there "$podid" || return $TRUE + echo -n "." + sleep 1 + continue + done + + return $FALSE +} + +main() { + local args="" + + ########################### + title "pre-checks" + + CURRENT_TASK="connect to the container engine" + qcrictl pods + echo_ok "$CURRENT_TASK" + + ########################### + title "pull the image to be used" + sudo crictl pull busybox + + ########################### + title "create workloads" + + CURRENT_TASK="start workload (runc)" + start_workload + RUNC_POD_ID="$POD_ID" + RUNC_CID="$CID" + echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID" + + CURRENT_TASK="start workload ($CRICTL_RUNTIME)" + start_workload "$CRICTL_RUNTIME" + echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID" + + ########################### + title "start kata-monitor" + + [ ! -x "$KATA_MONITOR_BIN" ] && error_with_msg "kata-monitor binary not found" + + [ "$CONTAINER_ENGINE" = "crio" ] && args="--runtime-endpoint /run/crio/crio.sock" + + CURRENT_TASK="start kata-monitor" + sudo $KATA_MONITOR_BIN $args --log-level trace > "$MONITOR_LOG_FILE" 2>&1 & + KATA_MONITOR_PID="$!" 
+ echo_ok "$CURRENT_TASK ($KATA_MONITOR_PID)" + + ########################### + title "kata-monitor cache update checks" + + CURRENT_TASK="retrieve $POD_ID in kata-monitor cache" + is_sandbox_there_iterate "$POD_ID" || error_with_msg + echo_ok "$CURRENT_TASK" + + CURRENT_TASK="look for runc pod $RUNC_POD_ID in kata-monitor cache" + is_sandbox_there_iterate "$RUNC_POD_ID" && error_with_msg "cache: got runc pod $RUNC_POD_ID" + echo_ok "runc pod $RUNC_POD_ID skipped from kata-monitor cache" + + ########################### + title "kata-monitor metrics retrieval" + + CURRENT_TASK="retrieve metrics from kata-monitor" + curl -s ${MONITOR_HTTP_ENDPOINT}/metrics > "$METRICS_FILE" + echo_ok "$CURRENT_TASK" + + CURRENT_TASK="retrieve metrics for pod $POD_ID" + METRICS_COUNT=$(grep -c "$POD_ID" "$METRICS_FILE") + [ ${METRICS_COUNT} -lt ${MONITOR_MIN_METRICS_NUM} ] \ + && error_with_msg "got too few metrics (#${METRICS_COUNT})" + echo_ok "$CURRENT_TASK - found #${METRICS_COUNT} metrics" + + ########################### + title "remove kata workload" + + CURRENT_TASK="stop workload ($CRICTL_RUNTIME)" + stop_workload + echo_ok "$CURRENT_TASK" + + ########################### + title "kata-monitor cache update checks (removal)" + + CURRENT_TASK="verify removal of $POD_ID from kata-monitor cache" + is_sandbox_missing_iterate "$POD_ID" || error_with_msg "pod $POD_ID was not removed" + echo_ok "$CURRENT_TASK" + + ########################### + CURRENT_TASK="cleanup" + cleanup + + echo -e "\nkata-monitor testing: PASSED!\n" +} + +main "@" From cd82a351bd55b185710a2fe9b8141f32a2c64ef6 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 26 Sep 2023 16:24:01 +0000 Subject: [PATCH 279/339] metrics: Add latency value limits for kata CI This PR adds latency value limits for kata CI. 
Fixes #8067 Signed-off-by: Gabriela Cervantes (cherry picked from commit a74a8f8a9da91b58a68c5b957665c1b4bde4f40e) --- .../checkmetrics-json-clh-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index cf5042dbc..44b456765 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "latency" +type = "json" +description = "measure container latency" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"latency\".Results | .[] | .latency.Result" +checktype = "mean" +midval = 0.75 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From ce03e9f97a0db03d279e0689fb8782d56da72146 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 26 Sep 2023 17:30:09 +0000 Subject: [PATCH 280/339] metrics: Add qemu latency value limit This PR adds the qemu latency value limit for kata metrics. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit e90440ae24266a55059639f6681d5d052d2d3423) --- .../checkmetrics-json-qemu-kata-metric8.toml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 8281ed71a..3bce0c8fb 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -98,6 +98,19 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "latency" +type = "json" +description = "measure container latency" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = ".\"latency\".Results | .[] | .latency.Result" +checktype = "mean" +midval = 0.70 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "network-iperf3" type = "json" From f3fcf6cbf974b4939a187abe70c6f5d2af9b18b0 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 26 Sep 2023 19:11:08 +0000 Subject: [PATCH 281/339] metrics: Add checkmetrics for latency test This PR adds the checkmetrics for latency test. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 8cb7df1bedd762147ce951fba2837f71cdc7f871) --- tests/metrics/gha-run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 4f0848821..3c7686e12 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -93,14 +93,14 @@ function run_test_iperf() { info "Running Iperf test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh -a - - check_metrics } function run_test_latency() { info "Running Latency test using ${KATA_HYPERVISOR} hypervisor" bash tests/metrics/network/latency_kubernetes/latency-network.sh + + check_metrics } function main() { From 8a1af8689bfa03dd59c0718107a92a850c65d9e2 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 27 Sep 2023 17:30:19 +0000 Subject: [PATCH 282/339] metrics: Increase jitter value for clh This PR increases jitter value for clh. Signed-off-by: Gabriela Cervantes (cherry picked from commit 5600e28b54a763ac3a9c17cae6e8544add513603) --- .../ci_worker/checkmetrics-json-clh-kata-metric8.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index 44b456765..c0436b24c 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -160,5 +160,5 @@ description = "iperf" checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" midval = 0.044 -minpercent = 40.0 -maxpercent = 40.0 +minpercent = 50.0 +maxpercent = 50.0 From c22fdb46e33872d82131944f7ecab7aa23b74b80 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 27 Sep 2023 17:31:07 +0000 Subject: [PATCH 283/339] metrics: Increase qemu 
jitter value This PR increases qemu jitter value. Signed-off-by: Gabriela Cervantes (cherry picked from commit 8d66ef51855c18a255ba19a1900cde5e9ca7cf5c) --- .../ci_worker/checkmetrics-json-qemu-kata-metric8.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 3bce0c8fb..666a898be 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -160,5 +160,5 @@ description = "iperf" checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result" checktype = "mean" midval = 0.041 -minpercent = 40.0 -maxpercent = 40.0 +minpercent = 50.0 +maxpercent = 50.0 From b2e432c02468b7dd1a6351d0e9f125fad75e5234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 14:19:24 +0200 Subject: [PATCH 284/339] packaging: Use git abbreviated hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make it easier to build images that rely on several directories hashes. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 98097c96deae92000997e24f90318ec9536a7fdc) --- tools/packaging/scripts/lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index e7a6fc987..8b7b13da1 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -123,7 +123,7 @@ get_last_modification() { dirty="" [ $(git status --porcelain | grep "${file#${repo_root_dir}/}" | wc -l) -gt 0 ] && dirty="-dirty" - echo "$(git log -1 --pretty=format:"%H" ${file})${dirty}" + echo "$(git log -1 --pretty=format:"%h" ${file})${dirty}" popd &> /dev/null } From d503daf75e05755d8981e148fd88dfe903342a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 14:20:02 +0200 Subject: [PATCH 285/339] packaging: Add get_tools_image_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used for building all the (rust) components from src/tools. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 4d08ec29bc42b349d25507e4b8c038904991df2a) --- tools/packaging/scripts/lib.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index 8b7b13da1..fe1faf5af 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -218,3 +218,11 @@ get_virtiofsd_image_name() { virtiofsd_script_dir="${repo_root_dir}/tools/packaging/static-build/virtiofsd" echo "${BUILDER_REGISTRY}:virtiofsd-$(get_from_kata_deps "externals.virtiofsd.toolchain")-${libc}-$(get_last_modification ${virtiofsd_script_dir})-$(uname -m)" } + +get_tools_image_name() { + tools_dir="${repo_root_dir}/src/tools" + libs_dir="${repo_root_dir}/src/libs" + agent_dir="${repo_root_dir}/src/agent" + + echo "${BUILDER_REGISTRY}:tools-$(get_last_modification ${tools_dir})-$(get_last_modification ${libs_dir})-$(get_last_modification ${agent_dir})" +} From a5d7ba66621502344451abc9bd27a3d10fd04ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 13:54:38 +0200 Subject: [PATCH 286/339] static-build: Add scripts to build content from src/tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we'd like to ship the content from src/tools, we need to build them in the very same way we build the other components, and the first step is providing scripts that can build those inside a container. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 6ef42db5ecc87f2a79d5ebe88c1dff57083e4790) --- tools/packaging/static-build/tools/Dockerfile | 20 +++++++++++ .../static-build/tools/build-static-tools.sh | 36 +++++++++++++++++++ tools/packaging/static-build/tools/build.sh | 31 ++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 tools/packaging/static-build/tools/Dockerfile create mode 100755 tools/packaging/static-build/tools/build-static-tools.sh create mode 100755 tools/packaging/static-build/tools/build.sh diff --git a/tools/packaging/static-build/tools/Dockerfile b/tools/packaging/static-build/tools/Dockerfile new file mode 100644 index 000000000..aa468488d --- /dev/null +++ b/tools/packaging/static-build/tools/Dockerfile @@ -0,0 +1,20 @@ +# Copyright (c) 2023 Intel +# +# SPDX-License-Identifier: Apache-2.0 + +FROM alpine:3.18 +ARG GO_TOOLCHAIN +ARG RUST_TOOLCHAIN + +SHELL ["/bin/ash", "-o", "pipefail", "-c"] +RUN apk --no-cache add \ + bash \ + curl \ + gcc \ + git \ + libcap-ng-static \ + libseccomp-static \ + make \ + musl-dev \ + protoc && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} diff --git a/tools/packaging/static-build/tools/build-static-tools.sh b/tools/packaging/static-build/tools/build-static-tools.sh new file mode 100755 index 000000000..15e9f740a --- /dev/null +++ b/tools/packaging/static-build/tools/build-static-tools.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +source "${script_dir}/../../scripts/lib.sh" + +init_env() { + source "$HOME/.cargo/env" + + export LIBC=musl + export LIBSECCOMP_LINK_TYPE=static + export LIBSECCOMP_LIB_PATH=/usr/lib + + extra_rust_flags=" -C link-self-contained=yes" +} + +build_tool_from_source() { + set -x + tool=${1} + + echo 
"build ${tool} from source" + init_env + + cd src/tools/${tool} + make +} + +build_tool_from_source $@ diff --git a/tools/packaging/static-build/tools/build.sh b/tools/packaging/static-build/tools/build.sh new file mode 100755 index 000000000..11abe7bb2 --- /dev/null +++ b/tools/packaging/static-build/tools/build.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly tools_builder="${script_dir}/build-static-tools.sh" + +source "${script_dir}/../../scripts/lib.sh" + +tool="${1}" + +container_image="${VIRTIOFSD_CONTAINER_BUILDER:-$(get_tools_image_name)}" +[ "${CROSS_BUILD}" == "true" ] && container_image="${container_image}-cross-build" + +sudo docker pull ${container_image} || \ + (sudo docker $BUILDX build $PLATFORM \ + --build-arg RUST_TOOLCHAIN="$(get_from_kata_deps "languages.rust.meta.newest-version")" \ + -t "${container_image}" "${script_dir}" && \ + # No-op unless PUSH_TO_REGISTRY is exported as "yes" + push_to_registry "${container_image}") + +sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ + -w "${repo_root_dir}" \ + "${container_image}" \ + bash -c "${tools_builder} ${tool}" From 41b509e0a67fdb6817441f2ac2dd8dde04946f71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 14:06:17 +0200 Subject: [PATCH 287/339] kata-deploy: Build components from src/tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add targets and actually enable users and oursevles to build those components in the same way we build the rest of the project. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 501a168a8132d7ec9eb5a48a6b6ac4db7f261148) --- .../kata-deploy/local-build/Makefile | 15 ++++ .../local-build/kata-deploy-binaries.sh | 76 ++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index ece1900c0..041cd5c80 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -52,6 +52,9 @@ serial-targets: %-tarball-build: $(MK_DIR)/dockerbuild/install_yq.sh $(call BUILD,$*) +agent-ctl-tarball: + ${MAKE} $@-build + cloud-hypervisor-tarball: ${MAKE} $@-build @@ -61,6 +64,9 @@ cloud-hypervisor-glibc-tarball: firecracker-tarball: ${MAKE} $@-build +kata-ctl-tarball: + ${MAKE} $@-build + kernel-dragonball-experimental-tarball: ${MAKE} $@-build @@ -82,6 +88,9 @@ kernel-tdx-experimental-tarball: kernel-sev-tarball: ${MAKE} $@-build +log-parser-rs-tarball: + ${MAKE} $@-build + nydus-tarball: ${MAKE} $@-build @@ -115,12 +124,18 @@ rootfs-initrd-sev-tarball: kernel-sev-tarball rootfs-initrd-tarball: ${MAKE} $@-build +runk-tarball: + ${MAKE} $@-build + shim-v2-tarball: ${MAKE} $@-build tdvf-tarball: ${MAKE} $@-build +trace-forwarder-tarball: + ${MAKE} $@-build + virtiofsd-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 15159a769..c576fbab5 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -32,8 +32,8 @@ readonly qemu_experimental_builder="${static_build_dir}/qemu/build-static-qemu-e readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" readonly nydus_builder="${static_build_dir}/nydus/build.sh" - readonly 
rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" +readonly tools_builder="${static_build_dir}/tools/build.sh" ARCH=${ARCH:-$(uname -m)} MEASURED_ROOTFS=${MEASURED_ROOTFS:-no} @@ -81,9 +81,11 @@ options: -s : Silent mode (produce output in case of failure only) --build= : all + agent-ctl cloud-hypervisor cloud-hypervisor-glibc firecracker + kata-ctl kernel kernel-dragonball-experimental kernel-experimental @@ -92,6 +94,7 @@ options: kernel-nvidia-gpu-tdx-experimental kernel-sev-tarball kernel-tdx-experimental + log-parser-rs nydus ovmf ovmf-sev @@ -103,8 +106,10 @@ options: rootfs-initrd rootfs-initrd-mariner rootfs-initrd-sev + runk shim-v2 tdvf + trace-forwarder virtiofsd EOF @@ -617,6 +622,55 @@ install_ovmf_sev() { install_ovmf "sev" "edk2-sev.tar.gz" } +install_tools_helper() { + tool=${1} + + latest_artefact="$(git log -1 --pretty=format:"%h" ${repo_root_dir}/src/tools/${tool})" + latest_builder_image="$(get_tools_image_name)" + + install_cached_tarball_component \ + "${tool}" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && return 0 + + + info "build static ${tool}" + ${tools_builder} ${tool} + + tool_binary=${tool} + [ ${tool} = "agent-ctl" ] && tool_binary="kata-agent-ctl" + [ ${tool} = "log-parser-rs" ] && tool_binary="log-parser" + [ ${tool} = "trace-forwarder" ] && tool_binary="kata-trace-forwarder" + binary=$(find ${repo_root_dir}/src/tools/${tool}/ -type f -name ${tool_binary}) + + info "Install static ${tool_binary}" + mkdir -p "${destdir}/opt/kata/bin/" + sudo install -D --owner root --group root --mode 0744 ${binary} "${destdir}/opt/kata/bin/${tool_binary}" +} + +install_agent_ctl() { + install_tools_helper "agent-ctl" +} + +install_kata_ctl() { + install_tools_helper "kata-ctl" +} + +install_log_parser_rs() { + install_tools_helper "log-parser-rs" +} + +install_runk() { + install_tools_helper "runk" +} + +install_trace_forwarder() { + 
install_tools_helper "trace-forwarder" +} + get_kata_version() { local v v=$(cat "${version_file}") @@ -638,32 +692,41 @@ handle_build() { case "${build_target}" in all) + install_agent_ctl install_clh install_firecracker install_image install_initrd install_initrd_mariner install_initrd_sev + install_kata_ctl install_kernel install_kernel_dragonball_experimental install_kernel_tdx_experimental + install_log_parser_rs install_nydus install_ovmf install_ovmf_sev install_qemu install_qemu_snp_experimental install_qemu_tdx_experimental + install_runk install_shimv2 install_tdvf + install_trace_forwarder install_virtiofsd ;; + agent-ctl) install_agent_ctl ;; + cloud-hypervisor) install_clh ;; cloud-hypervisor-glibc) install_clh_glibc ;; firecracker) install_firecracker ;; + kata-ctl) install_kata_ctl ;; + kernel) install_kernel ;; kernel-dragonball-experimental) install_kernel_dragonball_experimental ;; @@ -678,6 +741,8 @@ handle_build() { kernel-sev) install_kernel_sev ;; + log-parser-rs) install_log_parser_rs ;; + nydus) install_nydus ;; ovmf) install_ovmf ;; @@ -699,11 +764,15 @@ handle_build() { rootfs-initrd-mariner) install_initrd_mariner ;; rootfs-initrd-sev) install_initrd_sev ;; + + runk) install_runk ;; shim-v2) install_shimv2 ;; tdvf) install_tdvf ;; + trace-forwarder) install_trace_forwarder ;; + virtiofsd) install_virtiofsd ;; *) @@ -755,16 +824,21 @@ main() { local build_targets local silent build_targets=( + agent-ctl cloud-hypervisor firecracker + kata-ctl kernel kernel-experimental + log-parser-rs nydus qemu rootfs-image rootfs-initrd rootfs-initrd-mariner + runk shim-v2 + trace-forwarder virtiofsd ) silent=false From 5800be50294c004f430c0b140d1658fb904efa44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 18:35:53 +0200 Subject: [PATCH 288/339] ci: Build src/tools components as part of our tests / releases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build those as 
part of our CI and release workflows. Fixes #5520 #5348 Signed-off-by: Fabiano Fidêncio (cherry picked from commit a6b1f5e21b212a54a6575a1babe6e7f89b576c1d) --- .github/workflows/build-kata-static-tarball-amd64.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 20a59bb8c..dcfbfdc03 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -27,9 +27,11 @@ jobs: strategy: matrix: asset: + - agent-ctl - cloud-hypervisor - cloud-hypervisor-glibc - firecracker + - kata-ctl - kernel - kernel-sev - kernel-dragonball-experimental @@ -37,6 +39,7 @@ jobs: - kernel-nvidia-gpu - kernel-nvidia-gpu-snp - kernel-nvidia-gpu-tdx-experimental + - log-parser-rs - nydus - ovmf - ovmf-sev @@ -48,8 +51,10 @@ jobs: - rootfs-initrd - rootfs-initrd-mariner - rootfs-initrd-sev + - runk - shim-v2 - tdvf + - trace-forwarder - virtiofsd stage: - ${{ inputs.stage }} From 16c349e76c97bd9b16c7c0dbc54c3495addd4950 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 26 Sep 2023 16:13:12 +0000 Subject: [PATCH 289/339] docs: Update url in kata vra document This PR updates the url in kata vra document. Fixes #8065 Signed-off-by: Gabriela Cervantes (cherry picked from commit 928553d1bac10056a9bdbb70ea7154e6df865f64) --- docs/design/kata-vra.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/kata-vra.md b/docs/design/kata-vra.md index ba53c3371..887f51aec 100644 --- a/docs/design/kata-vra.md +++ b/docs/design/kata-vra.md @@ -220,7 +220,7 @@ containers that he wants to run with Kata. The goal is to make such things as transparent as possible, so we also introduced [CDI](https://github.com/container-orchestrated-devices/container-device-interface) (Container Device Interface) to Kata. 
CDI is a[ -specification](https://github.com/container-orchestrated-devices/container-device-interface/blob/master/SPEC.md) +specification](https://github.com/container-orchestrated-devices/container-device-interface/blob/main/SPEC.md) for container runtimes to support third-party devices. As written before, we can provide a clique ID for the devices that belong From 362adea8cd42ac04d9d10227aaa96546f6d36ff9 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 26 Sep 2023 16:30:59 +0000 Subject: [PATCH 290/339] metrics: Fix general check static warnings This PR fixes general check static warnings. Signed-off-by: Gabriela Cervantes (cherry picked from commit d7def8317a598329ad6e520e57984ca5feac6ede) --- docs/design/kata-vra.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/design/kata-vra.md b/docs/design/kata-vra.md index 887f51aec..bc62e762c 100644 --- a/docs/design/kata-vra.md +++ b/docs/design/kata-vra.md @@ -43,7 +43,7 @@ and perform DMA transactions _anywhere_. The second feature is ACS (Access Control Services), which controls which devices are allowed to communicate with one another and thus avoids improper -routing of packets irrespectively of whether IOMMU is enabled or not. +routing of packets `irrespectively` of whether IOMMU is enabled or not. When IOMMU is enabled, ACS is normally configured to force all PCI Express DMA to go through the root complex so IOMMU can translate it, impacting performance @@ -126,7 +126,7 @@ efficient P2P communication. ## PCI Express Virtual P2P Approval Capability Most of the time, the PCI Express topology is flattened and obfuscated to ensure -easy migration of the VM image between different physical hardware topologies. +easy migration of the VM image between different physical hardware `topologies`. In Kata, we can configure the hypervisor to use PCI Express root ports to hotplug the VFIO  devices one is passing through. 
A user can select how many PCI Express root ports to allocate depending on how many devices are passed through. @@ -300,7 +300,7 @@ pcie_switch_port = 8 ``` Each device that is passed through is attached to a PCI Express downstream port -as illustrated below. We can even replicate the host’s two DPUs topologies with +as illustrated below. We can even replicate the host’s two DPUs `topologies` with added metadata through the CDI. Most of the time, a container only needs one pair of GPU and NIC for GPUDirect RDMA. This is more of a showcase of what we can do with the power of Kata and CDI. One could even think of adding groups of @@ -328,7 +328,7 @@ $ lspci -tv ``` The configuration of using either the root port or switch port can be applied on -a per Container or Pod basis, meaning we can switch PCI Express topologies on +a per Container or Pod basis, meaning we can switch PCI Express `topologies` on each run of an application. ## Hypervisor Resource Limits From 4393f553e97c9c174eb8843bd36922c073ec4647 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 28 Sep 2023 16:23:36 +0000 Subject: [PATCH 291/339] metrics: Add stability test for kata CI This PR adds the stability test for kata containers repository. 
Fixes #8084 Signed-off-by: Gabriela Cervantes (cherry picked from commit ef68a3a36b5a93d99fc32482de3c521214be80a3) --- tests/stability/scability_test.sh | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100755 tests/stability/scability_test.sh diff --git a/tests/stability/scability_test.sh b/tests/stability/scability_test.sh new file mode 100755 index 000000000..86d941e65 --- /dev/null +++ b/tests/stability/scability_test.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +NUM_CONTAINERS="$1" +TIMEOUT_LAUNCH="$2" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +IMAGE="${IMAGE:-quay.io/prometheus/busybox:latest}" + +# Show help about this script +help(){ +cat << EOF +Usage: $0 + Description: + This script launches n number of containers. + Options: + : Number of containers to run. + : Timeout to launch the containers. +EOF +} + +function main() { + # Verify enough arguments + if [ $# != 2 ]; then + echo >&2 "error: Not enough arguments [$@]" + help + exit 1 + fi + + local i=0 + local containers=() + local not_started_count="${NUM_CONTAINERS}" + + init_env + check_cmds "${cmds[@]}" + sudo -E ctr i pull "${IMAGE}" + + info "Creating ${NUM_CONTAINERS} containers" + + for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do + containers+=($(random_name)) + sudo -E ctr run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" + ((not_started_count--)) + info "$not_started_count remaining containers" + done + + # Check that the requested number of containers are running + check_containers_are_up & pid=$! + (sleep "${TIMEOUT_LAUNCH}" && kill -HUP "${pid}") 2>/dev/null & pid_tout=$! 
+ + if wait "${pid}" 2>/dev/null; then + pkill -HUP -P "${pid_tout}" + wait "${pid_tout}" + else + warn "Time out exceeded" + return 1 + fi + + clean_env_ctr +} + +main "$@" From e95d3b1be56f70e19249f82fea905593a08d5ee9 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 28 Sep 2023 16:33:08 +0000 Subject: [PATCH 292/339] tests: Add stressor CPU test for stability tests This PR adds the stressor CPU test for stability tests. Signed-off-by: Gabriela Cervantes (cherry picked from commit 6493aa309e02af49bb887ed904254e14ff20ccbe) --- tests/stability/stressng.sh | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 tests/stability/stressng.sh diff --git a/tests/stability/stressng.sh b/tests/stability/stressng.sh new file mode 100755 index 000000000..22ba0e0c9 --- /dev/null +++ b/tests/stability/stressng.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +DOCKERFILE="${SCRIPT_PATH}/stressng_dockerfile/Dockerfile" +IMAGE="docker.io/library/local-stressng:latest" +CONTAINER_NAME="${CONTAINER_NAME:-stressng_test}" + +function main() { + local cmds=("docker") + + init_env + check_cmds "${cmds[@]}" + check_ctr_images "${IMAGE}" "${DOCKERFILE}" + sudo -E ctr run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}" + + # Run 1 iomix stressor (mix of I/O operations) for 20 seconds with verbose output + info "Running iomix stressor test" + IOMIX_CMD="stress-ng --iomix 1 -t 20 -v" + sudo -E ctr t exec --exec-id 1 "${CONTAINER_NAME}" sh -c "${IOMIX_CMD}" + + # Run cpu stressors and virtual memory stressors for 5 minutes + info "Running memory stressors for 5 minutes" + MEMORY_CMD="stress-ng --cpu 2 --vm 4 -t 5m" + sudo -E ctr t exec --exec-id 2 
"${CONTAINER_NAME}" sh -c "${MEMORY_CMD}" + + # Run shared memory stressors + info "Running 8 shared memory stressors" + SHARED_CMD="stress-ng --shm 0" + sudo -E ctr t exec --exec-id 3 "${CONTAINER_NAME}" sh -c "${SHARED_CMD}" + + # Run all stressors one by one on all CPUs + info "Running all stressors one by one" + STRESSORS_CMD="stress-ng --seq 0 -t 10 --tz -v" + sudo -E ctr t exec --exec-id 4 "${CONTAINER_NAME}" sh -c "${STRESSORS_CMD}" + + # Test floating point on CPU for 60 seconds + info "Running floating tests on CPU" + FLOAT_CMD="stress-ng --matrix 1 -t 1m" + sudo -E ctr t exec --exec-id 5 "${CONTAINER_NAME}" sh -c "${FLOAT_CMD}" + + # Runs two instances of the CPU stressors, one instance of the matrix + info "Running instances of the CPU stressors" + INSTANCE_CMD='stress-ng --cpu 2 --matrix 1 --mq 3 -t 5m' + sudo -E ctr t exec --exec-id 6 "${CONTAINER_NAME}" sh -c "${INSTANCE_CMD}" + + clean_env_ctr +} + +main "$@" From 240c584ae298ede5960257456b659361d6240d2b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 28 Sep 2023 16:35:22 +0000 Subject: [PATCH 293/339] tests: Add stressng dockerfile for stability tests This PR adds the stressng dockerfile for stability tests. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit f2d3ea988dfdf9e4f7f0801d912e72e530030594) --- tests/stability/stressng_dockerfile/Dockerfile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/stability/stressng_dockerfile/Dockerfile diff --git a/tests/stability/stressng_dockerfile/Dockerfile b/tests/stability/stressng_dockerfile/Dockerfile new file mode 100644 index 000000000..b17002c1e --- /dev/null +++ b/tests/stability/stressng_dockerfile/Dockerfile @@ -0,0 +1,17 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:20.04 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends sudo build-essential curl && \ + apt-get remove -y unattended-upgrades && \ + apt-get install -y stress stress-ng + +CMD ["/bin/bash"] From a8eec39559f465e0e55c8c99682e69def0c7d1c8 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 28 Sep 2023 16:40:48 +0000 Subject: [PATCH 294/339] tests: Add cassandra stress in stability tests This PR adds the cassandra stress at the stability tests. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 215577032f5b0fe5dbf10b0ed539bce4c291bd05) --- tests/stability/cassandra_stress.sh | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 tests/stability/cassandra_stress.sh diff --git a/tests/stability/cassandra_stress.sh b/tests/stability/cassandra_stress.sh new file mode 100755 index 000000000..cd666ca15 --- /dev/null +++ b/tests/stability/cassandra_stress.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o pipefail + +# General env +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../metrics/lib/common.bash" + +IMAGE="docker.io/library/cassandra:latest" +CONTAINER_NAME="${CONTAINER_NAME:-cassandra_test}" +DOCKER_IMAGE="cassandra:latest" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +CMD="cassandra -R" + +function main() { + local cmds=("docker") + + init_env + check_cmds "${cmds[@]}" + sudo -E "${DOCKER_EXE}" pull "${DOCKER_IMAGE}" + sudo -E "${DOCKER_EXE}" save -o "${DOCKER_IMAGE}.tar" "${DOCKER_IMAGE}" + sudo -E "${CTR_EXE}" i import "${DOCKER_IMAGE}.tar" + + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}" + sudo -E "${CTR_EXE}" t exec --exec-id 1 "${CONTAINER_NAME}" sh -c "${CMD}" + info "Write one million rows" + local WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 -rate threads=50" + sudo -E "${CTR_EXE}" t exec --exec-id 2 "${CONTAINER_NAME}" sh -c "${WRITE_CMD}" + info "Load one row with default schema" + local CQL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1 c1=one -mode native cql3 -log file-create_schema.log" + sudo -E "${CTR_EXE}" t exec --exec-id 3 "${CONTAINER_NAME}" sh -c "${CQL_WRITE_CMD}" + info "Run a write workload using CQL" + local REAL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema 
keyspace='keyspace1' -log file=load_1M_rows.log" + sudo -E "${CTR_EXE}" t exec --exec-id 4 "${CONTAINER_NAME}" sh -c "${REAL_WRITE_CMD}" + + clean_env_ctr +} + +main "$@" From 1edf2d9bc15e0556951e7753dc7d4e6ff94f5085 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 28 Sep 2023 22:37:02 +0000 Subject: [PATCH 295/339] tests: Add agent stability test This PR adds the agent stability test to stability test. Signed-off-by: Gabriela Cervantes (cherry picked from commit fd19f4082fd9728590523d2ebeb6907d7fdb3c44) --- tests/stability/agent_stability_test.sh | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/stability/agent_stability_test.sh diff --git a/tests/stability/agent_stability_test.sh b/tests/stability/agent_stability_test.sh new file mode 100644 index 000000000..0f235cacb --- /dev/null +++ b/tests/stability/agent_stability_test.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# +# Copyright (c) 2018-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will perform several execs to a +# running container, the main purpose of this +# test is to stress the agent + +set -e -x + +cidir=$(dirname "$0") + +source "${cidir}/../metrics/lib/common.bash" + +# Environment variables +IMAGE="${IMAGE:-quay.io/prometheus/busybox:latest}" +CONTAINER_NAME="${CONTAINER_NAME:-test}" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" + + +# Timeout is the duration of this test (seconds) +# We want to stress the agent for a significant +# time (approximately running for two days) +timeout=186400 +start_time=$(date +%s) +end_time=$((start_time+timeout)) + +function setup { + restart_containerd_service + sudo ctr image pull $IMAGE + sudo ctr run --runtime=$CTR_RUNTIME -d $IMAGE $CONTAINER_NAME sh -c $PAYLOAD_ARGS +} + +function exec_loop { + cmd="sudo ctr t exec --exec-id 1 $CONTAINER_NAME sh -c" + $cmd "echo 'hello world' > file" + $cmd "rm -rf /file" + $cmd "touch /tmp/execWorks" + $cmd "ls /tmp | grep execWorks" + $cmd "rm 
-rf /tmp/execWorks" + $cmd "ls /etc/foo" || echo "Fail expected" + $cmd "cat /tmp/one" || echo "Fail expected" + $cmd "exit 42" || echo "Fail expected" +} + +function teardown { + echo "Ending stability test" + clean_env_ctr +} +trap teardown EXIT + +echo "Starting stability test" +setup + +echo "Running stability test" +while [[ $end_time > $(date +%s) ]]; do + exec_loop +done From cf254bc4ee51ce45f33a42ee5e688c1e4baa6b86 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 2 Oct 2023 16:17:31 +0000 Subject: [PATCH 296/339] tests: Add general stability fixes This PR adds general stability fixes. Signed-off-by: Gabriela Cervantes (cherry picked from commit 6339605a14e9ba278bbfab92ea228498b412ef93) --- tests/stability/agent_stability_test.sh | 7 ++++--- tests/stability/cassandra_stress.sh | 8 ++++---- tests/stability/stressng.sh | 12 ++++++------ 3 files changed, 14 insertions(+), 13 deletions(-) mode change 100644 => 100755 tests/stability/agent_stability_test.sh diff --git a/tests/stability/agent_stability_test.sh b/tests/stability/agent_stability_test.sh old mode 100644 new mode 100755 index 0f235cacb..902d9d48c --- a/tests/stability/agent_stability_test.sh +++ b/tests/stability/agent_stability_test.sh @@ -34,8 +34,9 @@ function setup { } function exec_loop { - cmd="sudo ctr t exec --exec-id 1 $CONTAINER_NAME sh -c" + cmd="sudo ctr t exec --exec-id $(random_name) $CONTAINER_NAME sh -c" $cmd "echo 'hello world' > file" + $cmd "ls /file" $cmd "rm -rf /file" $cmd "touch /tmp/execWorks" $cmd "ls /tmp | grep execWorks" @@ -51,10 +52,10 @@ function teardown { } trap teardown EXIT -echo "Starting stability test" +info "Starting stability test" setup -echo "Running stability test" +info "Running stability test" while [[ $end_time > $(date +%s) ]]; do exec_loop done diff --git a/tests/stability/cassandra_stress.sh b/tests/stability/cassandra_stress.sh index cd666ca15..2d79d4f59 100755 --- a/tests/stability/cassandra_stress.sh +++ 
b/tests/stability/cassandra_stress.sh @@ -26,16 +26,16 @@ function main() { sudo -E "${CTR_EXE}" i import "${DOCKER_IMAGE}.tar" sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_NAME}" sh -c "${PAYLOAD_ARGS}" - sudo -E "${CTR_EXE}" t exec --exec-id 1 "${CONTAINER_NAME}" sh -c "${CMD}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${CMD}" info "Write one million rows" local WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 -rate threads=50" - sudo -E "${CTR_EXE}" t exec --exec-id 2 "${CONTAINER_NAME}" sh -c "${WRITE_CMD}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${WRITE_CMD}" info "Load one row with default schema" local CQL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1 c1=one -mode native cql3 -log file-create_schema.log" - sudo -E "${CTR_EXE}" t exec --exec-id 3 "${CONTAINER_NAME}" sh -c "${CQL_WRITE_CMD}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${CQL_WRITE_CMD}" info "Run a write workload using CQL" local REAL_WRITE_CMD="./opt/cassandra/tools/bin/cassandra-stress write n=1000000 cl=one -mode native cql3 -schema keyspace='keyspace1' -log file=load_1M_rows.log" - sudo -E "${CTR_EXE}" t exec --exec-id 4 "${CONTAINER_NAME}" sh -c "${REAL_WRITE_CMD}" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${REAL_WRITE_CMD}" clean_env_ctr } diff --git a/tests/stability/stressng.sh b/tests/stability/stressng.sh index 22ba0e0c9..9f95b8c68 100755 --- a/tests/stability/stressng.sh +++ b/tests/stability/stressng.sh @@ -26,32 +26,32 @@ function main() { # Run 1 iomix stressor (mix of I/O operations) for 20 seconds with verbose output info "Running iomix stressor test" IOMIX_CMD="stress-ng --iomix 1 -t 20 -v" - sudo -E ctr t exec --exec-id 1 "${CONTAINER_NAME}" sh -c "${IOMIX_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c 
"${IOMIX_CMD}" # Run cpu stressors and virtual memory stressors for 5 minutes info "Running memory stressors for 5 minutes" MEMORY_CMD="stress-ng --cpu 2 --vm 4 -t 5m" - sudo -E ctr t exec --exec-id 2 "${CONTAINER_NAME}" sh -c "${MEMORY_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${MEMORY_CMD}" # Run shared memory stressors info "Running 8 shared memory stressors" SHARED_CMD="stress-ng --shm 0" - sudo -E ctr t exec --exec-id 3 "${CONTAINER_NAME}" sh -c "${SHARED_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${SHARED_CMD}" # Run all stressors one by one on all CPUs info "Running all stressors one by one" STRESSORS_CMD="stress-ng --seq 0 -t 10 --tz -v" - sudo -E ctr t exec --exec-id 4 "${CONTAINER_NAME}" sh -c "${STRESSORS_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${STRESSORS_CMD}" # Test floating point on CPU for 60 seconds info "Running floating tests on CPU" FLOAT_CMD="stress-ng --matrix 1 -t 1m" - sudo -E ctr t exec --exec-id 5 "${CONTAINER_NAME}" sh -c "${FLOAT_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${FLOAT_CMD}" # Runs two instances of the CPU stressors, one instance of the matrix info "Running instances of the CPU stressors" INSTANCE_CMD='stress-ng --cpu 2 --matrix 1 --mq 3 -t 5m' - sudo -E ctr t exec --exec-id 6 "${CONTAINER_NAME}" sh -c "${INSTANCE_CMD}" + sudo -E ctr t exec --exec-id "$(random_name)" "${CONTAINER_NAME}" sh -c "${INSTANCE_CMD}" clean_env_ctr } From 3770b200a861ac9c2339e73e47d681c4e6261f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 17:21:40 +0200 Subject: [PATCH 297/339] gha: Fix k0s deployment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tests are failing when setting up k0s, and that happens because we download a kubectl binary matching the kubernetes version k0s is using, and we do that by: ``` 
sudo k0s kubectl version --short 2>/dev/null | ... ``` With kubectl 1.28, which is now the default on k0s, `kubectl version --short` has been removed, leading us to an empty string, thus causing the error in the CI. Fixes: #8105 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 70e7ec3e23020919fbaf83d7b66e40f853f1e4b1) --- tests/gha-run-k8s-common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 2db44cfee..44b02f32e 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -128,7 +128,7 @@ function deploy_k0s() { if [ "${ARCH}" = "x86_64" ]; then ARCH=amd64 fi - kubectl_version=$(sudo k0s kubectl version --short 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //') + kubectl_version=$(sudo k0s kubectl version 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //') sudo curl -fL --progress-bar -o /usr/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${kubectl_version}/bin/linux/${ARCH}/kubectl sudo chmod +x /usr/bin/kubectl From 050a4260b9b663773d29b54d301e460590ea729d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:17:38 +0200 Subject: [PATCH 298/339] packaging: Add get_agent_image_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used for building the kata-agent.
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 45c1188839f30b313dc6f7d33a55269e49768f50) --- tools/packaging/scripts/lib.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/packaging/scripts/lib.sh b/tools/packaging/scripts/lib.sh index fe1faf5af..d1e17e20d 100644 --- a/tools/packaging/scripts/lib.sh +++ b/tools/packaging/scripts/lib.sh @@ -226,3 +226,10 @@ get_tools_image_name() { echo "${BUILDER_REGISTRY}:tools-$(get_last_modification ${tools_dir})-$(get_last_modification ${libs_dir})-$(get_last_modification ${agent_dir})" } + +get_agent_image_name() { + libs_dir="${repo_root_dir}/src/libs" + agent_dir="${repo_root_dir}/src/agent" + + echo "${BUILDER_REGISTRY}:agent-$(get_last_modification ${libs_dir})-$(get_last_modification ${agent_dir})" +} From 766a5fa1180a7b0245b40fbde2edcc7265dd6375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:18:45 +0200 Subject: [PATCH 299/339] agent: Allow specifying DESTDIR and AGENT_POLICY via env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will help to build the agent binary as part of the kata-deploy localbuild, as we need to pass the DESTDIR to where the agent will be installed, and also whether we're building the agent with policy support enabled or not. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 1727487eef00283b159bb829540913cc5fa7a5da) Conflicts: src/agent/Makefile The conflict happened as AGENT_POLICY is not part of the stable-3.2, thus I've manually removed it. 
--- src/agent/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/Makefile b/src/agent/Makefile index a47e69911..94864aa9b 100644 --- a/src/agent/Makefile +++ b/src/agent/Makefile @@ -54,7 +54,7 @@ endif TARGET_PATH = target/$(TRIPLE)/$(BUILD_TYPE)/$(TARGET) ##VAR DESTDIR= is a directory prepended to each installed target file -DESTDIR := +DESTDIR ?= ##VAR BINDIR= is a directory for installing executable programs BINDIR := /usr/bin From a586b8c5815ce80ad939ea7bca71892f374832d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:20:23 +0200 Subject: [PATCH 300/339] packaging: Build the kata-agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's add the needed functions to start building the kata-agent, with or without the OPA support. For now this build is not used as part of the rootfs build, but later on this will (not as part of this series, though). Fixes: #8099 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 5208386ab18a3e7873b4ec9a9efa9c8aeaddc7b0) --- .../kata-deploy/local-build/Makefile | 6 +++ .../kata-deploy-binaries-in-docker.sh | 2 + .../local-build/kata-deploy-binaries.sh | 35 ++++++++++++++++++ tools/packaging/static-build/agent/Dockerfile | 21 +++++++++++ .../static-build/agent/build-static-agent.sh | 37 +++++++++++++++++++ tools/packaging/static-build/agent/build.sh | 31 ++++++++++++++++ 6 files changed, 132 insertions(+) create mode 100644 tools/packaging/static-build/agent/Dockerfile create mode 100755 tools/packaging/static-build/agent/build-static-agent.sh create mode 100755 tools/packaging/static-build/agent/build.sh diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 041cd5c80..db9218ac2 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -52,6 +52,12 @@ serial-targets: 
%-tarball-build: $(MK_DIR)/dockerbuild/install_yq.sh $(call BUILD,$*) +agent-tarball: + ${MAKE} $@-build + +agent-opa-tarball: + ${MAKE} $@-build + agent-ctl-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 47cf2dd1d..64f505cd5 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -84,6 +84,7 @@ ARTEFACT_REGISTRY_PASSWORD="${ARTEFACT_REGISTRY_PASSWORD:-}" TARGET_BRANCH="${TARGET_BRANCH:-}" BUILDER_REGISTRY="${BUILDER_REGISTRY:-}" PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY:-"no"}" +AGENT_CONTAINER_BUILDER="${AGENT_CONTAINER_BUILDER:-}" INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER:-}" KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER:-}" OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" @@ -106,6 +107,7 @@ docker run \ --env TARGET_BRANCH="${TARGET_BRANCH}" \ --env BUILDER_REGISTRY="${BUILDER_REGISTRY}" \ --env PUSH_TO_REGISTRY="${PUSH_TO_REGISTRY}" \ + --env AGENT_CONTAINER_BUILDER="${AGENT_CONTAINER_BUILDER}" \ --env INITRAMFS_CONTAINER_BUILDER="${INITRAMFS_CONTAINER_BUILDER}" \ --env KERNEL_CONTAINER_BUILDER="${KERNEL_CONTAINER_BUILDER}" \ --env OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER}" \ diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index c576fbab5..a4753c47d 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -22,6 +22,7 @@ readonly static_build_dir="${repo_root_dir}/tools/packaging/static-build" readonly version_file="${repo_root_dir}/VERSION" readonly versions_yaml="${repo_root_dir}/versions.yaml" +readonly agent_builder="${static_build_dir}/agent/build.sh" readonly 
clh_builder="${static_build_dir}/cloud-hypervisor/build-static-clh.sh" readonly firecracker_builder="${static_build_dir}/firecracker/build-static-firecracker.sh" readonly initramfs_builder="${static_build_dir}/initramfs/build.sh" @@ -81,6 +82,8 @@ options: -s : Silent mode (produce output in case of failure only) --build= : all + agent + agent-opa agent-ctl cloud-hypervisor cloud-hypervisor-glibc @@ -622,6 +625,32 @@ install_ovmf_sev() { install_ovmf "sev" "edk2-sev.tar.gz" } +install_agent_helper() { + agent_policy="${1:-no}" + + latest_artefact="$(git log -1 --pretty=format:"%h" ${repo_root_dir}/src/agent)" + latest_builder_image="$(get_agent_image_name)" + + install_cached_tarball_component \ + "${build_target}" \ + "${latest_artefact}" \ + "${latest_builder_image}" \ + "${final_tarball_name}" \ + "${final_tarball_path}" \ + && return 0 + + info "build static agent" + DESTDIR="${destdir}" AGENT_POLICY=${agent_policy} "${agent_builder}" +} + +install_agent() { + install_agent_helper +} + +install_agent_opa() { + install_agent_helper "yes" +} + install_tools_helper() { tool=${1} @@ -717,6 +746,10 @@ handle_build() { install_virtiofsd ;; + agent) install_agent ;; + + agent-opa) install_agent_opa ;; + agent-ctl) install_agent_ctl ;; cloud-hypervisor) install_clh ;; @@ -824,6 +857,8 @@ main() { local build_targets local silent build_targets=( + agent + agent-opa agent-ctl cloud-hypervisor firecracker diff --git a/tools/packaging/static-build/agent/Dockerfile b/tools/packaging/static-build/agent/Dockerfile new file mode 100644 index 000000000..c72104cb5 --- /dev/null +++ b/tools/packaging/static-build/agent/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Intel +# +# SPDX-License-Identifier: Apache-2.0 + +FROM alpine:3.18 +ARG RUST_TOOLCHAIN + +SHELL ["/bin/ash", "-o", "pipefail", "-c"] +RUN apk --no-cache add \ + bash \ + curl \ + gcc \ + git \ + libcap-ng-static \ + libseccomp-static \ + make \ + musl-dev \ + openssl-dev \ + openssl-libs-static \ + protoc && \ + 
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain ${RUST_TOOLCHAIN} diff --git a/tools/packaging/static-build/agent/build-static-agent.sh b/tools/packaging/static-build/agent/build-static-agent.sh new file mode 100755 index 000000000..1d7389c33 --- /dev/null +++ b/tools/packaging/static-build/agent/build-static-agent.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +source "${script_dir}/../../scripts/lib.sh" + +init_env() { + source "$HOME/.cargo/env" + + export LIBC=musl + export LIBSECCOMP_LINK_TYPE=static + export LIBSECCOMP_LIB_PATH=/usr/lib + + # This is needed to workaround + # https://github.com/sfackler/rust-openssl/issues/1624 + export OPENSSL_NO_VENDOR=Y +} + +build_agent_from_source() { + echo "build agent from source" + + init_env + + cd src/agent + DESTDIR=${DESTDIR} AGENT_POLICY=${AGENT_POLICY} make + DESTDIR=${DESTDIR} AGENT_POLICY=${AGENT_POLICY} make install +} + +build_agent_from_source $@ diff --git a/tools/packaging/static-build/agent/build.sh b/tools/packaging/static-build/agent/build.sh new file mode 100755 index 000000000..d847092e4 --- /dev/null +++ b/tools/packaging/static-build/agent/build.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 Intel +# +# SPDX-License-Identifier: Apache-2.0 + +set -o errexit +set -o nounset +set -o pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly agent_builder="${script_dir}/build-static-agent.sh" + +source "${script_dir}/../../scripts/lib.sh" + +container_image="${AGENT_CONTAINER_BUILDER:-$(get_agent_image_name)}" +[ "${CROSS_BUILD}" == "true" ] && container_image="${container_image}-cross-build" + +sudo docker pull ${container_image} || \ + (sudo docker $BUILDX build $PLATFORM \ + --build-arg RUST_TOOLCHAIN="$(get_from_kata_deps 
"languages.rust.meta.newest-version")" \ + -t "${container_image}" "${script_dir}" && \ + # No-op unless PUSH_TO_REGISTRY is exported as "yes" + push_to_registry "${container_image}") + +sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ + --env DESTDIR=${DESTDIR} \ + --env AGENT_POLICY=${AGENT_POLICY:-no} \ + -w "${repo_root_dir}" \ + "${container_image}" \ + bash -c "${agent_builder}" From 892c9f2f03ab4dabbb2be607d246ad0ff85f60fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:21:54 +0200 Subject: [PATCH 301/339] gha: Build the kata-agent as part of our workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kata-agent binary won't be released, just built so it can be used, later on, as part of our tests and as part of the rootfs build. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 02acef9575b55cd69f101d80d081990a2c5f799d) --- .github/workflows/build-kata-static-tarball-amd64.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index dcfbfdc03..5df5e2772 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -27,6 +27,8 @@ jobs: strategy: matrix: asset: + - agent + - agent-opa - agent-ctl - cloud-hypervisor - cloud-hypervisor-glibc @@ -59,6 +61,10 @@ jobs: stage: - ${{ inputs.stage }} exclude: + - asset: agent + stage: release + - asset: agent-opa + stage: release - asset: cloud-hypervisor-glibc stage: release steps: From bb1efe0d461d61a63204a6c63e7786c684d2cedf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 6 Oct 2023 15:22:23 +0200 Subject: [PATCH 302/339] packaging: stable-3.2: Remove everything related to agent policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent policy 
is not part of the stable-3.2 branch, and will never be. Signed-off-by: Fabiano Fidêncio --- .github/workflows/build-kata-static-tarball-amd64.yaml | 3 --- tools/packaging/kata-deploy/local-build/Makefile | 3 --- .../kata-deploy/local-build/kata-deploy-binaries.sh | 8 -------- tools/packaging/static-build/agent/build-static-agent.sh | 4 ++-- tools/packaging/static-build/agent/build.sh | 1 - 5 files changed, 2 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index 5df5e2772..48406ae27 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -28,7 +28,6 @@ jobs: matrix: asset: - agent - - agent-opa - agent-ctl - cloud-hypervisor - cloud-hypervisor-glibc @@ -63,8 +62,6 @@ jobs: exclude: - asset: agent stage: release - - asset: agent-opa - stage: release - asset: cloud-hypervisor-glibc stage: release steps: diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index db9218ac2..d9e28a547 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -55,9 +55,6 @@ serial-targets: agent-tarball: ${MAKE} $@-build -agent-opa-tarball: - ${MAKE} $@-build - agent-ctl-tarball: ${MAKE} $@-build diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index a4753c47d..bb68d883b 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -83,7 +83,6 @@ options: --build= : all agent - agent-opa agent-ctl cloud-hypervisor cloud-hypervisor-glibc @@ -647,10 +646,6 @@ install_agent() { install_agent_helper } -install_agent_opa() { - install_agent_helper "yes" -} - install_tools_helper() { tool=${1} @@ -748,8 +743,6 @@ 
handle_build() { agent) install_agent ;; - agent-opa) install_agent_opa ;; - agent-ctl) install_agent_ctl ;; cloud-hypervisor) install_clh ;; @@ -858,7 +851,6 @@ main() { local silent build_targets=( agent - agent-opa agent-ctl cloud-hypervisor firecracker diff --git a/tools/packaging/static-build/agent/build-static-agent.sh b/tools/packaging/static-build/agent/build-static-agent.sh index 1d7389c33..126e22d52 100755 --- a/tools/packaging/static-build/agent/build-static-agent.sh +++ b/tools/packaging/static-build/agent/build-static-agent.sh @@ -30,8 +30,8 @@ build_agent_from_source() { init_env cd src/agent - DESTDIR=${DESTDIR} AGENT_POLICY=${AGENT_POLICY} make - DESTDIR=${DESTDIR} AGENT_POLICY=${AGENT_POLICY} make install + DESTDIR=${DESTDIR} make + DESTDIR=${DESTDIR} make install } build_agent_from_source $@ diff --git a/tools/packaging/static-build/agent/build.sh b/tools/packaging/static-build/agent/build.sh index d847092e4..5757952d3 100755 --- a/tools/packaging/static-build/agent/build.sh +++ b/tools/packaging/static-build/agent/build.sh @@ -25,7 +25,6 @@ sudo docker pull ${container_image} || \ sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ --env DESTDIR=${DESTDIR} \ - --env AGENT_POLICY=${AGENT_POLICY:-no} \ -w "${repo_root_dir}" \ "${container_image}" \ bash -c "${agent_builder}" From 508016fca127c5d2e94acb926e0d9376658e6dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:24:10 +0200 Subject: [PATCH 303/339] packaging: Allow passing the TOOLS_CONTAINER_BUILDER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This follows what we've been doing for all the components we're building, but was missed as part of #8077. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 5ca66795c7f5d9cc933503fbac52211351bc5a79) --- .../kata-deploy/local-build/kata-deploy-binaries-in-docker.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 64f505cd5..080aa1a12 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -114,6 +114,7 @@ docker run \ --env QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER}" \ --env SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER}" \ --env TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER}" \ + --env TOOLS_CONTAINER_BUILDER="${TOOLS_CONTAINER_BUILDER}" \ --env VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER}" \ --env MEASURED_ROOTFS="${MEASURED_ROOTFS}" \ --env USE_CACHE="${USE_CACHE}" \ From 95da1c71ecea55e5f82b4c872ee4da0b36daa4e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:25:24 +0200 Subject: [PATCH 304/339] packaging: tools: Fix container image env var name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should be TOOLS_CONTAINER_BUILDER instead of VIRTIOFSD_CONTAINER_BUILDER. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit ca3b8883716aef524df61a018bbf5e1564ee22d5) --- .../kata-deploy/local-build/kata-deploy-binaries-in-docker.sh | 1 + tools/packaging/static-build/tools/build.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh index 080aa1a12..19653720e 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries-in-docker.sh @@ -91,6 +91,7 @@ OVMF_CONTAINER_BUILDER="${OVMF_CONTAINER_BUILDER:-}" QEMU_CONTAINER_BUILDER="${QEMU_CONTAINER_BUILDER:-}" SHIM_V2_CONTAINER_BUILDER="${SHIM_V2_CONTAINER_BUILDER:-}" TDSHIM_CONTAINER_BUILDER="${TDSHIM_CONTAINER_BUILDER:-}" +TOOLS_CONTAINER_BUILDER="${TOOLS_CONTAINER_BUILDER:-}" VIRTIOFSD_CONTAINER_BUILDER="${VIRTIOFSD_CONTAINER_BUILDER:-}" MEASURED_ROOTFS="${MEASURED_ROOTFS:-}" USE_CACHE="${USE_CACHE:-}" diff --git a/tools/packaging/static-build/tools/build.sh b/tools/packaging/static-build/tools/build.sh index 11abe7bb2..a4dd958c4 100755 --- a/tools/packaging/static-build/tools/build.sh +++ b/tools/packaging/static-build/tools/build.sh @@ -15,7 +15,7 @@ source "${script_dir}/../../scripts/lib.sh" tool="${1}" -container_image="${VIRTIOFSD_CONTAINER_BUILDER:-$(get_tools_image_name)}" +container_image="${TOOLS_CONTAINER_BUILDER:-$(get_tools_image_name)}" [ "${CROSS_BUILD}" == "true" ] && container_image="${container_image}-cross-build" sudo docker pull ${container_image} || \ From 1941d87b84022157299d34698c6be666b4f8d8bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 14:27:56 +0200 Subject: [PATCH 305/339] packaging: release: Mention newly added images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've added two new containerd builder images 
recently, one for the components under `src/tools` and another one for the Kata Containers agent. Signed-off-by: Fabiano Fidêncio (cherry picked from commit 18fa483d907d59e5dec98d837fca64fa27f6bc26) --- tools/packaging/release/release-notes.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/packaging/release/release-notes.sh b/tools/packaging/release/release-notes.sh index 254aa255b..734028dc0 100755 --- a/tools/packaging/release/release-notes.sh +++ b/tools/packaging/release/release-notes.sh @@ -140,18 +140,22 @@ The majority of the components of the project were built using containers. In o build reproducibility we publish those container images, and when those are used combined with the version of the projects listed as part of the "versions.yaml" file, users can get as close to the environment we used to build the release artefacts. +* agent (on all its different flavours): $(get_agent_image_name) * Kernel (on all its different flavours): $(get_kernel_image_name) * OVMF (on all its different flavours): $(get_ovmf_image_name) * QEMU (on all its different flavurs): $(get_qemu_image_name) * shim-v2: $(get_shim_v2_image_name) +* tools: $(get_tools_image_name) * virtiofsd: $(get_virtiofsd_image_name) The users who want to rebuild the tarballs using exactly the same images can simply use the following environment variables: +* \`AGENT_CONTAINER_BUILDER\` * \`KERNEL_CONTAINER_BUILDER\` * \`OVMF_CONTAINER_BUILDER\` * \`QEMU_CONTAINER_BUILDER\` * \`SHIM_V2_CONTAINER_BUILDER\` +* \`TOOLS_CONTAINER_BUILDER\` * \`VIRTIOFSD_CONTAINER_BUILDER\` ## Kata Linux Containers Kernel From f20164dc75c21e25f682e9235535f76f2b7abaf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 3 Oct 2023 15:31:53 +0200 Subject: [PATCH 306/339] packaging: tools: Remove `set -x` leftover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was used for debugging, and ended up being merged with that. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 560bbffb57702c60bc441f5decb10d0db0b270d4) --- tools/packaging/static-build/tools/build-static-tools.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/packaging/static-build/tools/build-static-tools.sh b/tools/packaging/static-build/tools/build-static-tools.sh index 15e9f740a..2004fcf90 100755 --- a/tools/packaging/static-build/tools/build-static-tools.sh +++ b/tools/packaging/static-build/tools/build-static-tools.sh @@ -23,7 +23,6 @@ init_env() { } build_tool_from_source() { - set -x tool=${1} echo "build ${tool} from source" From be3a3c221b26ebed0cc79af6ac6d0029b4f122fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 4 Oct 2023 11:05:59 +0200 Subject: [PATCH 307/339] gha: arm64: Ensure the builder is arm64-builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we'll use any arm64 machine that's added as a runner, and whenever new machines are added those may end up being only used for running some specific set of the tests. 
Fixes: #8109 Signed-off-by: Fabiano Fidêncio (cherry picked from commit 119f03de262bbb94dfea726ba8e429ee62cc113c) --- .github/workflows/build-kata-static-tarball-arm64.yaml | 4 ++-- .github/workflows/publish-kata-deploy-payload-arm64.yaml | 2 +- .github/workflows/release-arm64.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-kata-static-tarball-arm64.yaml b/.github/workflows/build-kata-static-tarball-arm64.yaml index 4225abee8..81095e000 100644 --- a/.github/workflows/build-kata-static-tarball-arm64.yaml +++ b/.github/workflows/build-kata-static-tarball-arm64.yaml @@ -23,7 +23,7 @@ on: jobs: build-asset: - runs-on: arm64 + runs-on: arm64-builder strategy: matrix: asset: @@ -87,7 +87,7 @@ jobs: if-no-files-found: error create-kata-tarball: - runs-on: arm64 + runs-on: arm64-builder needs: build-asset steps: - name: Adjust a permission for repo diff --git a/.github/workflows/publish-kata-deploy-payload-arm64.yaml b/.github/workflows/publish-kata-deploy-payload-arm64.yaml index 0bdf909f1..f198814fe 100644 --- a/.github/workflows/publish-kata-deploy-payload-arm64.yaml +++ b/.github/workflows/publish-kata-deploy-payload-arm64.yaml @@ -24,7 +24,7 @@ on: jobs: kata-payload: - runs-on: arm64 + runs-on: arm64-builder steps: - name: Adjust a permission for repo run: | diff --git a/.github/workflows/release-arm64.yaml b/.github/workflows/release-arm64.yaml index cd7db8fdf..136177580 100644 --- a/.github/workflows/release-arm64.yaml +++ b/.github/workflows/release-arm64.yaml @@ -14,7 +14,7 @@ jobs: kata-deploy: needs: build-kata-static-tarball-arm64 - runs-on: arm64 + runs-on: arm64-builder steps: - name: Login to Kata Containers docker.io uses: docker/login-action@v2 From a4e0929054e3340d6f344cd3bca9f9d20d2f7e3f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Tue, 3 Oct 2023 17:36:39 +0000 Subject: [PATCH 308/339] gha: Add stability tests workflow for gha This PR adds the stability test workflow for gha for the kata CI. 
Fixes #8107 Signed-off-by: Gabriela Cervantes (cherry picked from commit 54f0c8f88ee49367d7499cb204fd71b7d5ffbb95) --- .../run-containerd-stability-tests.yaml | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/run-containerd-stability-tests.yaml diff --git a/.github/workflows/run-containerd-stability-tests.yaml b/.github/workflows/run-containerd-stability-tests.yaml new file mode 100644 index 000000000..76de75d9e --- /dev/null +++ b/.github/workflows/run-containerd-stability-tests.yaml @@ -0,0 +1,50 @@ +name: CI | Run containerd stability tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-containerd-stability: + strategy: + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/stability/gha-run.sh install-kata kata-artifacts + + - name: Run containerd-stability tests + run: bash tests/stability/gha-run.sh run From 7963298ba26c102c6c325b38ed938b25e5354d42 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 4 Oct 2023 17:45:45 +0000 Subject: [PATCH 309/339] gha: Add stability gha run script This PR adds the stability gha run script. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 85d290a04830c21c4cec2687ac7b3b0165e67840) --- tests/stability/gha-run.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 tests/stability/gha-run.sh diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh new file mode 100755 index 000000000..66e6f21c0 --- /dev/null +++ b/tests/stability/gha-run.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +stability_dir="$(dirname "$(readlink -f "$0")")" +source "${stability_dir}/../common.bash" + +function run() { + info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" + + # export ITERATIONS=2 MAX_CONTAINERS=20 + # bash "${stability_dir}/soak_parallel_rm.sh" +} + +function main() { + action="${1:-}" + case "${action}" in + install-kata) install_kata ;; + enabling-hypervisor) enabling_hypervisor ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" From fe4f72e0a15569805b0088e6cf4ff8498147078b Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 5 Oct 2023 15:21:24 +0000 Subject: [PATCH 310/339] gha: Add containerd stability tests to ci yaml This PR adds containerd stability tests to ci yaml. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 0f2dc8c675c368eadee2b2fa3dfbc9897aaa2581) --- .github/workflows/ci.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 35317c528..c7c98776d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -217,6 +217,14 @@ jobs: commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + run-containerd-stability-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-containerd-stability-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-nydus-tests: needs: build-kata-static-tarball-amd64 uses: ./.github/workflows/run-nydus-tests.yaml From 1f9a4e908f1ba19159982a65c7e3c3d40d38d782 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Wed, 27 Sep 2023 16:37:53 -0600 Subject: [PATCH 311/339] metrics: Enables FIO test for kata containers FIO benchmark is enabled to measure IO in Kata at different latencies using containerd client, in order to complement the CI metrics testing set. This PR asl deprecated the previous Fio bench based on k8s. 
Fixes: #8080 Signed-off-by: David Esparza (cherry picked from commit a2159a6361200a2d9ce59178de6378d98f705a34) --- .../metrics/storage/fio-dockerfile/Dockerfile | 26 ++ .../fio-dockerfile/workload/fio_bench.sh | 129 ++++++ tests/metrics/storage/fio-k8s/.gitignore | 1 - tests/metrics/storage/fio-k8s/Makefile | 31 -- tests/metrics/storage/fio-k8s/README.md | 30 -- .../storage/fio-k8s/cmd/fiotest/Makefile | 27 -- .../storage/fio-k8s/cmd/fiotest/go.mod | 24 -- .../storage/fio-k8s/cmd/fiotest/go.sum | 31 -- .../storage/fio-k8s/cmd/fiotest/main.go | 373 ------------------ .../example-config/fio-jobs/randrw-async.job | 10 - .../example-config/fio-jobs/randrw-libaio.job | 10 - .../example-config/fio-jobs/randrw-sync.job | 10 - .../configs/example-config/kata-clh.yaml | 17 - .../configs/example-config/kata-qemu.yaml | 17 - .../fio-k8s/configs/example-config/kata.yaml | 16 - .../fio-k8s/configs/example-config/runc.yaml | 15 - .../test-config/fio-jobs/randread-libaio.job | 9 - .../test-config/fio-jobs/randread-mmpap.job | 9 - .../test-config/fio-jobs/randrw-libaio.job | 10 - .../test-config/fio-jobs/randrw-mmap.job | 10 - .../test-config/fio-jobs/randwrite-libaio.job | 9 - .../test-config/fio-jobs/randwrite-mmap.job | 9 - .../test-config/fio-jobs/seqread-libaio.job | 9 - .../test-config/fio-jobs/seqread-mmap.job | 9 - .../test-config/fio-jobs/seqread-psync.job | 8 - .../test-config/fio-jobs/seqwrite-libaio.job | 9 - .../test-config/fio-jobs/seqwrite-mmap.job | 10 - .../fio-k8s/configs/test-config/kata.yaml | 16 - .../fio-k8s/configs/test-config/runc.yaml | 15 - tests/metrics/storage/fio-k8s/fio-test-ci.sh | 86 ---- .../metrics/storage/fio-k8s/pkg/env/Makefile | 9 - tests/metrics/storage/fio-k8s/pkg/env/env.go | 38 -- tests/metrics/storage/fio-k8s/pkg/env/go.mod | 10 - tests/metrics/storage/fio-k8s/pkg/env/go.sum | 2 - .../metrics/storage/fio-k8s/pkg/exec/Exec.go | 67 ---- tests/metrics/storage/fio-k8s/pkg/exec/go.mod | 5 - tests/metrics/storage/fio-k8s/pkg/exec/go.sum | 2 - 
.../metrics/storage/fio-k8s/pkg/k8s/Makefile | 8 - tests/metrics/storage/fio-k8s/pkg/k8s/exec.go | 34 -- tests/metrics/storage/fio-k8s/pkg/k8s/go.mod | 10 - tests/metrics/storage/fio-k8s/pkg/k8s/go.sum | 2 - tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go | 68 ---- .../fio-k8s/scripts/dax-compare-test/Makefile | 10 - .../scripts/dax-compare-test/README.md | 47 --- .../dax-compare-test/compare-virtiofsd-dax.sh | 151 ------- .../scripts/dax-compare-test/report/fio.ipynb | 51 --- .../scripts/dax-compare-test/report/fio.py | 102 ----- .../report/gen-html-fio-report.sh | 48 --- .../report/run-docker-jupyter-server.sh | 39 -- tests/metrics/storage/fio_test.sh | 161 ++++++++ 50 files changed, 316 insertions(+), 1533 deletions(-) create mode 100644 tests/metrics/storage/fio-dockerfile/Dockerfile create mode 100755 tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh delete mode 100644 tests/metrics/storage/fio-k8s/.gitignore delete mode 100644 tests/metrics/storage/fio-k8s/Makefile delete mode 100644 tests/metrics/storage/fio-k8s/README.md delete mode 100644 tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile delete mode 100644 tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod delete mode 100644 tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum delete mode 100644 tests/metrics/storage/fio-k8s/cmd/fiotest/main.go delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml delete mode 100644 tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml delete mode 100644 
tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml delete mode 100644 tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml delete mode 100755 tests/metrics/storage/fio-k8s/fio-test-ci.sh delete mode 100644 tests/metrics/storage/fio-k8s/pkg/env/Makefile delete mode 100644 tests/metrics/storage/fio-k8s/pkg/env/env.go delete mode 100644 tests/metrics/storage/fio-k8s/pkg/env/go.mod delete mode 100644 tests/metrics/storage/fio-k8s/pkg/env/go.sum delete mode 100644 tests/metrics/storage/fio-k8s/pkg/exec/Exec.go delete mode 100644 tests/metrics/storage/fio-k8s/pkg/exec/go.mod delete mode 100644 tests/metrics/storage/fio-k8s/pkg/exec/go.sum delete mode 100644 tests/metrics/storage/fio-k8s/pkg/k8s/Makefile delete mode 100644 tests/metrics/storage/fio-k8s/pkg/k8s/exec.go delete mode 100644 tests/metrics/storage/fio-k8s/pkg/k8s/go.mod delete mode 100644 tests/metrics/storage/fio-k8s/pkg/k8s/go.sum delete mode 100644 
tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md delete mode 100755 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh delete mode 100644 tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh create mode 100755 tests/metrics/storage/fio_test.sh diff --git a/tests/metrics/storage/fio-dockerfile/Dockerfile b/tests/metrics/storage/fio-dockerfile/Dockerfile new file mode 100644 index 000000000..62bba2772 --- /dev/null +++ b/tests/metrics/storage/fio-dockerfile/Dockerfile @@ -0,0 +1,26 @@ +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with 'fio io tester' installed + +FROM docker.io/library/ubuntu:22.04 + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +# URL for the fio tester +ENV FIO_TOOLING_URL "https://github.com/axboe/fio" + +RUN apt-get update --quiet && \ + apt-get install --quiet --no-install-recommends -y \ + bash \ + util-linux \ + procps \ + fio && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/ + +COPY workload/fio_bench.sh / +WORKDIR / +CMD ["/bin/bash"] diff --git a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh new file mode 100755 index 000000000..43aed0136 --- /dev/null +++ b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Description of the test: +# This test runs the 
'fio benchmark' on kata containers +# https://fio.readthedocs.io/en/latest/ + +set -o pipefail + +# FIO variable settings + +# io-types supported: +# read, write, randread, randwrite, randrw, readwrite +io_type="read" +block_size="4k" +num_jobs="2" + +# FIO default settings +readonly ioengine="libaio" +readonly rate_process="linear" +readonly disable_buffered="1" +readonly iodepth="2" +readonly runtime="10s" +# ramp time +readonly rt="10s" +readonly fname="test.fio" +readonly workload_dir="/" +readonly workload_file="${workload_dir}${fname}" +readonly workload_size="10G" +readonly summary_file_local="/results.json" + +# Show help about this script +function help() { +cat << EOF +Usage: $0 + Description: + Runs FIO test using ctr to excercise IO in kata containers. + + Params: + + Operations are: + run-read-4k + run-write-4k + run-randread-4k + run-randwrite-4k + run-read-64k + run-write-64k + run-randread-64k + run-randwrite-64k + + : [Mandatory] + : [Optional] Any of the FIO supported ioengines, default: libaio. +EOF +} + +# Run from the host +function setup_workload() { + # create workload file: + if [ ! 
-f ${workload_file} ]; then + pushd "${workload_dir}" > /dev/null 2>&1 + dd if=/dev/urandom of="${workload_file}" bs=64M count=160 > /dev/null 2>&1 + fi +} + +# Run inside container +function launch_workload() { + # the parameters used in the test_name are accesible globally + local test_name="${io_type}_${block_size}_nj-${num_jobs}_${rate_process}_iodepth-${iodepth}_io-direct-${disable_buffered}" + + setup_workload + rm -f "${summary_file_local}" > /dev/null 2>&1 + fio \ + --name="${test_name}" \ + --output-format="json" \ + --filename="${workload_file}" \ + --size="${workload_size}" \ + --rate_process="${rate_process}" \ + --runtime="${runtime}" \ + --ioengine="${ioengine}" \ + --rw="${io_type}" \ + --direct="${disable_buffered}" \ + --numjobs="${num_jobs}" \ + --blocksize="${block_size}" \ + --ramp_time="${rt}" \ + --iodepth="${iodepth}" \ + --gtod_reduce="1" \ + --randrepeat="1" \ + | tee -a ${summary_file_local} > /dev/null 2>&1 +} + +function print_latest_results() { + [ ! -f "${summary_file_local}" ] && echo "Error: no results to display; you must run a test before requesting results display" && exit 1 + echo "$(cat ${summary_file_local})" +} + +function delete_workload() { + rm -f "${workload_file}" > /dev/null 2>&1 +} + +function main() { + local action="${1:-}" + num_jobs="${2:-1}" + + [[ ! 
${num_jobs} =~ ^[0-9]+$ ]] && die "The number of jobs must be a positive integer" + + case "${action}" in + run-read-4k) launch_workload ;; + run-read-64k) block_size="64k" && launch_workload ;; + + run-write-4k) io_type="write" && launch_workload ;; + run-write-64k) block_size="64k" && io_type="write" && launch_workload ;; + + run-randread-4k) io_type="randread" && launch_workload ;; + run-randread-64k) block_size="64k" && io_type="randread" && launch_workload ;; + + run-randwrite-4k) io_type="randwrite" && launch_workload ;; + run-randwrite-64k) block_size="64k" && io_type="randwrite" && launch_workload ;; + + print-latest-results) print_latest_results ;; + delete-workload) delete_workload ;; + + *) >&2 echo "Invalid argument" ; help ; exit 1;; + esac +} + +main "$@" diff --git a/tests/metrics/storage/fio-k8s/.gitignore b/tests/metrics/storage/fio-k8s/.gitignore deleted file mode 100644 index 1caa97b15..000000000 --- a/tests/metrics/storage/fio-k8s/.gitignore +++ /dev/null @@ -1 +0,0 @@ -./cmd/fiotest/fio-k8s diff --git a/tests/metrics/storage/fio-k8s/Makefile b/tests/metrics/storage/fio-k8s/Makefile deleted file mode 100644 index 8bdcf91f6..000000000 --- a/tests/metrics/storage/fio-k8s/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2021-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) - -build: - make -C $(MKFILE_DIR)/cmd/fiotest/ gomod - make -C $(MKFILE_DIR)/cmd/fiotest/ build - -test-report: - $(MKFILE_DIR)/scripts/dax-compare-test/report/gen-html-fio-report.sh $(MKFILE_DIR)/cmd/fiotest/test-results/ - -test-report-interactive: - $(MKFILE_DIR)/scripts/dax-compare-test/report/run-docker-jupyter-server.sh $(MKFILE_DIR)/cmd/fiotest/test-results/ - -test: build - make -C $(MKFILE_DIR)/cmd/fiotest/ run - make test-report - -run: build - make -C $(MKFILE_DIR)/scripts/dax-compare-test/ run - -test-qemu: build - make -C 
$(MKFILE_DIR)/cmd/fiotest/ run-qemu - -test-clh: build - make -C $(MKFILE_DIR)/cmd/fiotest/ run-clh diff --git a/tests/metrics/storage/fio-k8s/README.md b/tests/metrics/storage/fio-k8s/README.md deleted file mode 100644 index 96798849a..000000000 --- a/tests/metrics/storage/fio-k8s/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# FIO test in Kubernetes - -This is an automation to run `fio` with Kubernetes. - -## Requirements: - -- Kubernetes cluster running. -- Kata configured as `runtimeclass`. - -## Test structure: - -- [fio-test]: Program wrapper to launch `fio` in a K8s pod. -- [pkg]: Library code that could be used for more `fio` automation. -- [configs]: Configuration files used by [fio-test]. -- [DAX-compare-test]: Script to run [fio-test] to generate `fio` data for Kata with/without `virtio-fs DAX` and K8s bare-metal runtime(`runc`). -- [report] Jupyter Notebook to create reports for data generated by [DAX-compare-test]. - -## Top-level Makefile targets - -- `build`: Build `fio` metrics. -- `test`: quick test, used to verify changes in [fio-test]. -- `run`: Run `fio` metrics and generate reports. -- `test-report-interactive`: Run python notebook in `localhost:8888`, useful to edit the report. -- `test-report`: Generate report from data generated by `make test`. 
- -[fio-test]:cmd/fiotest -[configs]:configs -[pkg]:pkg -[report]:scripts/dax-compare-test/report -[DAX-compare-test]:scripts/dax-compare-test/README.md diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile b/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile deleted file mode 100644 index 48ea3c91f..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) - -build: - GO111MODULE=on go build - -run: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata $(MKFILE_DIR)/../../configs/example-config/ - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name runc --container-runtime runc $(MKFILE_DIR)/../../configs/example-config/ - -gomod: - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/k8s=../../pkg/k8s - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../../pkg/exec - go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/env=../../pkg/env - go mod tidy - -run-qemu: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata --container-runtime kata-qemu $(MKFILE_DIR)/../../configs/example-config/ - -run-clh: build - $(MKFILE_DIR)/fio-k8s --debug --fio.size 10M --output-dir test-results --test-name kata --container-runtime kata-clh $(MKFILE_DIR)/../../configs/example-config/ diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod b/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod deleted file mode 100644 index 4d6f8fb45..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.mod +++ /dev/null @@ -1,24 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s - -go 1.19 - -replace 
github.com/kata-containers/kata-containers/tests/metrics/exec => ../../pkg/exec - -replace github.com/kata-containers/kata-containers/tests/metrics/k8s => ../../pkg/k8s - -replace github.com/kata-containers/kata-containers/tests/metrics/env => ../../pkg/env - -require ( - github.com/kata-containers/kata-containers/tests/metrics/env v0.0.0-00010101000000-000000000000 - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 - github.com/kata-containers/kata-containers/tests/metrics/k8s v0.0.0-00010101000000-000000000000 - github.com/pkg/errors v0.9.1 - github.com/sirupsen/logrus v1.9.3 - github.com/urfave/cli v1.22.14 -) - -require ( - github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect - golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect -) diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum b/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum deleted file mode 100644 index 45fbeb4a0..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/go.sum +++ /dev/null @@ -1,31 +0,0 @@ -github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 
-github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= -github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= -github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go 
b/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go deleted file mode 100644 index de7e7b2ff..000000000 --- a/tests/metrics/storage/fio-k8s/cmd/fiotest/main.go +++ /dev/null @@ -1,373 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package main - -import ( - "encoding/csv" - "encoding/json" - "fmt" - "os" - "path" - "path/filepath" - "strings" - "time" - - env "github.com/kata-containers/kata-containers/tests/metrics/env" - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" - "github.com/kata-containers/kata-containers/tests/metrics/k8s" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/urfave/cli" -) - -var log = logrus.New() - -var ( - optContainerRuntime = "container-runtime" - optDebug = "debug" - optOutputDir = "output-dir" - optTestName = "test-name" - // fio options - optFioBlockSize = "fio.block-size" - optFioDirect = "fio.direct" - optFioIoDepth = "fio.iodepth" - optFioSize = "fio.size" - optFioNumJobs = "fio.numjobs" -) - -type RwFioOp struct { - BandwidthKb int `json:"bw"` - IOPS float64 `json:"iops"` -} - -type fioResult struct { - GlobalOptions struct { - IOEngine string `json:"ioengine"` - RW string `json:"rw"` - } `json:"global options"` - Jobs []struct { - JobName string `json:"jobname"` - Read RwFioOp `json:"read"` - Write RwFioOp `json:"write"` - } `json:"jobs"` -} - -// Run fio in k8s metrics test in K8s -func (c fioTestConfig) run() (result fioResult, err error) { - log.Infof("Running fio config: %s", c.jobFile) - - pod := k8s.Pod{YamlPath: c.k8sYaml} - - log.Infof("Delete pod if already created") - err = pod.Delete() - if err != nil { - return result, err - } - - log.Infof("Create pod: %s", pod.YamlPath) - err = pod.Run() - if err != nil { - return result, err - } - - defer func() { - log.Info("Deleting pod") - delErr := pod.Delete() - if delErr != nil { - log.Error(delErr) - if err != nil { - err = errors.Wrapf(err, "Could not delete pod after: %s", 
delErr) - } - } - }() - - destDir := "/home/fio-jobs" - _, err = pod.Exec("mkdir " + destDir) - if err != nil { - return result, err - } - - dstJobFile := path.Join(destDir, "jobFile") - err = pod.CopyFromHost(c.jobFile, dstJobFile) - if err != nil { - return result, err - } - - _, err = pod.Exec("apt update") - if err != nil { - return result, err - } - _, err = pod.Exec("apt install -y fio") - if err != nil { - return result, err - } - - err = env.DropCaches() - if err != nil { - return result, err - } - - var directStr string - if c.direct { - directStr = "1" - } else { - directStr = "0" - } - - cmdFio := "fio" - cmdFio += " --append-terse " - cmdFio += " --blocksize=" + c.blocksize - cmdFio += " --direct=" + directStr - cmdFio += " --directory=" + c.directory - cmdFio += " --iodepth=" + c.iodepth - cmdFio += " --numjobs=" + c.numjobs - cmdFio += " --runtime=" + c.runtime - cmdFio += " --size=" + c.size - cmdFio += " --output-format=json" - cmdFio += " " + dstJobFile - - log.Infof("Exec fio") - output, err := pod.Exec(cmdFio, k8s.ExecOptShowStdOut()) - if err != nil { - return result, err - } - err = json.Unmarshal([]byte(output), &result) - if err != nil { - return result, errors.Wrapf(err, "failed to unmarshall output : %s", output) - } - - log.Infof("ioengine:%s", result.GlobalOptions.IOEngine) - log.Infof("rw:%s", result.GlobalOptions.RW) - if len(result.Jobs) == 0 { - return result, errors.New("No jobs found after parsing fio results") - } - - testDir := path.Join(c.outputDir, filepath.Base(c.jobFile)) - err = os.MkdirAll(testDir, 0775) - if err != nil { - return result, errors.Wrapf(err, "failed to create test directory for :%s", c.jobFile) - } - outputFile := path.Join(testDir, "output.json") - log.Infof("Store results output in : %s", outputFile) - - err = os.WriteFile(outputFile, []byte(output), 0644) - if err != nil { - return result, err - } - - return result, nil -} - -type fioTestConfig struct { - //test options - k8sYaml string - containerRuntime 
string - outputDir string - - //fio options - blocksize string - directory string - iodepth string - numjobs string - jobFile string - loops string - runtime string - size string - - direct bool -} - -func runFioJobs(testDirPath string, cfg fioTestConfig) (results []fioResult, err error) { - fioJobsDir, err := filepath.Abs(path.Join(testDirPath, "fio-jobs")) - if err != nil { - return results, err - } - - files, err := os.ReadDir(fioJobsDir) - if err != nil { - log.Fatal(err) - return results, err - } - - if cfg.containerRuntime == "" { - return results, errors.New("containerRuntime is empty") - } - - podYAMLName := cfg.containerRuntime + ".yaml" - cfg.k8sYaml = path.Join(testDirPath, podYAMLName) - - if len(files) == 0 { - return results, errors.New("No fio configs found") - } - - for _, file := range files { - cfg.jobFile = path.Join(fioJobsDir, file.Name()) - r, err := cfg.run() - if err != nil { - return results, err - } - results = append(results, r) - - log.Infof("workload:%s", r.Jobs[0].JobName) - log.Infof("bw_r:%d", r.Jobs[0].Read.BandwidthKb) - log.Infof("IOPS_r:%f", r.Jobs[0].Read.IOPS) - log.Infof("bw_w:%d", r.Jobs[0].Write.BandwidthKb) - log.Infof("IOPS_w:%f", r.Jobs[0].Write.IOPS) - - waitTime := 5 - log.Debugf("Sleep %d seconds(if not wait sometimes create another pod timesout)", waitTime) - time.Sleep(time.Duration(waitTime) * time.Second) - } - return results, err - -} - -func generateResultsView(testName string, results []fioResult, outputDir string) error { - outputFile := path.Join(outputDir, "results.csv") - f, err := os.Create(outputFile) - if err != nil { - return err - } - defer f.Close() - - log.Infof("Creating results output in %s", outputFile) - - w := csv.NewWriter(f) - - headers := []string{"NAME", "WORKLOAD", "bw_r", "bw_w", "IOPS_r", "IOPS_w"} - err = w.Write(headers) - if err != nil { - return err - } - - for _, r := range results { - if len(r.Jobs) == 0 { - return errors.Errorf("fio result has no jobs: %v", r) - } - row := 
[]string{testName} - row = append(row, r.Jobs[0].JobName) - row = append(row, fmt.Sprintf("%d", r.Jobs[0].Read.BandwidthKb)) - row = append(row, fmt.Sprintf("%d", r.Jobs[0].Write.BandwidthKb)) - row = append(row, fmt.Sprintf("%f", r.Jobs[0].Read.IOPS)) - row = append(row, fmt.Sprintf("%f", r.Jobs[0].Write.IOPS)) - if err := w.Write(row); err != nil { - return err - } - } - - w.Flush() - - return w.Error() -} - -func main() { - - app := &cli.App{ - Flags: []cli.Flag{ - &cli.BoolFlag{ - Name: optDebug, - Usage: "Logs in debug level", - }, - &cli.StringFlag{ - Name: optTestName, - Value: "kata-fio-test", - Usage: "Change the fio test name for reports", - }, - &cli.StringFlag{ - Name: optOutputDir, - Value: ".", - Usage: "Use a file to store results", - }, - &cli.StringFlag{ - Name: optContainerRuntime, - Value: "kata", - Usage: "Choose the runtime to use", - }, - //fio options - &cli.StringFlag{ - Name: optFioSize, - Value: "200M", - Usage: "File size to use for tests", - }, - &cli.StringFlag{ - Name: optFioBlockSize, - Value: "4K", - Usage: "Block size for fio tests", - }, - &cli.BoolFlag{ - Name: optFioDirect, - Usage: "Use direct io", - }, - &cli.StringFlag{ - Name: optFioIoDepth, - Value: "16", - Usage: "Number of I/O units to keep in flight against the file", - }, - &cli.StringFlag{ - Name: optFioNumJobs, - Value: "1", - Usage: "Number of clones (processes/threads performing the same workload) of this job", - }, - }, - Action: func(c *cli.Context) error { - jobsDir := c.Args().First() - - if jobsDir == "" { - cli.SubcommandHelpTemplate = strings.Replace(cli.SubcommandHelpTemplate, "[arguments...]", "", -1) - cli.ShowCommandHelp(c, "") - return errors.New("Missing ") - } - - if c.Bool(optDebug) { - log.SetLevel(logrus.DebugLevel) - k8s.Debug = true - env.Debug = true - } - - exec.SetLogger(log) - k8s.SetLogger(log) - env.SetLogger(log) - - testName := c.String(optTestName) - - outputDir, err := filepath.Abs(path.Join(c.String(optOutputDir), testName)) - if err != 
nil { - return err - } - - cfg := fioTestConfig{ - blocksize: c.String(optFioBlockSize), - direct: c.Bool(optFioDirect), - directory: ".", - iodepth: c.String(optFioIoDepth), - loops: "3", - numjobs: c.String(optFioNumJobs), - runtime: "20", - size: c.String(optFioSize), - containerRuntime: c.String(optContainerRuntime), - outputDir: outputDir, - } - - log.Infof("Results will be created in %s", cfg.outputDir) - - err = os.MkdirAll(cfg.outputDir, 0775) - if err != nil { - return err - } - - results, err := runFioJobs(jobsDir, cfg) - if err != nil { - return err - } - - return generateResultsView(c.String(optTestName), results, outputDir) - }, - } - - err := app.Run(os.Args) - if err != nil { - log.Fatal(err) - } - -} diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job deleted file mode 100644 index 9e0edb96c..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-async.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2022 Intel Corporation -[global] -name=io_uring -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=io_uring - -[randrw-io_uring] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job deleted file mode 100644 index 327852e44..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-libaio.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-libaio -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=libaio - -[randrw-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job b/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job deleted file mode 100644 index 
3f7f2b6ed..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/fio-jobs/randrw-sync.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2022 Intel Corporation -[global] -name=sync -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=sync - -[randrw-sync] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml deleted file mode 100644 index 0744fe013..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/kata-clh.yaml +++ /dev/null @@ -1,17 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata-clh - containers: - - args: - - sleep - - infinity - image: ubuntu:latest - name: iometrics - imagePullPolicy: Always diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml deleted file mode 100644 index 8d7cb3319..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/kata-qemu.yaml +++ /dev/null @@ -1,17 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata-qemu - containers: - - args: - - sleep - - infinity - image: ubuntu:latest - name: iometrics - imagePullPolicy: Always diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml deleted file mode 100644 index 08f397dea..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/kata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata - containers: - - 
name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml b/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml deleted file mode 100644 index 4fd96d3c1..000000000 --- a/tests/metrics/storage/fio-k8s/configs/example-config/runc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job deleted file mode 100644 index 5ab226061..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randread-libaio -filename=fio-file -rw=randread -ioengine=libaio - -[randread-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job deleted file mode 100644 index cd79854f2..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randread-mmpap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randread-mmap -rw=randread -ioengine=mmap - -[randread-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job deleted file mode 100644 index 327852e44..000000000 --- 
a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-libaio.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-libaio -filename=fio-file -rw=randrw -rwmixread=75 -ioengine=libaio - -[randrw-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job deleted file mode 100644 index cf0c1288a..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randrw-mmap.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randrw-mmap -rw=randrw -rwmixread=75 -ioengine=mmap - -[randrw-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job deleted file mode 100644 index ef3bfc5a5..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randwrite-libaio -filename=fio-file -rw=randwrite -ioengine=libaio - -[randwrite-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job deleted file mode 100644 index 0d4f6ef73..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/randwrite-mmap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=randwrite-mmap -rw=randwrite -ioengine=mmap - -[randwrite-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job 
b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job deleted file mode 100644 index 6f8a16a3f..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-libaio -filename=fio-file -rw=read -ioengine=libaio - -[seqread-libaio] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job deleted file mode 100644 index 9829726d3..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-mmap.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-mmap -rw=read -ioengine=mmap - -[seqread-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job deleted file mode 100644 index a1f54ca7c..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqread-psync.job +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqread-psync -filename=fio-file -rw=read - -[seqread-psync] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job deleted file mode 100644 index 9927e05c9..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-libaio.job +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqwrite-libaio -filename=fio-file -rw=write -ioengine=libaio - -[seqwrite-libaio] diff --git 
a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job b/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job deleted file mode 100644 index e3485db4f..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/fio-jobs/seqwrite-mmap.job +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) 2021 Intel Corporation -[global] -name=seqwrite-mmap -filename=fio-file -rw=write -ioengine=mmap - -[seqwrite-mmap] -filename=fio-file diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml b/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml deleted file mode 100644 index 08f397dea..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/kata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - runtimeClassName: kata - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml b/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml deleted file mode 100644 index 4fd96d3c1..000000000 --- a/tests/metrics/storage/fio-k8s/configs/test-config/runc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -## Copyright (c) 2021 Intel Corporation -# -## SPDX-License-Identifier: Apache-2.0 -# -apiVersion: v1 -kind: Pod -metadata: - name: iometrics -spec: - containers: - - name: iometrics - image: ubuntu:latest - # Just spin & wait forever - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep infinity" ] diff --git a/tests/metrics/storage/fio-k8s/fio-test-ci.sh b/tests/metrics/storage/fio-k8s/fio-test-ci.sh deleted file mode 100755 index 9933bffdb..000000000 --- a/tests/metrics/storage/fio-k8s/fio-test-ci.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2022-2023 
Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -set -e - -# General env -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../lib/common.bash" -FIO_PATH="${GOPATH}/tests/metrics/storage/fio-k8s" -TEST_NAME="${TEST_NAME:-fio}" - -function main() { - cmds=("bc" "jq") - check_cmds "${cmds[@]}" - init_env - check_processes - - pushd "${FIO_PATH}" - [ -z "${KATA_HYPERVISOR}" ] && die "Hypervisor ID is missing." - [ "${KATA_HYPERVISOR}" != "qemu" ] && [ "${KATA_HYPERVISOR}" != "clh" ] && die "Hypervisor not recognized: ${KATA_HYPERVISOR}" - - echo "INFO: Running K8S FIO test using ${KATA_HYPERVISOR} hypervisor" - make "test-${KATA_HYPERVISOR}" - popd - - test_result_file="${FIO_PATH}/cmd/fiotest/test-results/kata/randrw-sync.job/output.json" - - metrics_json_init - local read_io=$(cat $test_result_file | grep io_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_bw=$(cat $test_result_file | grep bw_bytes | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_90_percentile=$(cat $test_result_file | grep 90.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local read_95_percentile=$(cat $test_result_file | grep 95.000000 | head -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_io=$(cat $test_result_file | grep io_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_bw=$(cat $test_result_file | grep bw_bytes | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_90_percentile=$(cat $test_result_file | grep 90.000000 | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - local write_95_percentile=$(cat $test_result_file | grep 95.000000 | head -2 | tail -1 | sed 's/[[:blank:]]//g' | cut -f2 -d ':' | cut -f1 -d ',') - - metrics_json_start_array - local json="$(cat << EOF - { - "readio": { - "Result" : 
$read_io, - "Units" : "bytes" - }, - "readbw": { - "Result" : $read_bw, - "Units" : "bytes/sec" - }, - "read90percentile": { - "Result" : $read_90_percentile, - "Units" : "ns" - }, - "read95percentile": { - "Result" : $read_95_percentile, - "Units" : "ns" - }, - "writeio": { - "Result" : $write_io, - "Units" : "bytes" - }, - "writebw": { - "Result" : $write_bw, - "Units" : "bytes/sec" - }, - "write90percentile": { - "Result" : $write_90_percentile, - "Units" : "ns" - }, - "write95percentile": { - "Result" : $write_95_percentile, - "Units" : "ns" - } - } -EOF -)" - metrics_json_add_array_element "$json" - metrics_json_end_array "Results" - metrics_json_save - - check_processes -} - -main "$@" diff --git a/tests/metrics/storage/fio-k8s/pkg/env/Makefile b/tests/metrics/storage/fio-k8s/pkg/env/Makefile deleted file mode 100644 index a1c0a7779..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -gomod: - GO111MODULE=on go mod edit -replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../exec - GO111MODULE=on go mod tidy diff --git a/tests/metrics/storage/fio-k8s/pkg/env/env.go b/tests/metrics/storage/fio-k8s/pkg/env/env.go deleted file mode 100644 index 034127f96..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/env.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package env - -import ( - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" -) - -// logger interface for pkg -var log logger -var Debug bool = false - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -var sysDropCachesPath = "/proc/sys/vm/drop_caches" - -func DropCaches() (err error) { - log.Infof("drop caches") - _, err = 
exec.ExecCmd("sync", Debug) - if err != nil { - return err - } - - _, err = exec.ExecCmd("echo 3 | sudo tee "+sysDropCachesPath, Debug) - if err != nil { - return err - } - return nil -} diff --git a/tests/metrics/storage/fio-k8s/pkg/env/go.mod b/tests/metrics/storage/fio-k8s/pkg/env/go.mod deleted file mode 100644 index 4e59e60d9..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s/exec - -go 1.19 - -require ( - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 // indirect - github.com/pkg/errors v0.9.1 // indirect -) - -replace github.com/kata-containers/kata-containers/tests/metrics/exec => ../exec diff --git a/tests/metrics/storage/fio-k8s/pkg/env/go.sum b/tests/metrics/storage/fio-k8s/pkg/env/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/env/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go b/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go deleted file mode 100644 index 6e409f427..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/Exec.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package exec - -import ( - "bytes" - "io" - "os" - "os/exec" - - "github.com/pkg/errors" -) - -// logger interface for pkg -var log logger - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -// Exec a command -// err != nil if command fails to execute -// output is a string with a combined stdout and stderr -func ExecCmd(c string, showInStdout bool) (stdout 
string, err error) { - if c == "" { - return "", errors.New("command is empty") - } - - log.Debugf("Exec: %s", c) - cmd := exec.Command("bash", "-o", "pipefail", "-c", c) - var stdBuffer bytes.Buffer - var writers []io.Writer - writers = append(writers, &stdBuffer) - if showInStdout { - writers = append(writers, os.Stdout) - } - mw := io.MultiWriter(writers...) - - cmd.Stdout = mw - cmd.Stderr = mw - - err = cmd.Run() - output := stdBuffer.String() - - return stdBuffer.String(), errors.Wrap(err, output) -} - -// Exec a command -// Send output to Stdout and Stderr -func ExecStdout(c string) error { - if c == "" { - return errors.New("command is empty") - } - - log.Debugf("Exec: %s", c) - cmd := exec.Command("bash", "-o", "pipefail", "-c", c) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd.Run() -} diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/go.mod b/tests/metrics/storage/fio-k8s/pkg/exec/go.mod deleted file mode 100644 index c74fcbeb8..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/storage/fio-k8s/exec - -go 1.19 - -require github.com/pkg/errors v0.9.1 diff --git a/tests/metrics/storage/fio-k8s/pkg/exec/go.sum b/tests/metrics/storage/fio-k8s/pkg/exec/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/exec/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile b/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile deleted file mode 100644 index f5bee6d16..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -gomod: - GO111MODULE=on go mod edit 
-replace=github.com/kata-containers/kata-containers/tests/metrics/exec=../exec - GO111MODULE=on go mod tidy diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go b/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go deleted file mode 100644 index 9be25f618..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/exec.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package k8s - -import ( - "fmt" - - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" -) - -type execOpt struct { - showInStdOut bool -} - -type ExecOption func(e *execOpt) - -func ExecOptShowStdOut() ExecOption { - return func(e *execOpt) { - e.showInStdOut = true - } - -} - -func (p *Pod) Exec(cmd string, opts ...ExecOption) (output string, err error) { - log.Debugf("Exec %q in %s", cmd, p.YamlPath) - o := &execOpt{showInStdOut: false} - for _, opt := range opts { - opt(o) - - } - execCmd := fmt.Sprintf("kubectl exec -f %s -- /bin/bash -c %q", p.YamlPath, cmd) - return exec.ExecCmd(execCmd, Debug || o.showInStdOut) -} diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod b/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod deleted file mode 100644 index 6a349002a..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module github.com/kata-containers/kata-containers/tests/metrics/k8s - -go 1.19 - -replace github.com/kata-containers/kata-containers/tests/metrics/exec => ../exec - -require ( - github.com/kata-containers/kata-containers/tests/metrics/exec v0.0.0-00010101000000-000000000000 // indirect - github.com/pkg/errors v0.9.1 // indirect -) diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum b/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum deleted file mode 100644 index 7c401c3f5..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors 
v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go b/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go deleted file mode 100644 index 2fef788cc..000000000 --- a/tests/metrics/storage/fio-k8s/pkg/k8s/k8s.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2021-2023 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -package k8s - -import ( - "fmt" - - exec "github.com/kata-containers/kata-containers/tests/metrics/exec" - "github.com/pkg/errors" -) - -// logger interface for pkg -var log logger -var Debug bool = false - -type logger interface { - Infof(string, ...interface{}) - Debugf(string, ...interface{}) - Errorf(string, ...interface{}) -} - -func SetLogger(l logger) { - log = l -} - -type Pod struct { - YamlPath string -} - -func (p *Pod) waitForReady() (err error) { - log.Debugf("Wait for pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl wait --for=condition=ready -f "+p.YamlPath, Debug) - return err -} - -func (p *Pod) Run() (err error) { - - log.Debugf("Creating K8s Pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl apply -f "+p.YamlPath, Debug) - if err != nil { - return errors.Wrapf(err, "Failed to run pod %s", p.YamlPath) - } - - err = p.waitForReady() - if err != nil { - return errors.Wrapf(err, "Failed to wait for pod %s", p.YamlPath) - } - return err -} - -func (p *Pod) Delete() (err error) { - log.Debugf("Delete pod %s", p.YamlPath) - _, err = exec.ExecCmd("kubectl delete --ignore-not-found -f "+p.YamlPath, Debug) - return errors.Wrapf(err, "Failed to delete pod %s", p.YamlPath) -} - -func (p *Pod) CopyFromHost(src, dst string) (err error) { - podName, err := exec.ExecCmd("kubectl get -f "+p.YamlPath+" -o jsonpath={.metadata.name}", Debug) - if err != nil { - return nil - } - - log.Debugf("Copy from host %q->%q in pod %s", src, dst, p.YamlPath) - execCmd := fmt.Sprintf("kubectl cp %s %s:%s", src, podName, dst) - _, err = exec.ExecCmd(execCmd, Debug) - return err -} diff 
--git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile deleted file mode 100644 index a33a5015c..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -MKFILE_DIR := $(dir $(MKFILE_PATH)) -run: - $(MKFILE_DIR)/compare-virtiofsd-dax.sh - "$(MKFILE_DIR)/report/gen-html-fio-report.sh" "./results" diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md deleted file mode 100644 index c8b00f0f9..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# FIO in Kubernetes - -This test runs `fio` jobs to measure how Kata Containers work using virtio-fs DAX. The test works using Kubernetes. -The test has to run in a single node cluster, it is needed as the test modifies Kata configuration file. - -The `virtio-fs` options that this test will use are: - -* `cache mode` Only `auto`, this is the most compatible mode for most of the Kata use cases. Today this is default in Kata. -* `thread pool size` Restrict the number of worker threads per request queue, zero means no thread pool. -* `DAX` -``` -File contents can be mapped into a memory window on the host, allowing the guest to directly access data from the host page cache. This has several advantages: The guest page cache is bypassed, reducing the memory footprint. No communication is necessary -to access file contents, improving I/O performance. Shared file access is coherent between virtual machines on the same host even with mmap. -``` - -This test by default iterates over different `virtio-fs` configurations. 
- -| test name | DAX | thread pool size | cache mode | -|---------------------------|-----|------------------|------------| -| pool_0_cache_auto_no_DAX | no | 0 | auto | -| pool_0_cache_auto_DAX | yes | 0 | auto | - -The `fio` options used are: - -`ioengine`: How the IO requests are issued to the kernel. -* `libaio`: Supports async IO for both direct and buffered IO. -* `mmap`: File is memory mapped with mmap(2) and data copied to/from using memcpy(3). - -`rw type`: Type of I/O pattern. -* `randread`: Random reads. -* `randrw`: Random mixed reads and writes. -* `randwrite`: Random writes. -* `read`: Sequential reads. -* `write`: Sequential writes. - -Additional notes: Some jobs contain a `multi` prefix. This means that the same job runs more than once at the same time using its own file. - -### Static `fio` values: - -Some `fio` values are not modified over all the jobs. - -* `runtime`: Tell `fio` to terminate processing after the specified period of time(seconds). -* `loops`: Run the specified number of iterations of this job. Used to repeat the same workload a given number of times. -* `iodepth`: Number of I/O units to keep in flight against the file. Note that increasing `iodepth` beyond 1 will not affect synchronous `ioengine`. -* `size`: The total size of file I/O for each thread of this job. -* `direct`: If value is true, use non-buffered I/O. This is usually O_`DIRECT`. -* `blocksize`: The block size in bytes used for I/O units. 
diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh deleted file mode 100755 index 248e37776..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/compare-virtiofsd-dax.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") - -runtime_path="/opt/kata/bin/kata-runtime" -kata_config_path="/opt/kata/share/defaults/kata-containers/configuration.toml" - -results_dir="$(realpath ./)/results" - -KATA_RUNTIME="${KATA_RUNTIME_CLASS:-kata}" -BAREMETAL_RUNTIME="runc" -RUNTIME_CLASS="" - -FIO_SIZE="${FIO_SIZE:-500M}" -FIO_BLOCKSIZE="${FIO_BLOCKSIZE:-4K}" -VIRTIOFS_DAX_SIZE=${VIRTIOFS_DAX_SIZE:-600M} - -# set the base case for virtiofsd -set_base_virtiofs_config() { - # Running kata-qemu-virtiofs - # Defaults for virtiofs - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size ${VIRTIOFS_DAX_SIZE} -} - -## helper function: get name of current bash function -fn_name() { - echo "${FUNCNAME[1]}" -} - -# directory where results are stored -get_results_dir() { - local test_name - local test_result_dir - test_name="${1}" - test_result_dir="${results_dir}/${test_name}" - mkdir -p "${test_result_dir}" - echo "${test_result_dir}" -} - -# Collect kata env -# save kata config toml -# save output from kata-env -kata_env() { - local suffix=${1} - local config_path - local kata_env_bk - local kata_config_bk - kata_env_bk="$(get_results_dir "${suffix}")/kata-env.toml" - kata_config_bk="$(get_results_dir "${suffix}")/kata-config.toml" - - ${runtime_path} kata-env >"${kata_env_bk}" - config_path="$(${runtime_path} kata-env --json | jq 
.Runtime.Config.Path -r)" - cp "${config_path}" "${kata_config_bk}" -} - -# Collect the command used by virtiofsd -collect_qemu_virtiofs_cmd() { - local rdir - local test_name - test_name="${1}" - - rdir=$(get_results_dir "${test_name}") - # TODO -} - -# Run metrics runner -run_workload() { - local test_name - local test_result_file - local test_result_dir - - test_name="${1}" - - test_result_dir="$(get_results_dir "${test_name}")" - test_result_file="${test_result_dir}/test-out.txt" - - echo "Running for kata config: ${test_name}" - collect_qemu_virtiofs_cmd "$test_name" - - fio_runner_dir="${script_dir}/../../cmd/fiotest/" - fio_jobs="${script_dir}/../../configs/test-config/" - make -C "${fio_runner_dir}" build - pwd - set -x - "${fio_runner_dir}fio-k8s" \ - --debug \ - --fio.size "${FIO_SIZE}" \ - --fio.block-size "${FIO_BLOCKSIZE}" \ - --container-runtime "${RUNTIME_CLASS}" \ - --test-name "${test_name}" \ - --output-dir "$(dirname ${test_result_dir})" \ - "${fio_jobs}" | - tee \ - "${test_result_file}" - set +x -} - -pool_0_cache_auto_dax() { - local suffix="$(fn_name)" - - set_base_virtiofs_config - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_extra_args '["--thread-pool-size=0","-o","no_posix_lock","-o","xattr"]' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size 1024 - kata_env "${suffix}" - RUNTIME_CLASS="${KATA_RUNTIME}" - run_workload "${suffix}" -} - -pool_0_cache_auto_no_dax() { - local suffix="$(fn_name)" - - set_base_virtiofs_config - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_extra_args '["--thread-pool-size=0","-o","no_posix_lock","-o","xattr"]' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache '"auto"' - sudo crudini --set --existing "$kata_config_path" hypervisor.qemu virtio_fs_cache_size 0 - - kata_env "${suffix}" - - 
RUNTIME_CLASS="${KATA_RUNTIME}" - run_workload "${suffix}" - echo "done" -} - -k8s_baremetal() { - local suffix="$(fn_name)" - - RUNTIME_CLASS="${BAREMETAL_RUNTIME}" - run_workload "${suffix}" -} - -main() { - - mkdir -p "${results_dir}" - - k8s_baremetal - pool_0_cache_auto_dax - pool_0_cache_auto_no_dax -} - -main $* diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb deleted file mode 100644 index d2e8da202..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.ipynb +++ /dev/null @@ -1,51 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "tWacOPbMYPtc" - }, - "source": [ - "# FIO comparision" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jXtTs6yldl_y" - }, - "outputs": [], - "source": [ - "import fio\n", - "fio.generate_report()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "fio.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py deleted file mode 100644 index 313f63313..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/fio.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2021-2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -import pandas as pd -import os -import re -import io -import glob -from IPython.display import display, Markdown -import 
matplotlib.pyplot as plt - -#Compare the tests results group by fio job. -#Input: -# df: dataset from `import_data()` -# metric: string of metrics provided in `df` -def compare_tests_group_by_fio_job(df, metric): - test_names, metric_df = group_metrics_group_by_testname(df, metric) - show_df(metric_df) - plot_df(metric_df,test_names) - -# Given a metric return results per test group by fio job. -# input: -# df: dataset from `import_data()` -# metric: string with the name of the metric to filter. -# output: -# dataset with fomat: -# 'workload' , 'name[0]' , ... , 'name[n]' -# -def group_metrics_group_by_testname(df, metric): - #name of each tests from results - names = set() - # Rows of new data set - rows = [] - # map: - # keys: name of fio job - # value: dict[k]:v where k: name of a test, v: value of test for metric` - workload = {} - - for k, row in df.iterrows(): - # name of a fio job - w = row['WORKLOAD'] - # name of tests - tname = row['NAME'] - names.add(tname) - # given a fio job name get dict of values - # if not previous values init empty dict - dict_values = workload.get(w, {}) - # For a given metric, add it into as value of dict_values[testname]=val - #e.g - # dict_values["test-name"] = row["IOPS"] - dict_values[tname] = row[metric] - workload[w] = dict_values - - names = list(names) - cols = ['WORKLOAD'] + list(names) - rdf = pd.DataFrame(workload,columns = cols) - - for k in workload: - d = workload[k] - - if not d[names[0]] == 0: - d["WORKLOAD"] = k; - rdf = rdf.append(d,ignore_index=True) - rdf = rdf.dropna() - return names, rdf - -def plot_df(df, names,sort_key=""): - if sort_key != "": - df.sort_values(sort_key, ascending=False) - df.plot(kind='bar',x="WORKLOAD",y=names, figsize=(30, 10)) - plt.show() - - -def import_data(): - frames = [] - for f in glob.glob('./results/*/results.csv'): - print("reading:" + f) - df = pd.read_csv(f) - frames.append(df) - return pd.concat(frames) - -def show_df(df): - pd.set_option('display.max_rows', df.shape[0]+1) 
- print(df) - -def print_md(s): - display(Markdown(s)) - -#notebook entrypoint -def generate_report(): - #Load the all test results in a single dataset - df_results = import_data() - print_md("Show all data from results") - show_df(df_results) - print_md("### Compare the tests results group by fio job. The metric used to compare is write bandwidth") - compare_tests_group_by_fio_job(df_results, 'bw_w') - print_md("### Compare the tests results group by fio job. The metric used to compare is read bandwidth") - compare_tests_group_by_fio_job(df_results, 'bw_r') - print_md("### Compare the tests results group by fio job. The metric used to compare is write IOPS(Input/Output Operations Per Second)") - compare_tests_group_by_fio_job(df_results, 'IOPS_w') - print_md("### Compare the tests results group by fio job. The metric used to compare is read IOPS(Input/Output Operations Per Second)") - compare_tests_group_by_fio_job(df_results, 'IOPS_r') diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh deleted file mode 100644 index 8061c059d..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/gen-html-fio-report.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") - -results_dir=${1:-} - -usage(){ - echo "$0 " -} - -if [ "${results_dir}" == "" ];then - echo "missing results directory" - usage - exit 1 -fi - -if [ ! 
-d "${results_dir}" ];then - echo "${results_dir} is not a directory" - usage - exit 1 -fi - -results_dir=$(realpath "${results_dir}") - -generate_report(){ - sudo chown "${USER}:${USER}" -R ${results_dir} - sudo docker run --rm -e JUPYTER_ENABLE_LAB=yes \ - -v "${script_dir}:/home/jovyan" \ - -v "${results_dir}:/home/jovyan/results" \ - --user $(id -u):$(id -g) \ - jupyter/scipy-notebook:399cbb986c6b \ - bash -e -c ' - cd results; - jupyter nbconvert --execute /home/jovyan/fio.ipynb --to html; - cp /home/jovyan/fio.html /home/jovyan/results; - ' -} - -generate_report diff --git a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh b/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh deleted file mode 100644 index f386da4e2..000000000 --- a/tests/metrics/storage/fio-k8s/scripts/dax-compare-test/report/run-docker-jupyter-server.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -#Copyright (c) 2021-2023 Intel Corporation -# -#SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_dir=$(dirname "$(readlink -f "$0")") -NOTEBOOK_PORT="8888" - -results_dir=${1:-} - -usage(){ - echo "$0 " -} - -if [ "${results_dir}" == "" ];then - echo "missing results directory" - usage - exit 1 -fi - -if [ ! 
-d "${results_dir}" ];then - echo "${results_dir} is not a directory" - usage - exit 1 -fi - -results_dir=$(realpath "${results_dir}") - -sudo -E docker run --rm -p "${NOTEBOOK_PORT}:${NOTEBOOK_PORT}" -e JUPYTER_ENABLE_LAB=yes \ - -v "${script_dir}:/home/jovyan" \ - -v "${results_dir}:/home/jovyan/results" \ - jupyter/scipy-notebook:399cbb986c6b \ - start.sh jupyter lab --LabApp.token='' diff --git a/tests/metrics/storage/fio_test.sh b/tests/metrics/storage/fio_test.sh new file mode 100755 index 000000000..7ea7deb8e --- /dev/null +++ b/tests/metrics/storage/fio_test.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Description of the test: +# This test runs the 'fio benchmark' on kata containers +# https://fio.readthedocs.io/en/latest/ + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +CONTAINER_ID="fio_bench_${RANDOM}" +IMAGE="docker.io/library/fio-bench:latest" +DOCKERFILE="${SCRIPT_PATH}/fio-dockerfile/Dockerfile" +PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" +TEST_NAME="fio" + +# Fio default number of jobs +nj=4 + +function release_resources() { + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" ${CONTAINER_ID} sh -c "./fio_bench.sh delete-workload" + sleep 0.5 + clean_env_ctr + info "fio test end" +} + +trap release_resources EXIT + +function setup() { + info "setup fio test" + clean_env_ctr + check_cmds "${cmds[@]}" + check_ctr_images "$IMAGE" "$DOCKERFILE" + init_env + + # drop caches + sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' + + # launch container + sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${CONTAINER_ID}" sh -c "${PAYLOAD_ARGS}" +} + +function parse_results() { + local data="${1}" + local bw=0 + local bw_stddev=0 + local iops=0 + local iops_stddev=0 + + [ -z "${data}" ] && die "Data results are missing when trying to parsing them." 
+ + local io_type="$(echo "${data}" | jq -r '.jobs[0]."job options".rw')" + + if [ "${io_type}" = "read" ] || [ "${io_type}" = "randread" ]; then + # Bandwidth + bw="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.bw] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + bw_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.bw_dev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + # IOPS + iops="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.iops] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + iops_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .read.iops_stddev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + elif [ "${io_type}" = "write" ] || [ "${io_type}" = "randwrite" ]; then + # Bandwidth + bw="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.bw] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + bw_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.bw_dev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + # IOPS + iops="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.iops] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + iops_stddev="$(echo "${data}" | num_jobs="$nj" jq '[.jobs[] | .write.iops_stddev] | add/(env.num_jobs|tonumber) | .*1000|round/1000')" + else + die "io type ${io_type} is not valid when parsing results" + fi + + convert_results_to_json "${io_type}" "${bw}" "${bw_stddev}" "${iops}" "${iops_stddev}" +} + +function extract_test_params() { + local data="${1}" + [ -z "${data}" ] && die "Missing fio parameters when trying to convert to json format." 
+ + local json_params="$(echo "${data}" | jq -r '.jobs[0]."job options" | del(.name) | del(.rw) | del(.filename)')" + local json="$(cat << EOF + { + "Parameters" : ${json_params} + } +EOF +)" + metrics_json_add_array_element "${json}" +} + +function convert_results_to_json() { + local io_type="${1}" + local bw="${2}" + local bw_stddev="${3}" + local iops="${4}" + local iops_stddev="${5}" + + [ -z "${io_type}" ] || [ -z "${bw}" ] || [ -z "${bw_stddev}" ] || [ -z "${iops}" ] || [ -z "${iops_stddev}" ] && die "Results are missing when trying to convert to json format." + + local json="$(cat << EOF + { + "${io_type}" : { + "bw" : "${bw}", + "bw_stddev" : "${bw_stddev}", + "iops" : "${iops}", + "iops_stddev" : "${iops_stddev}", + "units" : "Kb" + } + } +EOF +)" + metrics_json_add_array_element "${json}" +} + +function store_results() { + local data_r="${1}" + local data_w="${2}" + local title="${3}" + + [ -z "${data_r}" ] || [ -z "${data_w}" ] || [ -z "${title}" ] && die "Missing data and/or title when trying storing results." 
+ + metrics_json_start_array + extract_test_params "${data_r}" + parse_results "${data_r}" + parse_results "${data_w}" + metrics_json_end_array "${title}" +} + +function main() { + setup + + # Collect bs=4K, num_jobs=4, io-direct, io-depth=2 + info "Processing sequential type workload" + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-read-4k ${nj}" >/dev/null 2>&1 + local results_read_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-write-4k ${nj}" >/dev/null 2>&1 + local results_write_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + # Collect bs=64K, num_jobs=4, io-direct, io-depth=2 + info "Processing random type workload" + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randread-64k ${nj}" >/dev/null 2>&1 + local results_rand_read_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + sleep 0.5 + sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randwrite-64k ${nj}" >/dev/null 2>&1 + local results_rand_write_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + + # parse results + metrics_json_init + store_results "${results_read_4K}" "${results_write_4K}" "Results sequential" + store_results "${results_rand_read_64K}" "${results_rand_write_64K}" "Results random" + metrics_json_save +} + +main "$@" From 16e31dd40946c31f7d7f7764c6c64fe180b1ba9e Mon Sep 17 00:00:00 2001 From: David Esparza Date: Tue, 3 Oct 2023 23:33:19 -0600 Subject: [PATCH 312/339] metrics: Use jq tool to pretty-print json metrics output This PR enables the use of 
jq pretty-print feature to improve the formatting of metric results json files. Fixes: #8081 Signed-off-by: David Esparza (cherry picked from commit 8c498ef5eecc9155b7e19233499a9d5306f3dffa) --- tests/metrics/lib/json.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/lib/json.bash b/tests/metrics/lib/json.bash index 017d6494a..1a3082095 100755 --- a/tests/metrics/lib/json.bash +++ b/tests/metrics/lib/json.bash @@ -147,7 +147,7 @@ done) EOF )" - echo "$json" > $json_filename + echo "${json}" | jq . > "${json_filename}" # If we have a JSON URL or host/socket pair set up, post the results there as well. # Optionally compress into a single line. From 619ef169fb9d711003954901b42bb5e0fef06171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 4 Oct 2023 15:01:13 +0200 Subject: [PATCH 313/339] ci: Create a function to install docker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be re-used in other tests as well. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 2c3bf406dc3335d076698201be742f9c96ef4fbc) --- tests/common.bash | 18 ++++++++++++++++++ tests/integration/docker/gha-run.sh | 16 +--------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index a11144574..34c59e052 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -547,6 +547,24 @@ EOF sudo systemctl enable --now crio } +function install_docker() { + # Add Docker's official GPG key + sudo apt-get update + sudo apt-get -y install ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the repository to Apt sources: + echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +} + # Convert architecture to the name used by golang function arch_to_golang() { local arch="$(uname -m)" diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index cab3401ea..0a0100668 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -16,21 +16,7 @@ source "${docker_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the docker smoke test" - # Add Docker's official GPG key: - sudo apt-get update - sudo apt-get -y install ca-certificates curl gnupg - sudo install -m 0755 -d /etc/apt/keyrings - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg - sudo chmod a+r 
/etc/apt/keyrings/docker.gpg - - # Add the repository to Apt sources: - echo \ - "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get update - - sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + install_docker } function run() { From 5d232c8143b098e9f9398ace960a0773c22398cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 Sep 2023 14:21:20 +0200 Subject: [PATCH 314/339] ci: Add placeholder for tracing tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tracing tests are currently running as part of the Jenkins CI with the following setups: * Container Engines: containerd * VMMs: QEMU | Cloud Hypervisor * Snapshotters: overlayfs | devmapper We'll be restricting those tests to be running on LTS version of containerd, without devmapper. As it's known due to our GHA limitation, this is just a placeholder and the tests will actually be added in the next iterations. 
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 3bb2923e5d142b1d42062a2be2fecbe0400efef7) --- .github/workflows/ci.yaml | 8 ++++ .github/workflows/run-tracing-tests.yaml | 52 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 .github/workflows/run-tracing-tests.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c7c98776d..90a297712 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -240,3 +240,11 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + + run-tracing-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-tracing-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/run-tracing-tests.yaml b/.github/workflows/run-tracing-tests.yaml new file mode 100644 index 000000000..ede15aca1 --- /dev/null +++ b/.github/workflows/run-tracing-tests.yaml @@ -0,0 +1,52 @@ +name: CI | Run tracing tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-tracing: + strategy: + fail-fast: false + matrix: + vmm: + - clh # cloud-hypervisor + - qemu + runs-on: garm-ubuntu-2204-smaller + env: + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/tracing/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ 
inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/functional/tracing/gha-run.sh install-kata kata-artifacts + + - name: Run tracing tests + run: bash tests/functional/tracing/gha-run.sh run From 6541969a8320adc53940e3354fc1ae081e6eed65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 27 Sep 2023 21:10:27 +0200 Subject: [PATCH 315/339] ci: Move tracing tests here MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm basically moving the tracing tests from the tests repo to this one, and I'm adding the "Signed-off-by:" of every single contributor to the tests. Fixes: #8114 Signed-off-by: Fabiano Fidêncio Signed-off-by: Alexandru Matei Signed-off-by: Chelsea Mafrica Signed-off-by: Gabriela Cervantes Signed-off-by: Salvador Fuentes Signed-off-by: Wainer dos Santos Moschetta Signed-off-by: yaoyinnan (cherry picked from commit 9205acc3d26d4f30153674a3045257f10e334bf3) --- .../tracing/configure_tracing_for_kata.sh | 40 + tests/functional/tracing/gha-run.sh | 56 + .../functional/tracing/test-agent-shutdown.sh | 1502 +++++++++++++++++ tests/functional/tracing/tracing-test.sh | 540 ++++++ 4 files changed, 2138 insertions(+) create mode 100755 tests/functional/tracing/configure_tracing_for_kata.sh create mode 100755 tests/functional/tracing/gha-run.sh create mode 100755 tests/functional/tracing/test-agent-shutdown.sh create mode 100755 tests/functional/tracing/tracing-test.sh diff --git a/tests/functional/tracing/configure_tracing_for_kata.sh b/tests/functional/tracing/configure_tracing_for_kata.sh new file mode 100755 index 000000000..ed9757113 --- /dev/null +++ b/tests/functional/tracing/configure_tracing_for_kata.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Copyright (c) 2019-2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source 
"${SCRIPT_PATH}/../../common.bash" + +[ "$#" -eq 1 ] || die "Specify enable or disable" + +kata_cfg_file=$(kata-runtime kata-env --json |jq '.Runtime | .Config | .Path' |cut -d\" -f2) + +enable_tracing() { + info "Enabling kata tracing on $kata_cfg_file" + sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing true + sudo crudini --set "$kata_cfg_file" runtime enable_tracing true +} + +disable_tracing() { + info "Disabling kata tracing on $kata_cfg_file" + sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing false + sudo crudini --set "$kata_cfg_file" runtime enable_tracing false +} + +main() { + cmd="$1" + case "$cmd" in + enable ) enable_tracing ;; + disable ) disable_tracing ;; + *) die "invalid command: '$cmd'" ;; + esac +} + +main "$@" diff --git a/tests/functional/tracing/gha-run.sh b/tests/functional/tracing/gha-run.sh new file mode 100755 index 000000000..336992662 --- /dev/null +++ b/tests/functional/tracing/gha-run.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail + +kata_tarball_dir="${2:-kata-artifacts}" +tracing_dir="$(dirname "$(readlink -f "$0")")" +source "${tracing_dir}/../../common.bash" + +function install_dependencies() { + info "Installing the dependencies needed for running the tracing tests" + + # Dependency list of projects that we can rely on the system packages + # - crudini + # - jq + # - socat + # - tmux + declare -a system_deps=( + crudini + jq + socat + tmux + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + # Install docker according to the docker's website documentation + install_docker +} + +function run() { + info "Running tracing tests using ${KATA_HYPERVISOR} hypervisor" + + enabling_hypervisor + bash -c ${tracing_dir}/test-agent-shutdown.sh + bash -c ${tracing_dir}/tracing-test.sh +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) 
install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/functional/tracing/test-agent-shutdown.sh b/tests/functional/tracing/test-agent-shutdown.sh new file mode 100755 index 000000000..6bad962d8 --- /dev/null +++ b/tests/functional/tracing/test-agent-shutdown.sh @@ -0,0 +1,1502 @@ +#!/bin/bash +# Copyright (c) 2021 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +#--------------------------------------------------------------------- +# Description: Test the Kata Containers 2.x rust agent shutdown behaviour. +# +# Normally, the kata-agent process running inside the VM is not shut down; +# once the workload ends and the agent has returned the workload return +# value back to the runtime, the runtime simply kills the VM. This is safe +# since nothing the user cares about is running any more. +# +# However, for agent tracing, a graceful agent shutdown is necessary to ensure +# all trace spans are generated. When *static* agent tracing is enabled, the +# runtime relies entirely on the agent to perform a graceful shutdown _and_ +# shut down the VM. +# +# This script tests the kata-agent in two ways: +# +# - "manually" / "standalone" where the agent binary is run directly. +# - Inside a Kata VM, started by a shimv2-capable container manager +# (containerd). +# +# In both cases, the agent is shut down using the agent-ctl tool +# to request the agent shut down gracefully. +# +# Various configuration options are also tested. One of these enables +# the agents built-in (VSOCK) debug console. This test not only enables +# the option but also connects to the created console. +# +# Since this script needs to start various programs with a terminal, +# it uses tmux(1) consistently to simplify the handling logic. 
+#--------------------------------------------------------------------- + +readonly script_name=${0##*/} + +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../../common.bash" +source "/etc/os-release" || source "/usr/lib/os-release" + +CTR_RUNTIME=${CTR_RUNTIME:-"io.containerd.kata.v2"} + +# Kata always uses this value +EXPECTED_VSOCK_PORT="1024" + +DOCKER_IMAGE=${DOCKER_IMAGE:-"busybox"} +CTR_IMAGE=${CTR_IMAGE:-"quay.io/prometheus/busybox:latest"} + +# Number of times the test should be run +KATA_AGENT_SHUTDOWN_TEST_COUNT=${KATA_AGENT_SHUTDOWN_TEST_COUNT:-1} + +# Default VSOCK port used by the agent +KATA_AGENT_VSOCK_CONSOLE_PORT=${KATA_AGENT_VSOCK_CONSOLE_PORT:-1026} + +# The shutdown test type that represents a "default" / vanilla Kata +# installation (where no debug options are enabled). +VANILLA_TEST_TYPE='default' + +# Name of tmux(1) sessions to create to run Kata VM and local agent in +KATA_TMUX_VM_SESSION="kata-shutdown-test-vm-session" +KATA_TMUX_LOCAL_SESSION="kata-shutdown-test-local-agent-session" + +# Name of tmux(1) session to create to run a debug console in +KATA_TMUX_CONSOLE_SESSION="kata-shutdown-test-console-session" + +# tmux(1) session to run the trace forwarder in +KATA_TMUX_FORWARDER_SESSION="kata-shutdown-test-trace-forwarder-session" + +KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +# List of test types used by configure_kata(). +# +# Each element contains four colon delimited fields: +# +# 1: Name. +# 2: Whether debug should be enabled for the agent+runtime. +# 3: Whether hypervisor debug should be enabled. +# (handled separately due to a previous bug which blocked agent shutdown). +# 4: Whether a VSOCK debug console should be configured and used. +# +# Notes: +# +# - Tests are run in the order found in this array. +# - An array is used (rather than a hash) to ensure the standard/vanilla +# configuration is run *last*. 
The reason for this being that debug is
+# needed to diagnose shutdown errors, so there is no point in running
+# the default scenario first, in case it fails (and is thus "undebuggable").
+shutdown_test_types=(
+ 'with-debug:true:false:false'
+ 'with-debug-console:false:false:true'
+ 'with-hypervisor-debug:true:true:false'
+ 'with-everything:true:true:true'
+ "${VANILLA_TEST_TYPE}:false:false:false"
+)
+
+# Number of fields each entry in the 'shutdown_test_types' array should have.
+shutdown_test_type_fields=4
+
+# Pseudo test type name that represents all test types defined
+# in the 'shutdown_test_types' array.
+ALL_TEST_TYPES='all'
+
+DEFAULT_SHUTDOWN_TEST_TYPE="${ALL_TEST_TYPES}"
+
+# List of ways of running the agent:
+#
+# Each element contains two colon delimited fields:
+#
+# 1: Name used for a particular way of running the agent.
+# 2: Description.
+agent_test_types=(
+ 'local:Run agent using agent-ctl tool'
+ 'vm:Run agent inside a Kata Container'
+)
+
+# Default value from the 'agent_test_types' array.
+DEFAULT_AGENT_TEST_TYPE='vm'
+
+# Set by every call to run_single_agent()
+test_start_time=
+test_end_time=
+
+#-------------------------------------------------------------------------------
+# Settings
+
+# values used to wait for local and VM processes to start and end.
+wait_time_secs=${WAIT_TIME_SECS:-20}
+sleep_time_secs=${SLEEP_TIME_SECS:-1}
+
+# Time to allow for the agent and VM to shutdown
+shutdown_time_secs=${SHUTDOWN_TIME_SECS:-120}
+
+# Name for the container that will be created
+container_id="${CONTAINER_ID:-kata-agent-shutdown-test}"
+
+# If 'true', don't run any commands, just show what would be run.
+dry_run="${DRY_RUN:-false}"
+
+# If 'true', don't remove logs on a successful run.
+keep_logs="${KEEP_LOGS:-false}"
+
+# Name of socket file used by a local agent.
+agent_socket_file="kata-agent.socket"
+
+# Kata Agent socket URI.
+#
+# Notes:
+#
+# - The file is an abstract socket
+# (meaning it is not visible in the filesystem).
+# +# - The agent and the agent-ctl tool use slightly different +# address formats for abstract sockets. +local_agent_server_addr="unix://${agent_socket_file}" +local_agent_ctl_server_addr="unix://@${agent_socket_file}" + +# Address that is dynamically configured when using CLH before +# starting trace forwarder or container +clh_socket_path= +clh_socket_prefix="/run/vc/vm/" + +ctl_log_file="${PWD}/agent-ctl.log" + +# Log file that must contain agent output. +agent_log_file="${PWD}/kata-agent.log" + +# Set in setup() based on KATA_HYPERVISOR +# Supported hypervisors are qemu and clh +configured_hypervisor= +# String that would appear in config file (qemu or clh) +configured_hypervisor_cfg= + +# Full path to directory containing an OCI bundle based on "$DOCKER_IMAGE", +# which is required by the agent control tool. +bundle_dir=${BUNDLE_DIR:-""} + +#--------------------------------------- +# Default values + +default_arch=$(uname -m) +arch="${arch:-${default_arch}}" + +#------------------------------------------------------------------------------- + +agent_binary="/usr/bin/kata-agent" + +# Maximum debug level +default_agent_log_level="trace" + +agent_log_level=${agent_log_level:-${default_agent_log_level}} + +# Full path to the main configuration file (set by setup()). +kata_cfg_file= + +# Set in setup() based on KATA_HYPERVISOR +hypervisor_binary= + + +#------------------------------------------------------------------------------- + +[ -n "${DEBUG:-}" ] && set -o xtrace + +usage() +{ + cat < : Agent test type to use + (default: '$DEFAULT_AGENT_TEST_TYPE'). + -c : Run specified number of iterations + (default: $KATA_AGENT_SHUTDOWN_TEST_COUNT). + -d : Enable debug (shell trace) output. + -h : Show this help statement. + -k : Keep logs on successful run + (default: logs will be deleted on success). + -l : List all available agent and shutdown test types. + -n : Dry-run mode - show the commands that would be run. 
+ -t : Only run the specified shutdown test type + (default: '$DEFAULT_SHUTDOWN_TEST_TYPE'). + +Notes: + +- These tests should be run *before* the Kata Agent tracing tests, since if + the agent cannot be shut down, static tracing will not work reliably. + +- By default all shutdown test types are run, but only the default agent test + type is run. + +EOF +} + +warn() +{ + echo >&2 "WARNING: $*" +} + +# Run the specified command, or if dry-run mode is enabled, +# just show the command that would be run. +run_cmd() +{ + local cmdline="$@" + + if [ "$dry_run" = 'true' ] + then + info "dry-run: Would run: '$cmdline'" + else + eval $cmdline + fi +} + +# Show a subset of processes (for debugging) +show_procs() +{ + info "Processes" + + local hypervisor + hypervisor="qemu" + [ ${configured_hypervisor} = "clh" ] && hypervisor="cloud-hypervisor" + + local patterns=() + + patterns+=("kata-agent-ctl") + patterns+=("${hypervisor}") + patterns+=("containerd") + patterns+=("ctr") + + local pattern_list + pattern_list=$(echo "${patterns[@]}"|tr ' ' '|') + + local regex + regex="(${pattern_list})" + + ps -efww | egrep -i "$regex" || true +} + +kill_tmux_sessions() +{ + local session + + for session in \ + "$KATA_TMUX_CONSOLE_SESSION" \ + "$KATA_TMUX_FORWARDER_SESSION" \ + "$KATA_TMUX_LOCAL_SESSION" \ + "$KATA_TMUX_VM_SESSION" + do + tmux kill-session -t "$session" &>/dev/null || true + done + + true +} + +get_shutdown_test_type_entry() +{ + local shutdown_test_type="${1:-}" + [ -z "$shutdown_test_type" ] && die "need shutdown test type name" + + local entry + + for entry in "${shutdown_test_types[@]}" + do + local count + count=$(echo "$entry"|tr ':' '\n'|wc -l) + [ "$count" -eq "$shutdown_test_type_fields" ] \ + || die "expected $shutdown_test_type_fields fields, found $count: '$entry'" + + local name + + name=$(echo "$entry"|cut -d: -f1) + + [ "$name" = "$shutdown_test_type" ] \ + && echo "$entry" \ + && break + done + + echo +} + +list_shutdown_test_types() +{ + local entry + 
local debug_value + local hypervisor_debug_value + local debug_console_value + + printf "# Shutdown test types:\n\n" + + printf "%-24s %-15s %-23s %s\n\n" \ + "Test type" \ + "Debug enabled" \ + "Hypervisor debug" \ + "Debug console used" + + for entry in "${shutdown_test_types[@]}" + do + local name + local debug_value + local hypervisor_debug_value + local debug_console_value + + name=$(echo "$entry"|cut -d: -f1) + debug_value=$(echo "$entry"|cut -d: -f2) + hypervisor_debug_value=$(echo "$entry"|cut -d: -f3) + debug_console_value=$(echo "$entry"|cut -d: -f4) + + printf "%-24s %-15s %-23s %s\n" \ + "$name" \ + "$debug_value" \ + "$hypervisor_debug_value" \ + "$debug_console_value" + done + + echo +} + +list_agent_test_types() +{ + local entry + + printf "# Agent test types:\n\n" + + printf "%-12s %s\n\n" \ + "Agent type" \ + "Description" + + for entry in "${agent_test_types[@]}" + do + local name + local descr + + name=$(echo "$entry"|cut -d: -f1) + descr=$(echo "$entry"|cut -d: -f2-) + + local msg="" + + [ "$name" = "$DEFAULT_AGENT_TEST_TYPE" ] && msg=" (default)" + + printf "%-12s %s%s.\n" \ + "$name" \ + "$descr" \ + "$msg" + done + + echo +} + +list_test_types() +{ + list_agent_test_types + list_shutdown_test_types +} + +# Set Kata options according to test type. 
+configure_kata() +{ + local shutdown_test_type="${1:-}" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + local entry + local debug_value + local hypervisor_debug_value + local debug_console_value + + local entry + entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) + [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" + + debug_value=$(echo "$entry"|cut -d: -f2) + hypervisor_debug_value=$(echo "$entry"|cut -d: -f3) + debug_console_value=$(echo "$entry"|cut -d: -f4) + + [ -z "$debug_value" ] && \ + die "need debug value for $shutdown_test_type" + + [ -z "$hypervisor_debug_value" ] && \ + die "need hypervisor debug value for $shutdown_test_type" + + [ -z "$debug_console_value" ] && \ + die "need debug console value for $shutdown_test_type" + + toggle_debug "$debug_value" "$hypervisor_debug_value" + toggle_vsock_debug_console "$debug_console_value" + + # Enable agent tracing + # + # Even though this program only tests agent shutdown, static tracing + # must be configured. This is because normally (with tracing + # disabled), the runtime kills the VM after the workload has exited. + # However, if static tracing is enabled, the runtime will not kill the + # VM - the responsibility for shutting down the VM is given to the + # agent process running inside the VM. + + if [ "$shutdown_test_type" = "$VANILLA_TEST_TYPE" ] + then + # We don't need to worry about the 'trace_mode' here since agent tracing + # is *only* enabled if the 'enable_tracing' variable is set. 
+ run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'false'
+ else
+ run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'true'
+ fi
+}
+
+unconfigure_kata()
+{
+ info "Resetting configuration to defaults"
+
+ configure_kata "$VANILLA_TEST_TYPE"
+}
+
+# Enable/disable the agent's built-in VSOCK debug console
+toggle_vsock_debug_console()
+{
+ run_cmd sudo crudini --set "${kata_cfg_file}" \
+ 'agent.kata' 'debug_console_enabled' "$1"
+}
+
+# Enable/disable debug options.
+#
+# Note: Don't use 'kata-manager.sh "enable-debug"' since this
+# enables all debug (including the problematic hypervisor
+# debug - see below).
+toggle_debug()
+{
+ local value="${1:-}"
+ local hypervisor_debug="${2:-}"
+
+ [ -z "$value" ] && die "need value"
+ [ -z "$hypervisor_debug" ] && die "need hypervisor debug value"
+
+ # list of configuration.toml sections that have debug options we care about
+ local debug_sections=()
+
+ debug_sections+=('agent.kata')
+ debug_sections+=('runtime')
+
+ local section
+
+ for section in "${debug_sections[@]}"
+ do
+ run_cmd sudo crudini --set "$kata_cfg_file" "$section" \
+ 'enable_debug' "$value"
+ done
+
+ # XXX: Enabling hypervisor debug for QEMU will make a systemd debug
+ # console service inoperable (*), but we need to test it anyhow.
+ #
+ # (*) - If enabled, it stops "kata-debug.service" from attaching to
+ # the console and the socat call made on the client hangs until
+ # the VM is shut down!
+ local section
+
+ section=$(printf "hypervisor.%s" "$configured_hypervisor_cfg")
+
+ run_cmd sudo crudini --set "$kata_cfg_file" "$section" \
+ 'enable_debug' "$hypervisor_debug"
+}
+
+# Provide a "semi-valid" vsock address for when dry-run mode is active.
+# The URI includes a message telling the user to change it and replace
+# with the real VSOCK CID value.
+get_dry_run_agent_vsock_address() +{ + echo "vsock://FIXME-CHANGE-TO-VSOCK-CID:${EXPECTED_VSOCK_PORT}" +} + +# Start a debug console shell using the agent's built-in debug console +# feature. +# +# Note: You should be able to use "kata-runtime exec $cid", but that isn't +# working currently. +connect_to_vsock_debug_console() +{ + local agent_addr + + if [ "$dry_run" = 'true' ] + then + agent_addr=$(get_dry_run_agent_vsock_address) + else + agent_addr=$(get_agent_vsock_address || true) + [ -z "$agent_addr" ] && die "cannot determine agent VSOCK address" + fi + + local socat_connect= + if [ $configured_hypervisor = "qemu" ]; then + socat_connect=$(echo "$agent_addr"|sed 's!^vsock://!vsock-connect:!') + elif [ $configured_hypervisor = "clh" ]; then + socat_connect="unix-connect:${clh_socket_path}" + else + die "Cannot configure address for socat, unknown hypervisor: '$configured_hypervisor'" + fi + + run_cmd \ + "tmux new-session \ + -d \ + -s \"$KATA_TMUX_CONSOLE_SESSION\" \ + \"socat \ + '${socat_connect}' \ + stdout\"" + +} + +cleanup() +{ + # Save the result of the last call made before + # this handler was called. + # + # XXX: This *MUST* be the first command in this function! + local failure_ret="$?" 
+ + [ "$dry_run" = 'true' ] && return 0 + + if [ "$failure_ret" -eq 0 ] && [ "$keep_logs" = 'true' ] + then + info "SUCCESS: Test passed, but leaving logs:" + info "" + info "agent log file : ${agent_log_file}" + info "agent-ctl log file : ${ctl_log_file}" + info "OCI bundle directory : ${bundle_dir}" + + return 0 + fi + + local arg="${1:-}" + + if [ $failure_ret -ne 0 ] && [ "$arg" != 'initial' ]; then + warn "ERROR: Test failed" + warn "" + warn "Not cleaning up to help debug failure:" + warn "" + + if [ "${CI:-}" = "true" ] + then + show_procs + + info "VSOCK details" + ss -Hp --vsock || true + + info "agent-ctl log file" + sudo cat "${ctl_log_file}" || true + echo + + info "agent log file" + sudo cat "${agent_log_file}" || true + echo + + else + info "agent-ctl log file : ${ctl_log_file}" + info "agent log file : ${agent_log_file}" + fi + + info "OCI bundle directory : ${bundle_dir}" + + return 0 + fi + + kill_tmux_sessions + + unconfigure_kata + + [ "$arg" != 'initial' ] && [ -d "$bundle_dir" ] && rm -rf "$bundle_dir" + + sudo rm -f \ + "$agent_log_file" \ + "$ctl_log_file" + + clean_env_ctr &>/dev/null || true + + local sandbox_dir="/run/sandbox-ns/" + + # XXX: Without doing this, the agent will hang attempting to create the + # XXX: namespaces (in function "setup_shared_namespaces()") + sudo umount -f "${sandbox_dir}/uts" "${sandbox_dir}/ipc" &>/dev/null || true + sudo rm -rf "${sandbox_dir}" &>/dev/null || true + + # Check that clh socket was deleted + if [ $configured_hypervisor = "clh" ] && [ ! -z $clh_socket_path ]; then + [ -f $clh_socket_path ] && die "CLH socket path $clh_socket_path was not properly cleaned up" + fi + + sudo systemctl restart containerd +} + +setup_containerd() +{ + local file="/etc/containerd/config.toml" + + [ -e "$file" ] || die "missing containerd config file: '$file'" + + # Although the containerd config file is in TOML format, crudini(1) + # won't parse it due to the indentation it uses. 
+ local containerd_debug_enabled + + containerd_debug_enabled=$(sed \ + -e '/./{H;$!d;}' \ + -e 'x;/\[debug\]/!d;' \ + "$file" |\ + grep "level *= *\"debug\"" || true) + + if [ -z "$containerd_debug_enabled" ] + then + cat <<-EOF | sudo tee -a "$file" + [debug] + # Allow Kata Containers debug messages to be propageted + # into the hosts journal. + # (use "journalctl -t kata" to view). + level = "debug" + EOF + + sudo systemctl restart containerd + fi + + sudo ctr image pull "$CTR_IMAGE" + + true +} + +create_oci_rootfs() +{ + local dir="${1:-}" + + [ -z "$dir" ] && die "Need OCI rootfs dir" + + sudo docker export $(sudo docker create "$DOCKER_IMAGE") |\ + tar -C "${dir}" -xvf - >/dev/null +} + +setup_oci_bundle() +{ + bundle_dir="$(mktemp -d)" + export bundle_dir + + info "Creating OCI bundle in directory: '$bundle_dir'" + + local config="${bundle_dir}/config.json" + local rootfs_dir="${bundle_dir}/rootfs/" + + mkdir -p "$rootfs_dir" + + create_oci_rootfs "$rootfs_dir" + + pushd "$bundle_dir" &>/dev/null + runc spec + popd &>/dev/null + + [ -e "$config" ] || die "no OCI config file at ${config}" +} + +setup() +{ + configured_hypervisor="${KATA_HYPERVISOR:-}" + + if [ "${KATA_HYPERVISOR:-}" = "qemu" ]; then + hypervisor_binary="qemu-system-${arch}" + configured_hypervisor_cfg="qemu" + elif [ "${KATA_HYPERVISOR:-}" = "clh" ]; then + hypervisor_binary="cloud-hypervisor" + configured_hypervisor_cfg="clh" + else + local msg="" + msg+="Exiting as hypervisor test dependency not met" + msg+=" (expected 'qemu' or 'cloud-hypervisor', found '$KATA_HYPERVISOR')" + die "$msg" + fi + info "Configured hypervisor is $configured_hypervisor" + + trap cleanup EXIT + + # Don't mess with an existing tmux session + unset TMUX + + [ "$dry_run" = 'false' ] && \ + [ -z "$bundle_dir" ] && \ + setup_oci_bundle || true + + local cmds=() + + # For parsing TOML config files + cmds+=('crudini') + + # For container manager (containerd) + cmds+=('ctr') + + # for OCI bundle creation + 
cmds+=('docker') + cmds+=('runc') + + # For querying VSOCK sockets + cmds+=('socat') + + # For launching processes + cmds+=('tmux') + + local cmd + + for cmd in "${cmds[@]}" + do + local result + result=$(command -v "$cmd" || true) + [ -n "$result" ] || die "need $cmd" + done + + kata_cfg_file=$(kata-runtime kata-env \ + --json |\ + jq '.Runtime | .Config | .Path' |\ + cut -d\" -f2 || true) + + [ -z "$kata_cfg_file" ] && die "Cannot determine config file" + + sudo mkdir -p $(dirname "$kata_cfg_file") + + #------------------------------ + # Check configured hypervisor + + local hypervisor_section + + hypervisor_section=$(printf "hypervisor.%s\n" "${configured_hypervisor_cfg}") + + local ret + + { crudini --get "${kata_cfg_file}" "${hypervisor_section}" &>/dev/null; ret=$?; } || true + + [ "$ret" -eq 0 ] || \ + die "Configured hypervisor ${configured_hypervisor} does not match config file ${kata_cfg_file}" + + setup_containerd +} + +start_local_agent() +{ + local log_file="${1:-}" + [ -z "$log_file" ] && die "need agent log file" + + local running + running=$(get_local_agent_pid || true) + + [ -n "$running" ] && die "agent already running: '$running'" + + # Note: it's imperative that we capture stderr to the log file + # as the agent writes the shutdown message to this stream! 
+ run_cmd \ + "tmux new-session \ + -d \ + -s \"$KATA_TMUX_LOCAL_SESSION\" \ + \"sudo \ + RUST_BACKTRACE=full \ + KATA_AGENT_LOG_LEVEL=${agent_log_level} \ + KATA_AGENT_SERVER_ADDR=${local_agent_server_addr} \ + ${agent_binary} \ + &> ${log_file}\"" + + [ "$dry_run" = 'false' ] && wait_for_local_agent_to_start || true +} + +# Wait for the agent to finish starting +wait_for_kata_vm_agent_to_start() +{ + local cid="${1:-}" + [ -z "$log_file" ] && die "need container ID" + + # First, check the containerd status of the container + local cmd="sudo ctr task list | grep \"${cid}\" | grep -q \"RUNNING\"" + + info "Waiting for VM to start (cid: '$cid')" + + waitForProcess \ + "$wait_time_secs" \ + "$sleep_time_secs" \ + "$cmd" + + show_procs + + # Next, ensure there is a valid VSOCK address for the VM + info "Waiting for agent VSOCK server" + + cmd="get_agent_vsock_address_simple >/dev/null" + + waitForProcess \ + "$wait_time_secs" \ + "$sleep_time_secs" \ + "$cmd" + + info "Kata VM running" +} + +check_local_agent_alive() +{ + local cmds=() + + cmds+=("-c Check") + + run_agent_ctl \ + "${local_agent_ctl_server_addr}" \ + "${cmds[@]}" + + true +} + +wait_for_local_agent_to_start() +{ + local cmd="check_local_agent_alive" + + info "Waiting for agent process to start" + + waitForProcess \ + "$wait_time_secs" \ + "$sleep_time_secs" \ + "$cmd" + + info "Kata agent process running" +} + +# Create a Kata Container that blocks "forever" +start_agent_in_kata_vm() +{ + local log_file="${1:-}" + [ -z "$log_file" ] && die "need agent log file" + + local snapshotter="" + local ret + + # Allow containerd to run on a ZFS root filesystem + { zfs list &>/dev/null; ret=$?; } || true + [ "$ret" = 0 ] && snapshotter='zfs' + + # Ensure the container blocks forever + local cmd='tail -f /dev/null' + + run_cmd \ + "tmux new-session \ + -d \ + -s \"$KATA_TMUX_VM_SESSION\" \ + \"sudo ctr run \ + --snapshotter '$snapshotter' \ + --runtime '${CTR_RUNTIME}' \ + --rm \ + -t '${CTR_IMAGE}' \ + 
'$container_id' \ + $cmd\"" + + [ "$dry_run" = 'false' ] && \ + wait_for_kata_vm_agent_to_start "$container_id" || true +} + +start_agent() +{ + local agent_test_type="${1:-}" + [ -z "$agent_test_type" ] && die "need agent test type" + + local log_file="${2:-}" + [ -z "$log_file" ] && die "need agent log file" + + case "$agent_test_type" in + 'local') start_local_agent "$log_file" ;; + 'vm') start_agent_in_kata_vm "$log_file" ;; + *) die "invalid agent test type: '$agent_test_type'" ;; + esac + + true +} + +run_agent_ctl() +{ + local server_addr="${1:-}" + + shift + + local cmds="${*:-}" + + [ -n "$server_addr" ] || die "need agent ttRPC server address" + [ -n "$cmds" ] || die "need commands for agent control tool" + + local agent_ctl_path + agent_ctl_path="/opt/kata/bin/kata-agent-ctl" + + local redirect="&>\"${ctl_log_file}\"" + + if [ "$dry_run" = 'true' ] + then + redirect="" + bundle_dir="FIXME-set-to-OCI-bundle-directory" + fi + + local server_address= + if [ $configured_hypervisor = "qemu" ]; then + server_address="--server-address \"${server_addr}\"" + elif [ $configured_hypervisor = "clh" ]; then + server_address="--server-address \"${server_addr}\" --hybrid-vsock" + else + die "Cannot configure server address, unknown hypervisor: '$configured_hypervisor'" + fi + + run_cmd \ + sudo \ + RUST_BACKTRACE=full \ + "${agent_ctl_path}" \ + -l debug \ + connect \ + "${server_address}" \ + --bundle-dir "${bundle_dir}" \ + "${cmds}" \ + "${redirect}" +} + +# This function "cheats" a little - it gets the agent +# to do some work *and then* stops it. 
+stop_local_agent() +{ + local cmds=() + + cmds+=("-c Check") + cmds+=("-c GetGuestDetails") + cmds+=("-c 'sleep 1s'") + cmds+=("-c DestroySandbox") + + run_agent_ctl \ + "${local_agent_ctl_server_addr}" \ + "${cmds[@]}" +} + +get_addresses() +{ + local addresses= + + if [ $configured_hypervisor = "qemu" ]; then + addresses=$(ss -Hp --vsock |\ + egrep -v "\" |\ + awk '$2 ~ /^ESTAB$/ {print $6}' |\ + grep ":${EXPECTED_VSOCK_PORT}$") + elif [ $configured_hypervisor = "clh" ]; then + # since we preconfigured the socket, we are checking to see if it is reported + addresses=$(ss -Hp |\ + grep "${clh_socket_path}" |\ + awk '$2 ~ /^ESTAB$/ {print $5}') + else + die "Cannot retrieve address, unknown hypervisor: '$configured_hypervisor'" + fi + + echo ${addresses} +} + +# Doesn't fail. Instead it will return the empty string on error. +get_agent_vsock_address_simple() +{ + local addresses=$(get_addresses) + + [ -z "$addresses" ] && return 1 + + local expected_count=1 + + local count + count=$(echo "$addresses"|wc -l || true) + + [ "$count" -eq "$expected_count" ] || return 1 + + if [ $configured_hypervisor = "qemu" ]; then + local cid + local port + + cid=$(echo "$addresses"|cut -d: -f1) + port=$(echo "$addresses"|cut -d: -f2) + + echo "vsock://${cid}:${port}" + elif [ $configured_hypervisor = "clh" ]; then + address=$(echo "$addresses" | awk 'NR==1{print $1}') + echo "unix://${address}" + else + die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" + fi + + return 0 +} + +get_agent_vsock_address() +{ + local addresses=$(get_addresses) + + [ -z "$addresses" ] && die "no VSOCK connections found" + + local expected_count=1 + + local count + count=$(echo "$addresses"|wc -l || true) + + if [ $configured_hypervisor = "qemu" ]; then + # For QEMU we always expect 1 result. 
For Cloud Hypervisor, if a debug console is configured + # and running, we will have more than 1 result, so only run this check for QEMU + [ "$count" -eq "$expected_count" ] \ + || die "expected $expected_count VSOCK entry, found $count: '$addresses'" + + local cid + local port + + cid=$(echo "$addresses"|cut -d: -f1) + port=$(echo "$addresses"|cut -d: -f2) + + echo "vsock://${cid}:${port}" + elif [ $configured_hypervisor = "clh" ]; then + address=$(echo "$addresses" | awk 'NR==1{print $1}') + echo "unix://${address}" + else + die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" + fi +} + +stop_agent_in_kata_vm() +{ + local agent_addr + + if [ "$dry_run" = 'true' ] + then + agent_addr=$(get_dry_run_agent_vsock_address) + else + agent_addr=$(get_agent_vsock_address || true) + [ -z "$agent_addr" ] && \ + die "cannot determine agent VSOCK address for $hypervisor_binary" + fi + + # List of API commands to send to the agent. + local cmds=() + + # Run a couple of query commands first to ensure + # the agent is listening. + cmds+=("-c Check") + cmds+=("-c GetGuestDetails") + + # Creating a container implies creating a sandbox, so request + # agent/VM/container shutdown by asking the agent + # to destroy the sandbox. 
+ cmds+=("-c DestroySandbox") + + run_agent_ctl \ + "${agent_addr}" \ + "${cmds[@]}" + + true +} + +stop_agent() +{ + info "Stopping agent" + + local agent_test_type="${1:-}" + [ -z "$agent_test_type" ] && die "need agent test type" + + local log_file="${2:-}" + [ -z "$log_file" ] && die "need agent-ctl log file" + + case "$agent_test_type" in + 'local') stop_local_agent ;; + 'vm') stop_agent_in_kata_vm ;; + *) die "invalid agent test type: '$agent_test_type'" ;; + esac + + true +} + +get_local_agent_pid() +{ + local pids + + local name + name=$(basename "$agent_binary") + + pids=$(pgrep "$name" || true) + [ -z "$pids" ] && return 0 + + local count + count=$(echo "$pids"|wc -l) + + [ "$count" -gt 1 ] && \ + die "too many agent processes running ($count, '$pids')" + + echo $pids +} + +# Function that writes all agent logs to '$agent_log_file'. +get_agent_log_file() +{ + local agent_test_type="${1:-}" + [ -z "$agent_test_type" ] && die "need agent test type" + + local log_file="${2:-}" + [ -z "$log_file" ] && die "need agent log file" + + info "Getting agent log details" + + case "$agent_test_type" in + # NOP: File should have been created by start_local_agent() + 'local') true ;; + + # Extract journal entries for the duration of the test + 'vm') + sudo journalctl \ + -q \ + -a \ + -o cat \ + -t 'kata' \ + --since="$test_start_time" \ + > "$log_file" + ;; + + *) die "invalid agent test type: '$agent_test_type'" ;; + esac + + [ -e "$log_file" ] || die "no log file: '$log_file'" + [ -s "$log_file" ] || die "empty log file: '$log_file'" + + true +} + +# Function to run to ensure correct behaviour +validate_agent() +{ + local agent_test_type="${1:-}" + local shutdown_test_type="${2:-}" + local log_file="${3:-}" + + [ -z "$agent_test_type" ] && die "need agent test type" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + [ -z "$log_file" ] && die "need agent log file" + + info "validating" + + get_agent_log_file \ + "$agent_test_type" \ + "$log_file" + + # 
Regular expression that describes possible agent failures + local regex="(slog::Fuse|Drain|Custom|serialization error|thread.*panicked|stack backtrace:)" + + egrep -q "$regex" "$log_file" && cat $log_file && die "Found agent error in log file: '$log_file'" + + local entry + entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) + [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" + + local hypervisor_debug=$(echo "$entry"|cut -d: -f3) + local vsock_console=$(echo "$entry"|cut -d: -f4) + + local agent_debug_logs_available='false' + + [ "$hypervisor_debug" = 'true' ] && \ + [ "$vsock_console" = 'false' ] && \ + agent_debug_logs_available='true' + + if [ "$agent_debug_logs_available" = 'true' ] || [ "$agent_test_type" = 'local' ] + then + # The message the agent writes to stderr just before it exits. + local done_msg="\" + + egrep -q "$done_msg" "$log_file" || (cat $log_file && die "missing agent shutdown message") + else + # We can only check for the shutdown message if the agent debug + # logs are available. + info "Not checking for agent shutdown message as hypervisor debug disabled" + fi +} + +setup_agent() +{ + local shutdown_test_type="${1:-}" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + kill_tmux_sessions + + configure_kata "$shutdown_test_type" + + true +} + +# Even though this test is not testing tracing, agent tracing needs to be +# enabled to stop the runtime from killing the VM. However, if tracing is +# enabled, the forwarder must be running. To remove the need for Jaeger to +# also be running, run the forwarder in "NOP" mode. 
+run_trace_forwarder() +{ + local forwarder_binary_path + forwarder_binary_path="/opt/kata/bin/kata-trace-forwarder" + + local socket_path_tf="" + + # If using CLH, socket path must be passed to trace forwarder + if [ $configured_hypervisor = "clh" ]; then + socket_path_tf="--socket-path ${clh_socket_path}" + fi + + run_cmd \ + "tmux new-session \ + -d \ + -s \"$KATA_TMUX_FORWARDER_SESSION\" \ + sudo \"$forwarder_binary_path --dump-only -l trace ${socket_path_tf}\"" +} + +check_agent_stopped() +{ + info "Checking agent stopped" + + local agent_test_type="${1:-}" + [ -z "$agent_test_type" ] && die "need agent test type" + + local cmd= + + case "$agent_test_type" in + 'local') cmd=check_local_agent_stopped ;; + 'vm') cmd=check_vm_stopped ;; + *) die "invalid agent test type: '$agent_test_type'" ;; + esac + + waitForProcess \ + "$shutdown_time_secs" \ + "$sleep_time_secs" \ + "$cmd" + + true +} + +check_local_agent_stopped() +{ + local ret=0 + + local i=0 + local max=20 + + agent_ended="false" + + local agent_pid + agent_pid=$(get_local_agent_pid || true) + + # Agent has finished + [ -z "$agent_pid" ] && return 0 + + for _ in $(seq "$max") + do + { sudo kill -0 "$agent_pid"; ret=$?; } || true + + [ "$ret" -ne 0 ] && agent_ended="true" && break + + sleep 0.2 + done + + [ "$agent_ended" = "false" ] && die "agent still running: pid $agent_pid" || true +} + +get_vm_pid() +{ + pgrep "$hypervisor_binary" +} + +check_vm_stopped() +{ + tmux list-sessions |\ + grep -q "^${KATA_TMUX_VM_SESSION}:" \ + && return 1 + + return 0 +} + +start_debug_console() +{ + local agent_test_type="${1:-}" + local shutdown_test_type="${2:-}" + + [ -z "$agent_test_type" ] && die "need agent test type" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + info "Starting debug console" + + case "$agent_test_type" in + 'vm') connect_to_vsock_debug_console ;; + # NOP for a local agent since we cannot connect to the agents + # VSOCK console socket from *outside* the host! 
+ 'local') true ;; + *) die "invalid agent test type: '$agent_test_type'" ;; + esac + + true +} + +run_single_agent() +{ + local agent_test_type="${1:-}" + local shutdown_test_type="${2:-}" + + [ -z "$agent_test_type" ] && die "need agent test type" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + local msg + msg=$(printf \ + "Testing agent (agent test type: '%s', shutdown test type: '%s')" \ + "$agent_test_type" \ + "$shutdown_test_type") + info "$msg" + + setup_agent "$shutdown_test_type" + + if [ $configured_hypervisor = "clh" ]; then + # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify + socket_path_template=$clh_socket_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') + clh_socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') + [ "$dry_run" = 'false' ] && sudo mkdir -p $(dirname "$clh_socket_path") + fi + + run_trace_forwarder "$shutdown_test_type" + + sleep 5s + + test_start_time=$(date '+%F %T') + + start_agent \ + "$agent_test_type" \ + "$agent_log_file" + + info "Testing agent: shutdown test type: '$shutdown_test_type', agent test type: $agent_test_type" + + local entry + entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) + local debug_console=$(echo "$entry"|cut -d: -f4) + [ "$debug_console" = 'true' ] && \ + start_debug_console \ + "$agent_test_type" \ + "$shutdown_test_type" + + stop_agent \ + "$agent_test_type" \ + "$ctl_log_file" + + # We only need to show the set of commands once + [ "$dry_run" = 'true' ] && exit 0 + + test_end_time=$(date '+%F %T') + + check_agent_stopped "$agent_test_type" + + validate_agent \ + "$agent_test_type" \ + "$shutdown_test_type" \ + "$agent_log_file" +} + +run_agent() +{ + local agent_test_type="${1:-}" + local shutdown_test_type="${2:-}" + + [ -z "$agent_test_type" ] && die "need agent test type" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + case "$shutdown_test_type" in + 
"$ALL_TEST_TYPES") + local entry + + # Run all shutdown types + for entry in "${shutdown_test_types[@]}" + do + local name + name=$(echo "$entry"|cut -d: -f1) + + run_single_agent \ + "$agent_test_type" \ + "$name" + + # Clean up between iterations + sudo rm -f \ + "$ctl_log_file" \ + "$agent_log_file" + + local addresses=$(get_addresses || true) + + [ -z "$addresses" ] || \ + die "found unexpected vsock addresses: '$addresses'" + + done + ;; + + *) + run_single_agent \ + "$agent_test_type" \ + "$shutdown_test_type" + ;; + esac + +} + +test_agent_shutdown() +{ + local count="${1:-}" + local agent_test_type="${2:-}" + local shutdown_test_type="${3:-}" + + [ -z "$count" ] && die "need count" + [ -z "$agent_test_type" ] && die "need agent test type" + [ -z "$shutdown_test_type" ] && die "need shutdown test type" + + # Start with a clean environment + [ "$dry_run" = 'false' ] && cleanup initial || true + + local i + + for i in $(seq "$count") + do + [ "$dry_run" = 'false' ] && \ + info "testing agent: run $i of $count" || true + run_agent \ + "$agent_test_type" \ + "$shutdown_test_type" + done + + info "testing agent: completed $count runs" +} + +handle_args() +{ + local opt + + local count="${KATA_AGENT_SHUTDOWN_TEST_COUNT}" + local shutdown_test_type="$DEFAULT_SHUTDOWN_TEST_TYPE" + local agent_test_type="$DEFAULT_AGENT_TEST_TYPE" + + while getopts "a:c:dhklnt:" opt "$@" + do + case "$opt" in + a) agent_test_type="$OPTARG" ;; + c) count="$OPTARG" ;; + d) set -o xtrace ;; + h) usage; exit 0 ;; + k) keep_logs='true' ;; + l) list_test_types; exit 0 ;; + n) dry_run='true' ;; + t) shutdown_test_type="$OPTARG" ;; + *) die "invalid option: '$opt'" ;; + esac + done + + setup + + test_agent_shutdown \ + "$count" \ + "$agent_test_type" \ + "$shutdown_test_type" +} + +main() +{ + handle_args "$@" +} + +main "$@" diff --git a/tests/functional/tracing/tracing-test.sh b/tests/functional/tracing/tracing-test.sh new file mode 100755 index 000000000..e8256ba38 --- /dev/null +++ 
b/tests/functional/tracing/tracing-test.sh @@ -0,0 +1,540 @@ +#!/bin/bash +# Copyright (c) 2019-2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +script_name=${0##*/} +source "/etc/os-release" || source "/usr/lib/os-release" + +# Set to true if all tests pass +success="false" + +DEBUG=${DEBUG:-} + +# If set to any value, do not shut down the Jaeger service. +DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-} +# If set to any value, do not shut down the trace forwarder. +DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-} + +[ -n "$DEBUG" ] && set -o xtrace + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../../common.bash" + +RUNTIME="io.containerd.kata.v2" +CONTAINER_IMAGE="quay.io/prometheus/busybox:latest" + +TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces} + +KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +# files for output +formatted_traces_file="kata-traces-formatted.json" +trace_summary_file="span-summary.txt" + +# tmux(1) session to run the trace forwarder in +KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session" + +forwarder_binary="/opt/kata/bin/kata-trace-forwarder" + +# path prefix for CLH socket path +socket_path_prefix="/run/vc/vm/" + +container_id="tracing-test" + +jaeger_server=${jaeger_server:-localhost} +jaeger_ui_port=${jaeger_ui_port:-16686} +jaeger_docker_container_name="jaeger" + +# Span data for testing: +# 1. Existence of spans in jaeger output +# 2. That the relative ordering in the data occurs +# in the jaeger output. +# This is tested to make sure specific spans exist in the output and +# that the order of spans is preserved. +# Ordered in latest in sequence to earliest. +# +# Fields are all span existing span names in relative order from latest +# to earliest call in a sequence of calls. 
Example (pseudocode): +# func1() { +# span = trace("func1") +# func2() +# end span +# } +# func2() { +# span = trace("func2") +# func3() +# end span +# } +# func3() { +# span = trace("func3") +# end span +# } +# The following data should result in a passing test: +# 'func3:func2:func1' +# 'func3:func2' +# 'func3:func1' +# 'func2:func1' +span_ordering_data=( + 'StartVM:createSandboxFromConfig:create:rootSpan' + 'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan' + 'start_container:Start:rootSpan' + 'stopSandbox:Stop:Start:rootSpan' +) + +# Cleanup will remove Jaeger container and +# disable tracing. +cleanup() +{ + local fp="die" + local result="failed" + local dest="$logdir" + + if [ "$success" = "true" ]; then + local fp="info" + result="passed" + + [ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true + + [ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder + + # The tests worked so remove the logs + if [ -n "$DEBUG" ]; then + eval "$fp" "test $result - logs left in '$dest'" + else + "${SCRIPT_PATH}/configure_tracing_for_kata.sh" disable + + [ -d "$logdir" ] && rm -rf "$logdir" || true + fi + + return 0 + fi + + if [ -n "${CI:-}" ]; then + # Running under the CI, so copy the logs to allow them + # to be added as test artifacts. 
+ sudo mkdir -p "$TRACE_LOG_DIR" + sudo cp -a "$logdir"/* "$TRACE_LOG_DIR" + + dest="$TRACE_LOG_DIR" + fi + + eval "$fp" "test $result - logs left in '$dest'" +} + +# Run an operation to generate Jaeger trace spans +create_traces() +{ + sudo ctr image pull "$CONTAINER_IMAGE" + sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true +} + +start_jaeger() +{ + local jaeger_docker_image="jaegertracing/all-in-one:latest" + + sudo docker rm -f "${jaeger_docker_container_name}" + + # Defaults - see https://www.jaegertracing.io/docs/getting-started/ + sudo docker run -d --runtime runc --name "${jaeger_docker_container_name}" \ + -e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \ + -p 5775:5775/udp \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p "${jaeger_ui_port}:${jaeger_ui_port}" \ + -p 14268:14268 \ + -p 9411:9411 \ + "$jaeger_docker_image" + + sudo mkdir -m 0750 -p "$TRACE_LOG_DIR" +} + +stop_jaeger() +{ + sudo docker stop "${jaeger_docker_container_name}" + sudo docker rm -f "${jaeger_docker_container_name}" +} + +get_jaeger_traces() +{ + local service="$1" + [ -z "$service" ] && die "need jaeger service name" + + local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}" + curl -s "${traces_url}" 2>/dev/null +} + +get_trace_summary() +{ + local status="$1" + [ -z "$status" ] && die "need jaeger status JSON" + + echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh' +} + +get_span_count() +{ + local status="$1" + [ -z "$status" ] && die "need jaeger status JSON" + + # This could be simplified but creating a variable holding the + # summary is useful in debug mode as the summary is displayed. 
+ local trace_summary=$(get_trace_summary "$status" || true) + + [ -z "$trace_summary" ] && die "failed to get trace summary" + + local count=$(echo "${trace_summary}" | wc -l) + + [ -z "$count" ] && count=0 + + echo "$count" +} + +# Returns status from Jaeger web UI +get_jaeger_status() +{ + local service="$1" + local logdir="$2" + + [ -z "$service" ] && die "need jaeger service name" + [ -z "$logdir" ] && die "need logdir" + + local status="" + local span_count=0 + + # Find spans + status=$(get_jaeger_traces "$service" || true) + if [ -n "$status" ]; then + echo "$status" | tee "$logdir/${service}-status.json" + span_count=$(get_span_count "$status") + fi + + [ -z "$status" ] && die "failed to query Jaeger for status" + [ "$span_count" -eq 0 ] && die "failed to find any trace spans" + [ "$span_count" -le 0 ] && die "invalid span count" + + get_trace_summary "$status" > "$logdir/$trace_summary_file" +} + +# Check Jaeger spans for the specified service. +check_jaeger_output() +{ + local service="$1" + local min_spans="$2" + local logdir="$3" + + [ -z "$service" ] && die "need jaeger service name" + [ -z "$min_spans" ] && die "need minimum trace span count" + [ -z "$logdir" ] && die "need logdir" + + local status + local errors=0 + + info "Checking Jaeger status" + + status=$(get_jaeger_status "$service" "$logdir") + + #------------------------------ + # Basic sanity checks + [ -z "$status" ] && die "failed to query status via HTTP" + + local span_lines=$(echo "$status"|jq -S '.data[].spans | length') + [ -z "$span_lines" ] && die "no span status" + + # Log the spans to allow for analysis in case the test fails + echo "$status"|jq -S . 
> "$logdir/${service}-traces-formatted.json" + + local span_lines_count=$(echo "$span_lines"|wc -l) + + # Total up all span counts + local spans=$(echo "$span_lines"|paste -sd+ -|bc) + [ -z "$spans" ] && die "no spans" + + # Ensure total span count is numeric + echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'" + + info "found $spans spans (across $span_lines_count traces)" + + # Validate + [ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans" + + # Look for common errors in span data + local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true) + + if [ -n "$error_msg" ]; then + errors=$((errors+1)) + warn "Found invalid parent span errors: $error_msg" + else + errors=$((errors-1)) + [ "$errors" -lt 0 ] && errors=0 + fi + + # Crude but it works + error_or_warning_msgs=$(echo "$status" |\ + jq -S . 2>/dev/null |\ + jq '.data[].spans[].warnings' |\ + grep -E -v "\" |\ + grep -E -v "\[" |\ + grep -E -v "\]" |\ + grep -E -v "clock skew" || true) # ignore clock skew error + + if [ -n "$error_or_warning_msgs" ]; then + errors=$((errors+1)) + warn "Found errors/warnings: $error_or_warning_msgs" + else + errors=$((errors-1)) + [ "$errors" -lt 0 ] && errors=0 + fi + + [ "$errors" -eq 0 ] || die "errors detected" +} + +# Check output for spans in span_ordering_data +check_spans() +{ + local logdir="$1" + [ -z "$logdir" ] && die "need logdir" + + local errors=0 + + # Check for existence of spans in output so we do not do the more + # time consuming test of checking span ordering if it will fail + info "Checking spans: ${span_ordering_data[@]}" + local missing_spans=() + for span_ordering in "${span_ordering_data[@]}"; do + local test_spans=(`echo $span_ordering | tr ':' ' '`) + for s in "${test_spans[@]}"; do + grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" ) + done + done + if [ "${#missing_spans[@]}" -gt 0 ]; then + die "Fail: Missing spans: ${missing_spans[@]}" + fi + + 
# Check relative ordering of spans. We are not checking full trace, just + # that known calls are not out of order based on the test input. + for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data + local test_spans=(`echo $span_ordering | tr ':' ' '`) + + # create array for span IDs that match span string + local span_ids=() + for span in "${test_spans[@]}"; do + grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span" + id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces" + id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array + span_ids+=("$id_formatted") + done + + # We now have 2 parallel arrays where test_spans[n] is the string name and + # span_ids[n] has all possible span IDs for that string separated by a colon + + # Since functions can be called multiple times, we may have multiple results + # for span IDs. 
+ initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`) + for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans + # construct array of all parents of first span + local retrieved_spans=() + local current_span="$initial" + [ "$current_span" != "" ] || break + + MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors + for i in `seq 1 $MAX_DEPTH`; do + retrieved_spans+=("$current_span") + current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces" + [ "$current_span" != "" ] || break + current_span=$(echo $current_span | tr -d '"') + [ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth" + done + + # Keep track of this index so we can ensure we are testing the constructed array in order + # Increment when there is a match between test case and constructed path + local retrieved_ids_index=0 + + local matches=0 + local index=0 + + # TODO: Optimize + for ((index=0; index<${#span_ids[@]}; index++)); do + for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do + grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break + done + done + + local last_initial_span_index=${#initial_span_ids[@]}-1 + if [ $matches -eq ${#span_ids[@]} ]; then + info "Pass: spans \"${test_spans[@]}\" found in jaeger output" + break + elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then + die "Fail: spans \"${test_spans[@]}\" NOT in jaeger output" + fi + # else repeat test for next initial span ID + done + done + + +} + +run_trace_forwarder() +{ + if [ $KATA_HYPERVISOR = "qemu" ]; then + tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace" + elif [ $KATA_HYPERVISOR = "clh" ]; 
then + # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify + socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') + socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') + sudo mkdir -p $(dirname "$socket_path") + + tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace --socket-path $socket_path" + else + die "Unsupported hypervisor $KATA_HYPERVISOR" + fi + + info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION" + + local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\"" + waitForProcess 10 1 "$cmd" +} + +kill_trace_forwarder() +{ + tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION" +} + +setup() +{ + # containerd must be running in order to use ctr to generate traces + restart_containerd_service + + local cmds=() + # For container manager (containerd) + cmds+=('ctr') + # For jaeger + cmds+=('docker') + # For launching processes + cmds+=('tmux') + + local cmd + for cmd in "${cmds[@]}" + do + local result + result=$(command -v "$cmd" || true) + [ -n "$result" ] || die "need $cmd" + done + + run_trace_forwarder + + start_jaeger + + "${SCRIPT_PATH}/configure_tracing_for_kata.sh" enable +} + +run_test() +{ + local service="$1" + local min_spans="$2" + local logdir="$3" + + [ -z "$service" ] && die "need service name" + [ -z "$min_spans" ] && die "need minimum span count" + [ -z "$logdir" ] && die "need logdir" + + info "Running test for service '$service'" + + logdir="$logdir/$service" + mkdir -p "$logdir" + + check_jaeger_output "$service" "$min_spans" "$logdir" + check_spans "$logdir" + + info "test passed" +} + +run_tests() +{ + # List of services to check + # + # Format: "name:min-spans" + # + # Where: + # + # - 'name' is the Jaeger service name. 
+ # - 'min-spans' is an integer representing the minimum number of
+ #   trace spans this service should generate.
+ #
+ # Notes:
+ #
+ # - Uses an array to ensure predictable ordering.
+ # - All services listed are expected to generate traces
+ #   when create_traces() is called a single time.
+ local -a services
+
+ services+=("kata:125")
+
+ create_traces
+
+ logdir=$(mktemp -d)
+
+ for service in "${services[@]}"
+ do
+ local name=$(echo "${service}"|cut -d: -f1)
+ local min_spans=$(echo "${service}"|cut -d: -f2)
+
+ run_test "${name}" "${min_spans}" "${logdir}"
+ done
+
+ info "all tests passed"
+ success="true"
+}
+
+usage()
+{
+ cat <<EOF
+Usage: ${script_name} [command]
+
+Commands:
+
+  clean - Perform cleanup phase only.
+  help - Show usage.
+  run - Only run tests (no setup or cleanup).
+  setup - Perform setup phase only.
+
+Environment variables:
+
+  CI - if set, save logs of all tests to ${TRACE_LOG_DIR}.
+  DEBUG - if set, enable tracing and do not cleanup after tests.
+  DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service.
+  DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder.
+
+Notes:
+  - Runs all test phases if no arguments are specified.
+
+EOF
+}
+
+main()
+{
+ local cmd="${1:-}"
+
+ case "$cmd" in
+ clean) success="true"; cleanup; exit 0;;
+ help|-h|-help|--help) usage; exit 0;;
+ run) run_tests; exit 0;;
+ setup) setup; exit 0;;
+ esac
+
+ trap cleanup EXIT
+
+ setup
+
+ run_tests
+}
+
+main "$@"
From 63be808730b87eed56e2271269eee34dfcbe22e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?=
Date: Wed, 4 Oct 2023 19:38:04 +0200
Subject: [PATCH 316/339] ci: Add placeholder for runk tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The runk test has been executed as part of the former "ubuntu" jenkins
CI. We're porting it to GHA and running it against LTS containerd.
Signed-off-by: Fabiano Fidêncio (cherry picked from commit 7f23772763882e44e56841e5471b92afa1269b35) --- .github/workflows/ci.yaml | 8 +++++ .github/workflows/run-runk-tests.yaml | 46 +++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 .github/workflows/run-runk-tests.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 90a297712..35c6a40b8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -233,6 +233,14 @@ jobs: commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + run-runk-tests: + needs: build-kata-static-tarball-amd64 + uses: ./.github/workflows/run-runk-tests.yaml + with: + tarball-suffix: -${{ inputs.tag }} + commit-hash: ${{ inputs.commit-hash }} + target-branch: ${{ inputs.target-branch }} + run-vfio-tests: needs: build-kata-static-tarball-amd64 uses: ./.github/workflows/run-vfio-tests.yaml diff --git a/.github/workflows/run-runk-tests.yaml b/.github/workflows/run-runk-tests.yaml new file mode 100644 index 000000000..36c111218 --- /dev/null +++ b/.github/workflows/run-runk-tests.yaml @@ -0,0 +1,46 @@ +name: CI | Run runk tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-runk: + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: lts + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/runk/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install 
kata
        run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts
+
+      - name: Run runk tests
+        run: bash tests/integration/runk/gha-run.sh run
From c1da29b9b1526e91688622afb645c9397fe01179 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?=
Date: Wed, 4 Oct 2023 19:30:49 +0200
Subject: [PATCH 317/339] ci: Port runk tests to this repo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I'm basically moving the runk tests from the tests repo to this one,
and I'm adding the "Signed-off-by:" of every single contributor to the
tests.

Fixes: #8116

Signed-off-by: Fabiano Fidêncio
Signed-off-by: Chen Yiyang
Signed-off-by: Gabriela Cervantes
(cherry picked from commit da91c9df88c7c67bafbcd02bf7a1e0ebec9893b8)
---
 tests/integration/runk/gha-run.sh | 59 +++++++++++++++++
 tests/integration/runk/runk-tests.sh | 99 ++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100755 tests/integration/runk/gha-run.sh
 create mode 100755 tests/integration/runk/runk-tests.sh

diff --git a/tests/integration/runk/gha-run.sh b/tests/integration/runk/gha-run.sh
new file mode 100755
index 000000000..3f97c63aa
--- /dev/null
+++ b/tests/integration/runk/gha-run.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+kata_tarball_dir="${2:-kata-artifacts}"
+runk_dir="$(dirname "$(readlink -f "$0")")"
+source "${runk_dir}/../../common.bash"
+
+function install_dependencies() {
+ info "Installing the dependencies needed for running the runk tests"
+
+ # Dependency list of projects that we can rely on the system packages
+ # - jq
+ declare -a system_deps=(
+ jq
+ )
+
+ sudo apt-get update
+ sudo apt-get -y install "${system_deps[@]}"
+
+ ensure_yq
+
+ # Dependency list of projects that we can install them
+ # directly from their releases on GitHub:
+ # - containerd
+ # - cri-container-cni release
tarball already includes CNI plugins + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done +} + +function run() { + info "Running runk tests using" + + bash -c ${runk_dir}/runk-tests.sh +} + +function main() { + action="${1:-}" + case "${action}" in + install-dependencies) install_dependencies ;; + install-kata) install_kata ;; + run) run ;; + *) >&2 die "Invalid argument" ;; + esac +} + +main "$@" diff --git a/tests/integration/runk/runk-tests.sh b/tests/integration/runk/runk-tests.sh new file mode 100755 index 000000000..714f10af5 --- /dev/null +++ b/tests/integration/runk/runk-tests.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# +# Copyright (c) 2023 Kata Contributors +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will validate runk with containerd + +set -o errexit +set -o nounset +set -o pipefail +set -o errtrace + +tracing_dir="$(dirname "$(readlink -f "$0")")" +source "${tracing_dir}/../../common.bash" +source "${tracing_dir}/../../metrics/lib/common.bash" + +RUNK_BIN_PATH="/usr/local/bin/runk" +TEST_IMAGE="docker.io/library/busybox:latest" +CONTAINER_ID="id1" +PID_FILE="${CONTAINER_ID}.pid" +WORK_DIR="$(mktemp -d --tmpdir runk.XXXXX)" + +setup() { + echo "pull container image" + check_images ${TEST_IMAGE} +} + +test_runk() { + echo "start container with runk" + # Bind mount ${WORK_DIR}:/tmp. Tests below will store files in this dir and check them when container is frozon. 
+ sudo ctr run --pid-file ${PID_FILE} -d --runc-binary ${RUNK_BIN_PATH} --mount type=bind,src=${WORK_DIR},dst=/tmp,options=rbind:rw ${TEST_IMAGE} ${CONTAINER_ID}
+ read CID PID STATUS <<< $(sudo ctr t ls | grep ${CONTAINER_ID})
+ [ ${PID} == $(cat ${PID_FILE}) ] || die "pid is not consistent"
+ [ ${STATUS} == "RUNNING" ] || die "container status is not RUNNING"
+
+ echo "exec process in a container"
+ sudo ctr t exec --exec-id id1 ${CONTAINER_ID} sh -c "echo hello > /tmp/foo"
+ [ "hello" == "$(sudo ctr t exec --exec-id id1 ${CONTAINER_ID} cat /tmp/foo)" ] || die "exec process failed"
+
+ echo "test ps command"
+ sudo ctr t exec --detach --exec-id id1 ${CONTAINER_ID} sh
+ # one line is the titles, and the other 2 lines are process info
+ [ "3" == "$(sudo ctr t ps ${CONTAINER_ID} | wc -l)" ] || die "ps command failed"
+
+ echo "test pause and resume"
+ # The process outputs lines into /tmp/{CONTAINER_ID}, which can be read in host when it's frozen.
+ sudo ctr t exec --detach --exec-id id2 ${CONTAINER_ID} sh -c "while true; do echo hello >> /tmp/${CONTAINER_ID}; sleep 0.1; done"
+ # sleep for 1s to make sure the process outputs some lines
+ sleep 1
+ sudo ctr t pause ${CONTAINER_ID}
+ [ "PAUSED" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | grep -o PAUSED)" ] || die "status is not PAUSED"
+ echo "container is paused"
+ local TMP_FILE="${WORK_DIR}/${CONTAINER_ID}"
+ local lines1=$(cat ${TMP_FILE} | wc -l)
+ # sleep for a while and check the lines are not changed.
+ sleep 1
+ local lines2=$(cat ${TMP_FILE} | wc -l)
+ [ ${lines1} == ${lines2} ] || die "paused container is still running"
+ sudo ctr t resume ${CONTAINER_ID}
+ [ "RUNNING" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | grep -o RUNNING)" ] || die "status is not RUNNING"
+ echo "container is resumed"
+ # sleep for a while and check the lines are changed.
+ sleep 1 + local lines3=$(cat ${TMP_FILE} | wc -l) + [ ${lines2} -lt ${lines3} ] || die "resumed container is not running" + + echo "kill the container and poll until it is stopped" + sudo ctr t kill --signal SIGKILL --all ${CONTAINER_ID} + # poll for a while until the task receives signal and exit + local cmd='[ "STOPPED" == "$(sudo ctr t ls | grep ${CONTAINER_ID} | awk "{print \$3}")" ]' + waitForProcess 10 1 "${cmd}" || die "failed to kill task" + + echo "check the container is stopped" + # there is only title line of ps command + [ "1" == "$(sudo ctr t ps ${CONTAINER_ID} | wc -l)" ] || die "kill command failed" + + # High-level container runtimes such as containerd call the kill command with + # --all option in order to terminate all processes inside the container + # even if the container already is stopped. Hence, a low-level runtime + # should allow kill --all regardless of the container state like runc. + echo "test kill --all is allowed regardless of the container state" + sudo ctr t kill --signal SIGKILL ${CONTAINER_ID} && die "kill should fail" + sudo ctr t kill --signal SIGKILL --all ${CONTAINER_ID} || die "kill --all should not fail" + + echo "delete the container" + sudo ctr t rm ${CONTAINER_ID} + [ -z "$(sudo ctr t ls | grep ${CONTAINER_ID})" ] || die "failed to delete task" + sudo ctr c rm ${CONTAINER_ID} || die "failed to delete container" +} + +clean_up() { + rm -f ${PID_FILE} + rm -rf ${WORK_DIR} +} + +setup +test_runk +clean_up From 301a7d94e32ddd49456c05c71d436b1b74eea99e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 6 Oct 2023 14:11:29 -0700 Subject: [PATCH 318/339] gha: ci: Revert tracing test PR to unbreak CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "Merge pull request #8115 from fidencio/topic/ci-add-tracing-tests" This unbreaks CI as seen in https://github.com/kata-containers/kata-containers/actions/runs/6434757133 Fixes: #8161 Signed-off-by: Aurélien 
Bombo (cherry picked from commit e9bd8521139909e4def500a74b131cb0061819f4) --- .github/workflows/ci.yaml | 8 - .github/workflows/run-tracing-tests.yaml | 52 - tests/common.bash | 18 - .../tracing/configure_tracing_for_kata.sh | 40 - tests/functional/tracing/gha-run.sh | 56 - .../functional/tracing/test-agent-shutdown.sh | 1502 ----------------- tests/functional/tracing/tracing-test.sh | 540 ------ tests/integration/docker/gha-run.sh | 16 +- 8 files changed, 15 insertions(+), 2217 deletions(-) delete mode 100644 .github/workflows/run-tracing-tests.yaml delete mode 100755 tests/functional/tracing/configure_tracing_for_kata.sh delete mode 100755 tests/functional/tracing/gha-run.sh delete mode 100755 tests/functional/tracing/test-agent-shutdown.sh delete mode 100755 tests/functional/tracing/tracing-test.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 35c6a40b8..a92f0fbbe 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -248,11 +248,3 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} - - run-tracing-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-tracing-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - target-branch: ${{ inputs.target-branch }} diff --git a/.github/workflows/run-tracing-tests.yaml b/.github/workflows/run-tracing-tests.yaml deleted file mode 100644 index ede15aca1..000000000 --- a/.github/workflows/run-tracing-tests.yaml +++ /dev/null @@ -1,52 +0,0 @@ -name: CI | Run tracing tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-tracing: - strategy: - fail-fast: false - matrix: - vmm: - - clh # cloud-hypervisor - - qemu - runs-on: garm-ubuntu-2204-smaller - env: - KATA_HYPERVISOR: ${{ 
matrix.vmm }} - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Install dependencies - run: bash tests/functional/tracing/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/functional/tracing/gha-run.sh install-kata kata-artifacts - - - name: Run tracing tests - run: bash tests/functional/tracing/gha-run.sh run diff --git a/tests/common.bash b/tests/common.bash index 34c59e052..a11144574 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -547,24 +547,6 @@ EOF sudo systemctl enable --now crio } -function install_docker() { - # Add Docker's official GPG key - sudo apt-get update - sudo apt-get -y install ca-certificates curl gnupg - sudo install -m 0755 -d /etc/apt/keyrings - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg - sudo chmod a+r /etc/apt/keyrings/docker.gpg - - # Add the repository to Apt sources: - echo \ - "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - "$(. 
/etc/os-release && echo "$VERSION_CODENAME")" stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - sudo apt-get update - - sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -} - # Convert architecture to the name used by golang function arch_to_golang() { local arch="$(uname -m)" diff --git a/tests/functional/tracing/configure_tracing_for_kata.sh b/tests/functional/tracing/configure_tracing_for_kata.sh deleted file mode 100755 index ed9757113..000000000 --- a/tests/functional/tracing/configure_tracing_for_kata.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2019-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" - -[ "$#" -eq 1 ] || die "Specify enable or disable" - -kata_cfg_file=$(kata-runtime kata-env --json |jq '.Runtime | .Config | .Path' |cut -d\" -f2) - -enable_tracing() { - info "Enabling kata tracing on $kata_cfg_file" - sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing true - sudo crudini --set "$kata_cfg_file" runtime enable_tracing true -} - -disable_tracing() { - info "Disabling kata tracing on $kata_cfg_file" - sudo crudini --set "$kata_cfg_file" agent.kata enable_tracing false - sudo crudini --set "$kata_cfg_file" runtime enable_tracing false -} - -main() { - cmd="$1" - case "$cmd" in - enable ) enable_tracing ;; - disable ) disable_tracing ;; - *) die "invalid command: '$cmd'" ;; - esac -} - -main "$@" diff --git a/tests/functional/tracing/gha-run.sh b/tests/functional/tracing/gha-run.sh deleted file mode 100755 index 336992662..000000000 --- a/tests/functional/tracing/gha-run.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail - 
-kata_tarball_dir="${2:-kata-artifacts}" -tracing_dir="$(dirname "$(readlink -f "$0")")" -source "${tracing_dir}/../../common.bash" - -function install_dependencies() { - info "Installing the dependencies needed for running the tracing tests" - - # Dependency list of projects that we can rely on the system packages - # - crudini - # - jq - # - socat - # - tmux - declare -a system_deps=( - crudini - jq - socat - tmux - ) - - sudo apt-get update - sudo apt-get -y install "${system_deps[@]}" - - # Install docker according to the docker's website documentation - install_docker -} - -function run() { - info "Running tracing tests using ${KATA_HYPERVISOR} hypervisor" - - enabling_hypervisor - bash -c ${tracing_dir}/test-agent-shutdown.sh - bash -c ${tracing_dir}/tracing-test.sh -} - -function main() { - action="${1:-}" - case "${action}" in - install-dependencies) install_dependencies ;; - install-kata) install_kata ;; - run) run ;; - *) >&2 die "Invalid argument" ;; - esac -} - -main "$@" diff --git a/tests/functional/tracing/test-agent-shutdown.sh b/tests/functional/tracing/test-agent-shutdown.sh deleted file mode 100755 index 6bad962d8..000000000 --- a/tests/functional/tracing/test-agent-shutdown.sh +++ /dev/null @@ -1,1502 +0,0 @@ -#!/bin/bash -# Copyright (c) 2021 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# -#--------------------------------------------------------------------- -# Description: Test the Kata Containers 2.x rust agent shutdown behaviour. -# -# Normally, the kata-agent process running inside the VM is not shut down; -# once the workload ends and the agent has returned the workload return -# value back to the runtime, the runtime simply kills the VM. This is safe -# since nothing the user cares about is running any more. -# -# However, for agent tracing, a graceful agent shutdown is necessary to ensure -# all trace spans are generated. 
When *static* agent tracing is enabled, the -# runtime relies entirely on the agent to perform a graceful shutdown _and_ -# shut down the VM. -# -# This script tests the kata-agent in two ways: -# -# - "manually" / "standalone" where the agent binary is run directly. -# - Inside a Kata VM, started by a shimv2-capable container manager -# (containerd). -# -# In both cases, the agent is shut down using the agent-ctl tool -# to request the agent shut down gracefully. -# -# Various configuration options are also tested. One of these enables -# the agents built-in (VSOCK) debug console. This test not only enables -# the option but also connects to the created console. -# -# Since this script needs to start various programs with a terminal, -# it uses tmux(1) consistently to simplify the handling logic. -#--------------------------------------------------------------------- - -readonly script_name=${0##*/} - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" -source "/etc/os-release" || source "/usr/lib/os-release" - -CTR_RUNTIME=${CTR_RUNTIME:-"io.containerd.kata.v2"} - -# Kata always uses this value -EXPECTED_VSOCK_PORT="1024" - -DOCKER_IMAGE=${DOCKER_IMAGE:-"busybox"} -CTR_IMAGE=${CTR_IMAGE:-"quay.io/prometheus/busybox:latest"} - -# Number of times the test should be run -KATA_AGENT_SHUTDOWN_TEST_COUNT=${KATA_AGENT_SHUTDOWN_TEST_COUNT:-1} - -# Default VSOCK port used by the agent -KATA_AGENT_VSOCK_CONSOLE_PORT=${KATA_AGENT_VSOCK_CONSOLE_PORT:-1026} - -# The shutdown test type that represents a "default" / vanilla Kata -# installation (where no debug options are enabled). 
-VANILLA_TEST_TYPE='default' - -# Name of tmux(1) sessions to create to run Kata VM and local agent in -KATA_TMUX_VM_SESSION="kata-shutdown-test-vm-session" -KATA_TMUX_LOCAL_SESSION="kata-shutdown-test-local-agent-session" - -# Name of tmux(1) session to create to run a debug console in -KATA_TMUX_CONSOLE_SESSION="kata-shutdown-test-console-session" - -# tmux(1) session to run the trace forwarder in -KATA_TMUX_FORWARDER_SESSION="kata-shutdown-test-trace-forwarder-session" - -KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" - -# List of test types used by configure_kata(). -# -# Each element contains four colon delimited fields: -# -# 1: Name. -# 2: Whether debug should be enabled for the agent+runtime. -# 3: Whether hypervisor debug should be enabled. -# (handled separately due to a previous bug which blocked agent shutdown). -# 4: Whether a VSOCK debug console should be configured and used. -# -# Notes: -# -# - Tests are run in the order found in this array. -# - An array is used (rather than a hash) to ensure the standard/vanilla -# configuration is run *last*. The reason for this being that debug is -# needed to diagnose shutdown errors, so there is no point in runnning -# the default scenario first, in case it fails (and it thus "undebuggable"). -shutdown_test_types=( - 'with-debug:true:false:false' - 'with-debug-console:false:false:true' - 'with-hypervisor-debug:true:true:false' - 'with-everything:true:true:true' - "${VANILLA_TEST_TYPE}:false:false:false" -) - -# Number of fields each entry in the 'shutdown_test_types' array should have. -shutdown_test_type_fields=4 - -# Pseudo test type name that represents all test types defined -# in the 'shutdown_test_types' array. -ALL_TEST_TYPES='all' - -DEFAULT_SHUTDOWN_TEST_TYPE="${ALL_TEST_TYPES}" - -# List of ways of running the agent: -# -# Each element contains two colon delimited fields: -# -# 1: Name used for a particular way of running the agent. -# 2: Description. 
-agent_test_types=( - 'local:Run agent using agent-ctl tool' - 'vm:Run agent inside a Kata Container' -) - -# Default value from the 'agent_test_types' array. -DEFAULT_AGENT_TEST_TYPE='vm' - -# Set by every call to run_single_agent() -test_start_time= -test_end_time= - -#------------------------------------------------------------------------------- -# Settings - -# values used to wait for local and VM processes to start and end. -wait_time_secs=${WAIT_TIME_SECS:-20} -sleep_time_secs=${SLEEP_TIME_SECS:-1} - -# Time to allow for the agent and VM to shutdown -shutdown_time_secs=${SHUTDOWN_TIME_SECS:-120} - -# Name for the container that will be created -container_id="${CONTAINER_ID:-kata-agent-shutdown-test}" - -# If 'true', don't run any commands, just show what would be run. -dry_run="${DRY_RUN:-false}" - -# If 'true', don't remove logs on a successful run. -keep_logs="${KEEP_LOGS:-false}" - -# Name of socket file used by a local agent. -agent_socket_file="kata-agent.socket" - -# Kata Agent socket URI. -# -# Notes: -# -# - The file is an abstract socket -# (meaning it is not visible in the filesystem). -# -# - The agent and the agent-ctl tool use slightly different -# address formats for abstract sockets. -local_agent_server_addr="unix://${agent_socket_file}" -local_agent_ctl_server_addr="unix://@${agent_socket_file}" - -# Address that is dynamically configured when using CLH before -# starting trace forwarder or container -clh_socket_path= -clh_socket_prefix="/run/vc/vm/" - -ctl_log_file="${PWD}/agent-ctl.log" - -# Log file that must contain agent output. -agent_log_file="${PWD}/kata-agent.log" - -# Set in setup() based on KATA_HYPERVISOR -# Supported hypervisors are qemu and clh -configured_hypervisor= -# String that would appear in config file (qemu or clh) -configured_hypervisor_cfg= - -# Full path to directory containing an OCI bundle based on "$DOCKER_IMAGE", -# which is required by the agent control tool. 
-bundle_dir=${BUNDLE_DIR:-""} - -#--------------------------------------- -# Default values - -default_arch=$(uname -m) -arch="${arch:-${default_arch}}" - -#------------------------------------------------------------------------------- - -agent_binary="/usr/bin/kata-agent" - -# Maximum debug level -default_agent_log_level="trace" - -agent_log_level=${agent_log_level:-${default_agent_log_level}} - -# Full path to the main configuration file (set by setup()). -kata_cfg_file= - -# Set in setup() based on KATA_HYPERVISOR -hypervisor_binary= - - -#------------------------------------------------------------------------------- - -[ -n "${DEBUG:-}" ] && set -o xtrace - -usage() -{ - cat < : Agent test type to use - (default: '$DEFAULT_AGENT_TEST_TYPE'). - -c : Run specified number of iterations - (default: $KATA_AGENT_SHUTDOWN_TEST_COUNT). - -d : Enable debug (shell trace) output. - -h : Show this help statement. - -k : Keep logs on successful run - (default: logs will be deleted on success). - -l : List all available agent and shutdown test types. - -n : Dry-run mode - show the commands that would be run. - -t : Only run the specified shutdown test type - (default: '$DEFAULT_SHUTDOWN_TEST_TYPE'). - -Notes: - -- These tests should be run *before* the Kata Agent tracing tests, since if - the agent cannot be shut down, static tracing will not work reliably. - -- By default all shutdown test types are run, but only the default agent test - type is run. - -EOF -} - -warn() -{ - echo >&2 "WARNING: $*" -} - -# Run the specified command, or if dry-run mode is enabled, -# just show the command that would be run. 
-run_cmd() -{ - local cmdline="$@" - - if [ "$dry_run" = 'true' ] - then - info "dry-run: Would run: '$cmdline'" - else - eval $cmdline - fi -} - -# Show a subset of processes (for debugging) -show_procs() -{ - info "Processes" - - local hypervisor - hypervisor="qemu" - [ ${configured_hypervisor} = "clh" ] && hypervisor="cloud-hypervisor" - - local patterns=() - - patterns+=("kata-agent-ctl") - patterns+=("${hypervisor}") - patterns+=("containerd") - patterns+=("ctr") - - local pattern_list - pattern_list=$(echo "${patterns[@]}"|tr ' ' '|') - - local regex - regex="(${pattern_list})" - - ps -efww | egrep -i "$regex" || true -} - -kill_tmux_sessions() -{ - local session - - for session in \ - "$KATA_TMUX_CONSOLE_SESSION" \ - "$KATA_TMUX_FORWARDER_SESSION" \ - "$KATA_TMUX_LOCAL_SESSION" \ - "$KATA_TMUX_VM_SESSION" - do - tmux kill-session -t "$session" &>/dev/null || true - done - - true -} - -get_shutdown_test_type_entry() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type name" - - local entry - - for entry in "${shutdown_test_types[@]}" - do - local count - count=$(echo "$entry"|tr ':' '\n'|wc -l) - [ "$count" -eq "$shutdown_test_type_fields" ] \ - || die "expected $shutdown_test_type_fields fields, found $count: '$entry'" - - local name - - name=$(echo "$entry"|cut -d: -f1) - - [ "$name" = "$shutdown_test_type" ] \ - && echo "$entry" \ - && break - done - - echo -} - -list_shutdown_test_types() -{ - local entry - local debug_value - local hypervisor_debug_value - local debug_console_value - - printf "# Shutdown test types:\n\n" - - printf "%-24s %-15s %-23s %s\n\n" \ - "Test type" \ - "Debug enabled" \ - "Hypervisor debug" \ - "Debug console used" - - for entry in "${shutdown_test_types[@]}" - do - local name - local debug_value - local hypervisor_debug_value - local debug_console_value - - name=$(echo "$entry"|cut -d: -f1) - debug_value=$(echo "$entry"|cut -d: -f2) - hypervisor_debug_value=$(echo "$entry"|cut 
-d: -f3) - debug_console_value=$(echo "$entry"|cut -d: -f4) - - printf "%-24s %-15s %-23s %s\n" \ - "$name" \ - "$debug_value" \ - "$hypervisor_debug_value" \ - "$debug_console_value" - done - - echo -} - -list_agent_test_types() -{ - local entry - - printf "# Agent test types:\n\n" - - printf "%-12s %s\n\n" \ - "Agent type" \ - "Description" - - for entry in "${agent_test_types[@]}" - do - local name - local descr - - name=$(echo "$entry"|cut -d: -f1) - descr=$(echo "$entry"|cut -d: -f2-) - - local msg="" - - [ "$name" = "$DEFAULT_AGENT_TEST_TYPE" ] && msg=" (default)" - - printf "%-12s %s%s.\n" \ - "$name" \ - "$descr" \ - "$msg" - done - - echo -} - -list_test_types() -{ - list_agent_test_types - list_shutdown_test_types -} - -# Set Kata options according to test type. -configure_kata() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - local entry - local debug_value - local hypervisor_debug_value - local debug_console_value - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" - - debug_value=$(echo "$entry"|cut -d: -f2) - hypervisor_debug_value=$(echo "$entry"|cut -d: -f3) - debug_console_value=$(echo "$entry"|cut -d: -f4) - - [ -z "$debug_value" ] && \ - die "need debug value for $shutdown_test_type" - - [ -z "$hypervisor_debug_value" ] && \ - die "need hypervisor debug value for $shutdown_test_type" - - [ -z "$debug_console_value" ] && \ - die "need debug console value for $shutdown_test_type" - - toggle_debug "$debug_value" "$hypervisor_debug_value" - toggle_vsock_debug_console "$debug_console_value" - - # Enable agent tracing - # - # Even though this program only tests agent shutdown, static tracing - # must be configured. This is because normally (with tracing - # disabled), the runtime kills the VM after the workload has exited. 
- # However, if static tracing is enabled, the runtime will not kill the - # VM - the responsibility for shutting down the VM is given to the - # agent process running inside the VM. - - if [ "$shutdown_test_type" = "$VANILLA_TEST_TYPE" ] - then - # We don't need to worry about the 'trace_mode' here since agent tracing - # is *only* enabled if the 'enable_tracing' variable is set. - run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'false' - else - run_cmd sudo crudini --set "${kata_cfg_file}" 'agent.kata' 'enable_tracing' 'true' - fi -} - -unconfigure_kata() -{ - info "Resetting configuration to defaults" - - configure_kata "$VANILLA_TEST_TYPE" -} - -# Enable/disable the agent's built-in VSOCK debug console -toggle_vsock_debug_console() -{ - run_cmd sudo crudini --set "${kata_cfg_file}" \ - 'agent.kata' 'debug_console_enabled' "$1" -} - -# Enable/disable debug options. -# -# Note: Don't use 'kata-manager.sh "enable-debug"' since this -# enables all debug (including the problematic hypervisor -# debug - see below). -toggle_debug() -{ - local value="${1:-}" - local hypervisor_debug="${2:-}" - - [ -z "$value" ] && die "need value" - [ -z "$hypervisor_debug" ] && die "need hypervisor debug value" - - # list of confguration.toml sections that have debug options we care about - local debug_sections=() - - debug_sections+=('agent.kata') - debug_sections+=('runtime') - - local section - - for section in "${debug_sections[@]}" - do - run_cmd sudo crudini --set "$kata_cfg_file" "$section" \ - 'enable_debug' "$value" - done - - # XXX: Enabling hypervisor debug for QEMU will make a systemd debug - # console service inoperable (*), but we need to test it anyhow. - # - # (*) - If enabled, it stops "kata-debug.service" from attaching to - # the console and the socat call made on the client hangs until - # the VM is shut down! 
- local section - - section=$(printf "hypervisor.%s" "$configured_hypervisor_cfg") - - run_cmd sudo crudini --set "$kata_cfg_file" "$section" \ - 'enable_debug' "$hypervisor_debug_value" -} - -# Provide a "semi-valid" vsock address for when dry-run mode is active. -# The URI includes a message telling the user to change it and replace -# with the real VSOCK CID value. -get_dry_run_agent_vsock_address() -{ - echo "vsock://FIXME-CHANGE-TO-VSOCK-CID:${EXPECTED_VSOCK_PORT}" -} - -# Start a debug console shell using the agent's built-in debug console -# feature. -# -# Note: You should be able to use "kata-runtime exec $cid", but that isn't -# working currently. -connect_to_vsock_debug_console() -{ - local agent_addr - - if [ "$dry_run" = 'true' ] - then - agent_addr=$(get_dry_run_agent_vsock_address) - else - agent_addr=$(get_agent_vsock_address || true) - [ -z "$agent_addr" ] && die "cannot determine agent VSOCK address" - fi - - local socat_connect= - if [ $configured_hypervisor = "qemu" ]; then - socat_connect=$(echo "$agent_addr"|sed 's!^vsock://!vsock-connect:!') - elif [ $configured_hypervisor = "clh" ]; then - socat_connect="unix-connect:${clh_socket_path}" - else - die "Cannot configure address for socat, unknown hypervisor: '$configured_hypervisor'" - fi - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_CONSOLE_SESSION\" \ - \"socat \ - '${socat_connect}' \ - stdout\"" - -} - -cleanup() -{ - # Save the result of the last call made before - # this handler was called. - # - # XXX: This *MUST* be the first command in this function! - local failure_ret="$?" 
- - [ "$dry_run" = 'true' ] && return 0 - - if [ "$failure_ret" -eq 0 ] && [ "$keep_logs" = 'true' ] - then - info "SUCCESS: Test passed, but leaving logs:" - info "" - info "agent log file : ${agent_log_file}" - info "agent-ctl log file : ${ctl_log_file}" - info "OCI bundle directory : ${bundle_dir}" - - return 0 - fi - - local arg="${1:-}" - - if [ $failure_ret -ne 0 ] && [ "$arg" != 'initial' ]; then - warn "ERROR: Test failed" - warn "" - warn "Not cleaning up to help debug failure:" - warn "" - - if [ "${CI:-}" = "true" ] - then - show_procs - - info "VSOCK details" - ss -Hp --vsock || true - - info "agent-ctl log file" - sudo cat "${ctl_log_file}" || true - echo - - info "agent log file" - sudo cat "${agent_log_file}" || true - echo - - else - info "agent-ctl log file : ${ctl_log_file}" - info "agent log file : ${agent_log_file}" - fi - - info "OCI bundle directory : ${bundle_dir}" - - return 0 - fi - - kill_tmux_sessions - - unconfigure_kata - - [ "$arg" != 'initial' ] && [ -d "$bundle_dir" ] && rm -rf "$bundle_dir" - - sudo rm -f \ - "$agent_log_file" \ - "$ctl_log_file" - - clean_env_ctr &>/dev/null || true - - local sandbox_dir="/run/sandbox-ns/" - - # XXX: Without doing this, the agent will hang attempting to create the - # XXX: namespaces (in function "setup_shared_namespaces()") - sudo umount -f "${sandbox_dir}/uts" "${sandbox_dir}/ipc" &>/dev/null || true - sudo rm -rf "${sandbox_dir}" &>/dev/null || true - - # Check that clh socket was deleted - if [ $configured_hypervisor = "clh" ] && [ ! -z $clh_socket_path ]; then - [ -f $clh_socket_path ] && die "CLH socket path $clh_socket_path was not properly cleaned up" - fi - - sudo systemctl restart containerd -} - -setup_containerd() -{ - local file="/etc/containerd/config.toml" - - [ -e "$file" ] || die "missing containerd config file: '$file'" - - # Although the containerd config file is in TOML format, crudini(1) - # won't parse it due to the indentation it uses. 
- local containerd_debug_enabled - - containerd_debug_enabled=$(sed \ - -e '/./{H;$!d;}' \ - -e 'x;/\[debug\]/!d;' \ - "$file" |\ - grep "level *= *\"debug\"" || true) - - if [ -z "$containerd_debug_enabled" ] - then - cat <<-EOF | sudo tee -a "$file" - [debug] - # Allow Kata Containers debug messages to be propageted - # into the hosts journal. - # (use "journalctl -t kata" to view). - level = "debug" - EOF - - sudo systemctl restart containerd - fi - - sudo ctr image pull "$CTR_IMAGE" - - true -} - -create_oci_rootfs() -{ - local dir="${1:-}" - - [ -z "$dir" ] && die "Need OCI rootfs dir" - - sudo docker export $(sudo docker create "$DOCKER_IMAGE") |\ - tar -C "${dir}" -xvf - >/dev/null -} - -setup_oci_bundle() -{ - bundle_dir="$(mktemp -d)" - export bundle_dir - - info "Creating OCI bundle in directory: '$bundle_dir'" - - local config="${bundle_dir}/config.json" - local rootfs_dir="${bundle_dir}/rootfs/" - - mkdir -p "$rootfs_dir" - - create_oci_rootfs "$rootfs_dir" - - pushd "$bundle_dir" &>/dev/null - runc spec - popd &>/dev/null - - [ -e "$config" ] || die "no OCI config file at ${config}" -} - -setup() -{ - configured_hypervisor="${KATA_HYPERVISOR:-}" - - if [ "${KATA_HYPERVISOR:-}" = "qemu" ]; then - hypervisor_binary="qemu-system-${arch}" - configured_hypervisor_cfg="qemu" - elif [ "${KATA_HYPERVISOR:-}" = "clh" ]; then - hypervisor_binary="cloud-hypervisor" - configured_hypervisor_cfg="clh" - else - local msg="" - msg+="Exiting as hypervisor test dependency not met" - msg+=" (expected 'qemu' or 'cloud-hypervisor', found '$KATA_HYPERVISOR')" - die "$msg" - fi - info "Configured hypervisor is $configured_hypervisor" - - trap cleanup EXIT - - # Don't mess with an existing tmux session - unset TMUX - - [ "$dry_run" = 'false' ] && \ - [ -z "$bundle_dir" ] && \ - setup_oci_bundle || true - - local cmds=() - - # For parsing TOML config files - cmds+=('crudini') - - # For container manager (containerd) - cmds+=('ctr') - - # for OCI bundle creation - 
cmds+=('docker') - cmds+=('runc') - - # For querying VSOCK sockets - cmds+=('socat') - - # For launching processes - cmds+=('tmux') - - local cmd - - for cmd in "${cmds[@]}" - do - local result - result=$(command -v "$cmd" || true) - [ -n "$result" ] || die "need $cmd" - done - - kata_cfg_file=$(kata-runtime kata-env \ - --json |\ - jq '.Runtime | .Config | .Path' |\ - cut -d\" -f2 || true) - - [ -z "$kata_cfg_file" ] && die "Cannot determine config file" - - sudo mkdir -p $(dirname "$kata_cfg_file") - - #------------------------------ - # Check configured hypervisor - - local hypervisor_section - - hypervisor_section=$(printf "hypervisor.%s\n" "${configured_hypervisor_cfg}") - - local ret - - { crudini --get "${kata_cfg_file}" "${hypervisor_section}" &>/dev/null; ret=$?; } || true - - [ "$ret" -eq 0 ] || \ - die "Configured hypervisor ${configured_hypervisor} does not match config file ${kata_cfg_file}" - - setup_containerd -} - -start_local_agent() -{ - local log_file="${1:-}" - [ -z "$log_file" ] && die "need agent log file" - - local running - running=$(get_local_agent_pid || true) - - [ -n "$running" ] && die "agent already running: '$running'" - - # Note: it's imperative that we capture stderr to the log file - # as the agent writes the shutdown message to this stream! 
- run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_LOCAL_SESSION\" \ - \"sudo \ - RUST_BACKTRACE=full \ - KATA_AGENT_LOG_LEVEL=${agent_log_level} \ - KATA_AGENT_SERVER_ADDR=${local_agent_server_addr} \ - ${agent_binary} \ - &> ${log_file}\"" - - [ "$dry_run" = 'false' ] && wait_for_local_agent_to_start || true -} - -# Wait for the agent to finish starting -wait_for_kata_vm_agent_to_start() -{ - local cid="${1:-}" - [ -z "$log_file" ] && die "need container ID" - - # First, check the containerd status of the container - local cmd="sudo ctr task list | grep \"${cid}\" | grep -q \"RUNNING\"" - - info "Waiting for VM to start (cid: '$cid')" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - show_procs - - # Next, ensure there is a valid VSOCK address for the VM - info "Waiting for agent VSOCK server" - - cmd="get_agent_vsock_address_simple >/dev/null" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - info "Kata VM running" -} - -check_local_agent_alive() -{ - local cmds=() - - cmds+=("-c Check") - - run_agent_ctl \ - "${local_agent_ctl_server_addr}" \ - "${cmds[@]}" - - true -} - -wait_for_local_agent_to_start() -{ - local cmd="check_local_agent_alive" - - info "Waiting for agent process to start" - - waitForProcess \ - "$wait_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - info "Kata agent process running" -} - -# Create a Kata Container that blocks "forever" -start_agent_in_kata_vm() -{ - local log_file="${1:-}" - [ -z "$log_file" ] && die "need agent log file" - - local snapshotter="" - local ret - - # Allow containerd to run on a ZFS root filesystem - { zfs list &>/dev/null; ret=$?; } || true - [ "$ret" = 0 ] && snapshotter='zfs' - - # Ensure the container blocks forever - local cmd='tail -f /dev/null' - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_VM_SESSION\" \ - \"sudo ctr run \ - --snapshotter '$snapshotter' \ - --runtime '${CTR_RUNTIME}' \ - --rm \ - -t '${CTR_IMAGE}' \ - 
'$container_id' \ - $cmd\"" - - [ "$dry_run" = 'false' ] && \ - wait_for_kata_vm_agent_to_start "$container_id" || true -} - -start_agent() -{ - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent log file" - - case "$agent_test_type" in - 'local') start_local_agent "$log_file" ;; - 'vm') start_agent_in_kata_vm "$log_file" ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -run_agent_ctl() -{ - local server_addr="${1:-}" - - shift - - local cmds="${*:-}" - - [ -n "$server_addr" ] || die "need agent ttRPC server address" - [ -n "$cmds" ] || die "need commands for agent control tool" - - local agent_ctl_path - agent_ctl_path="/opt/kata/bin/kata-agent-ctl" - - local redirect="&>\"${ctl_log_file}\"" - - if [ "$dry_run" = 'true' ] - then - redirect="" - bundle_dir="FIXME-set-to-OCI-bundle-directory" - fi - - local server_address= - if [ $configured_hypervisor = "qemu" ]; then - server_address="--server-address \"${server_addr}\"" - elif [ $configured_hypervisor = "clh" ]; then - server_address="--server-address \"${server_addr}\" --hybrid-vsock" - else - die "Cannot configure server address, unknown hypervisor: '$configured_hypervisor'" - fi - - run_cmd \ - sudo \ - RUST_BACKTRACE=full \ - "${agent_ctl_path}" \ - -l debug \ - connect \ - "${server_address}" \ - --bundle-dir "${bundle_dir}" \ - "${cmds}" \ - "${redirect}" -} - -# This function "cheats" a little - it gets the agent -# to do some work *and then* stops it. 
-stop_local_agent() -{ - local cmds=() - - cmds+=("-c Check") - cmds+=("-c GetGuestDetails") - cmds+=("-c 'sleep 1s'") - cmds+=("-c DestroySandbox") - - run_agent_ctl \ - "${local_agent_ctl_server_addr}" \ - "${cmds[@]}" -} - -get_addresses() -{ - local addresses= - - if [ $configured_hypervisor = "qemu" ]; then - addresses=$(ss -Hp --vsock |\ - egrep -v "\" |\ - awk '$2 ~ /^ESTAB$/ {print $6}' |\ - grep ":${EXPECTED_VSOCK_PORT}$") - elif [ $configured_hypervisor = "clh" ]; then - # since we preconfigured the socket, we are checking to see if it is reported - addresses=$(ss -Hp |\ - grep "${clh_socket_path}" |\ - awk '$2 ~ /^ESTAB$/ {print $5}') - else - die "Cannot retrieve address, unknown hypervisor: '$configured_hypervisor'" - fi - - echo ${addresses} -} - -# Doesn't fail. Instead it will return the empty string on error. -get_agent_vsock_address_simple() -{ - local addresses=$(get_addresses) - - [ -z "$addresses" ] && return 1 - - local expected_count=1 - - local count - count=$(echo "$addresses"|wc -l || true) - - [ "$count" -eq "$expected_count" ] || return 1 - - if [ $configured_hypervisor = "qemu" ]; then - local cid - local port - - cid=$(echo "$addresses"|cut -d: -f1) - port=$(echo "$addresses"|cut -d: -f2) - - echo "vsock://${cid}:${port}" - elif [ $configured_hypervisor = "clh" ]; then - address=$(echo "$addresses" | awk 'NR==1{print $1}') - echo "unix://${address}" - else - die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" - fi - - return 0 -} - -get_agent_vsock_address() -{ - local addresses=$(get_addresses) - - [ -z "$addresses" ] && die "no VSOCK connections found" - - local expected_count=1 - - local count - count=$(echo "$addresses"|wc -l || true) - - if [ $configured_hypervisor = "qemu" ]; then - # For QEMU we always expect 1 result. 
For Cloud Hypervisor, if a debug console is configured - # and running, we will have more than 1 result, so only run this check for QEMU - [ "$count" -eq "$expected_count" ] \ - || die "expected $expected_count VSOCK entry, found $count: '$addresses'" - - local cid - local port - - cid=$(echo "$addresses"|cut -d: -f1) - port=$(echo "$addresses"|cut -d: -f2) - - echo "vsock://${cid}:${port}" - elif [ $configured_hypervisor = "clh" ]; then - address=$(echo "$addresses" | awk 'NR==1{print $1}') - echo "unix://${address}" - else - die "Cannot get agent vsock address, unknown hypervisor: '$configured_hypervisor'" - fi -} - -stop_agent_in_kata_vm() -{ - local agent_addr - - if [ "$dry_run" = 'true' ] - then - agent_addr=$(get_dry_run_agent_vsock_address) - else - agent_addr=$(get_agent_vsock_address || true) - [ -z "$agent_addr" ] && \ - die "cannot determine agent VSOCK address for $hypervisor_binary" - fi - - # List of API commands to send to the agent. - local cmds=() - - # Run a couple of query commands first to ensure - # the agent is listening. - cmds+=("-c Check") - cmds+=("-c GetGuestDetails") - - # Creating a container implies creating a sandbox, so request - # agent/VM/container shutdown by asking the agent - # to destroy the sandbox. 
- cmds+=("-c DestroySandbox") - - run_agent_ctl \ - "${agent_addr}" \ - "${cmds[@]}" - - true -} - -stop_agent() -{ - info "Stopping agent" - - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent-ctl log file" - - case "$agent_test_type" in - 'local') stop_local_agent ;; - 'vm') stop_agent_in_kata_vm ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -get_local_agent_pid() -{ - local pids - - local name - name=$(basename "$agent_binary") - - pids=$(pgrep "$name" || true) - [ -z "$pids" ] && return 0 - - local count - count=$(echo "$pids"|wc -l) - - [ "$count" -gt 1 ] && \ - die "too many agent processes running ($count, '$pids')" - - echo $pids -} - -# Function that writes all agent logs to '$agent_log_file'. -get_agent_log_file() -{ - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local log_file="${2:-}" - [ -z "$log_file" ] && die "need agent log file" - - info "Getting agent log details" - - case "$agent_test_type" in - # NOP: File should have been created by start_local_agent() - 'local') true ;; - - # Extract journal entries for the duration of the test - 'vm') - sudo journalctl \ - -q \ - -a \ - -o cat \ - -t 'kata' \ - --since="$test_start_time" \ - > "$log_file" - ;; - - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - [ -e "$log_file" ] || die "no log file: '$log_file'" - [ -s "$log_file" ] || die "empty log file: '$log_file'" - - true -} - -# Function to run to ensure correct behaviour -validate_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - local log_file="${3:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - [ -z "$log_file" ] && die "need agent log file" - - info "validating" - - get_agent_log_file \ - "$agent_test_type" \ - "$log_file" - - # 
Regular expression that describes possible agent failures - local regex="(slog::Fuse|Drain|Custom|serialization error|thread.*panicked|stack backtrace:)" - - egrep -q "$regex" "$log_file" && cat $log_file && die "Found agent error in log file: '$log_file'" - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - [ -z "$entry" ] && die "invalid test type: '$shutdown_test_type'" - - local hypervisor_debug=$(echo "$entry"|cut -d: -f3) - local vsock_console=$(echo "$entry"|cut -d: -f4) - - local agent_debug_logs_available='false' - - [ "$hypervisor_debug" = 'true' ] && \ - [ "$vsock_console" = 'false' ] && \ - agent_debug_logs_available='true' - - if [ "$agent_debug_logs_available" = 'true' ] || [ "$agent_test_type" = 'local' ] - then - # The message the agent writes to stderr just before it exits. - local done_msg="\" - - egrep -q "$done_msg" "$log_file" || (cat $log_file && die "missing agent shutdown message") - else - # We can only check for the shutdown message if the agent debug - # logs are available. - info "Not checking for agent shutdown message as hypervisor debug disabled" - fi -} - -setup_agent() -{ - local shutdown_test_type="${1:-}" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - kill_tmux_sessions - - configure_kata "$shutdown_test_type" - - true -} - -# Even though this test is not testing tracing, agent tracing needs to be -# enabled to stop the runtime from killing the VM. However, if tracing is -# enabled, the forwarder must be running. To remove the need for Jaeger to -# also be running, run the forwarder in "NOP" mode. 
-run_trace_forwarder() -{ - local forwarder_binary_path - forwarder_binary_path="/opt/kata/bin/kata-trace-forwarder" - - local socket_path_tf="" - - # If using CLH, socket path must be passed to trace forwarder - if [ $configured_hypervisor = "clh" ]; then - socket_path_tf="--socket-path ${clh_socket_path}" - fi - - run_cmd \ - "tmux new-session \ - -d \ - -s \"$KATA_TMUX_FORWARDER_SESSION\" \ - sudo \"$forwarder_binary_path --dump-only -l trace ${socket_path_tf}\"" -} - -check_agent_stopped() -{ - info "Checking agent stopped" - - local agent_test_type="${1:-}" - [ -z "$agent_test_type" ] && die "need agent test type" - - local cmd= - - case "$agent_test_type" in - 'local') cmd=check_local_agent_stopped ;; - 'vm') cmd=check_vm_stopped ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - waitForProcess \ - "$shutdown_time_secs" \ - "$sleep_time_secs" \ - "$cmd" - - true -} - -check_local_agent_stopped() -{ - local ret=0 - - local i=0 - local max=20 - - agent_ended="false" - - local agent_pid - agent_pid=$(get_local_agent_pid || true) - - # Agent has finished - [ -z "$agent_pid" ] && return 0 - - for _ in $(seq "$max") - do - { sudo kill -0 "$agent_pid"; ret=$?; } || true - - [ "$ret" -ne 0 ] && agent_ended="true" && break - - sleep 0.2 - done - - [ "$agent_ended" = "false" ] && die "agent still running: pid $agent_pid" || true -} - -get_vm_pid() -{ - pgrep "$hypervisor_binary" -} - -check_vm_stopped() -{ - tmux list-sessions |\ - grep -q "^${KATA_TMUX_VM_SESSION}:" \ - && return 1 - - return 0 -} - -start_debug_console() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - info "Starting debug console" - - case "$agent_test_type" in - 'vm') connect_to_vsock_debug_console ;; - # NOP for a local agent since we cannot connect to the agents - # VSOCK console socket from *outside* the host! 
- 'local') true ;; - *) die "invalid agent test type: '$agent_test_type'" ;; - esac - - true -} - -run_single_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - local msg - msg=$(printf \ - "Testing agent (agent test type: '%s', shutdown test type: '%s')" \ - "$agent_test_type" \ - "$shutdown_test_type") - info "$msg" - - setup_agent "$shutdown_test_type" - - if [ $configured_hypervisor = "clh" ]; then - # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify - socket_path_template=$clh_socket_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') - clh_socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') - [ "$dry_run" = 'false' ] && sudo mkdir -p $(dirname "$clh_socket_path") - fi - - run_trace_forwarder "$shutdown_test_type" - - sleep 5s - - test_start_time=$(date '+%F %T') - - start_agent \ - "$agent_test_type" \ - "$agent_log_file" - - info "Testing agent: shutdown test type: '$shutdown_test_type', agent test type: $agent_test_type" - - local entry - entry=$(get_shutdown_test_type_entry "$shutdown_test_type" || true) - local debug_console=$(echo "$entry"|cut -d: -f4) - [ "$debug_console" = 'true' ] && \ - start_debug_console \ - "$agent_test_type" \ - "$shutdown_test_type" - - stop_agent \ - "$agent_test_type" \ - "$ctl_log_file" - - # We only need to show the set of commands once - [ "$dry_run" = 'true' ] && exit 0 - - test_end_time=$(date '+%F %T') - - check_agent_stopped "$agent_test_type" - - validate_agent \ - "$agent_test_type" \ - "$shutdown_test_type" \ - "$agent_log_file" -} - -run_agent() -{ - local agent_test_type="${1:-}" - local shutdown_test_type="${2:-}" - - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - case "$shutdown_test_type" in - 
"$ALL_TEST_TYPES") - local entry - - # Run all shutdown types - for entry in "${shutdown_test_types[@]}" - do - local name - name=$(echo "$entry"|cut -d: -f1) - - run_single_agent \ - "$agent_test_type" \ - "$name" - - # Clean up between iterations - sudo rm -f \ - "$ctl_log_file" \ - "$agent_log_file" - - local addresses=$(get_addresses || true) - - [ -z "$addresses" ] || \ - die "found unexpected vsock addresses: '$addresses'" - - done - ;; - - *) - run_single_agent \ - "$agent_test_type" \ - "$shutdown_test_type" - ;; - esac - -} - -test_agent_shutdown() -{ - local count="${1:-}" - local agent_test_type="${2:-}" - local shutdown_test_type="${3:-}" - - [ -z "$count" ] && die "need count" - [ -z "$agent_test_type" ] && die "need agent test type" - [ -z "$shutdown_test_type" ] && die "need shutdown test type" - - # Start with a clean environment - [ "$dry_run" = 'false' ] && cleanup initial || true - - local i - - for i in $(seq "$count") - do - [ "$dry_run" = 'false' ] && \ - info "testing agent: run $i of $count" || true - run_agent \ - "$agent_test_type" \ - "$shutdown_test_type" - done - - info "testing agent: completed $count runs" -} - -handle_args() -{ - local opt - - local count="${KATA_AGENT_SHUTDOWN_TEST_COUNT}" - local shutdown_test_type="$DEFAULT_SHUTDOWN_TEST_TYPE" - local agent_test_type="$DEFAULT_AGENT_TEST_TYPE" - - while getopts "a:c:dhklnt:" opt "$@" - do - case "$opt" in - a) agent_test_type="$OPTARG" ;; - c) count="$OPTARG" ;; - d) set -o xtrace ;; - h) usage; exit 0 ;; - k) keep_logs='true' ;; - l) list_test_types; exit 0 ;; - n) dry_run='true' ;; - t) shutdown_test_type="$OPTARG" ;; - *) die "invalid option: '$opt'" ;; - esac - done - - setup - - test_agent_shutdown \ - "$count" \ - "$agent_test_type" \ - "$shutdown_test_type" -} - -main() -{ - handle_args "$@" -} - -main "$@" diff --git a/tests/functional/tracing/tracing-test.sh b/tests/functional/tracing/tracing-test.sh deleted file mode 100755 index e8256ba38..000000000 --- 
a/tests/functional/tracing/tracing-test.sh +++ /dev/null @@ -1,540 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019-2022 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -set -o errexit -set -o nounset -set -o pipefail -set -o errtrace - -script_name=${0##*/} -source "/etc/os-release" || source "/usr/lib/os-release" - -# Set to true if all tests pass -success="false" - -DEBUG=${DEBUG:-} - -# If set to any value, do not shut down the Jaeger service. -DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-} -# If set to any value, do not shut down the trace forwarder. -DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-} - -[ -n "$DEBUG" ] && set -o xtrace - -SCRIPT_PATH=$(dirname "$(readlink -f "$0")") -source "${SCRIPT_PATH}/../../common.bash" - -RUNTIME="io.containerd.kata.v2" -CONTAINER_IMAGE="quay.io/prometheus/busybox:latest" - -TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces} - -KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" - -# files for output -formatted_traces_file="kata-traces-formatted.json" -trace_summary_file="span-summary.txt" - -# tmux(1) session to run the trace forwarder in -KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session" - -forwarder_binary="/opt/kata/bin/kata-trace-forwarder" - -# path prefix for CLH socket path -socket_path_prefix="/run/vc/vm/" - -container_id="tracing-test" - -jaeger_server=${jaeger_server:-localhost} -jaeger_ui_port=${jaeger_ui_port:-16686} -jaeger_docker_container_name="jaeger" - -# Span data for testing: -# 1. Existence of spans in jaeger output -# 2. That the relative ordering in the data occurs -# in the jaeger output. -# This is tested to make sure specific spans exist in the output and -# that the order of spans is preserved. -# Ordered in latest in sequence to earliest. -# -# Fields are all span existing span names in relative order from latest -# to earliest call in a sequence of calls. 
Example (pseudocode): -# func1() { -# span = trace("func1") -# func2() -# end span -# } -# func2() { -# span = trace("func2") -# func3() -# end span -# } -# func3() { -# span = trace("func3") -# end span -# } -# The following data should result in a passing test: -# 'func3:func2:func1' -# 'func3:func2' -# 'func3:func1' -# 'func2:func1' -span_ordering_data=( - 'StartVM:createSandboxFromConfig:create:rootSpan' - 'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan' - 'start_container:Start:rootSpan' - 'stopSandbox:Stop:Start:rootSpan' -) - -# Cleanup will remove Jaeger container and -# disable tracing. -cleanup() -{ - local fp="die" - local result="failed" - local dest="$logdir" - - if [ "$success" = "true" ]; then - local fp="info" - result="passed" - - [ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true - - [ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder - - # The tests worked so remove the logs - if [ -n "$DEBUG" ]; then - eval "$fp" "test $result - logs left in '$dest'" - else - "${SCRIPT_PATH}/configure_tracing_for_kata.sh" disable - - [ -d "$logdir" ] && rm -rf "$logdir" || true - fi - - return 0 - fi - - if [ -n "${CI:-}" ]; then - # Running under the CI, so copy the logs to allow them - # to be added as test artifacts. 
- sudo mkdir -p "$TRACE_LOG_DIR" - sudo cp -a "$logdir"/* "$TRACE_LOG_DIR" - - dest="$TRACE_LOG_DIR" - fi - - eval "$fp" "test $result - logs left in '$dest'" -} - -# Run an operation to generate Jaeger trace spans -create_traces() -{ - sudo ctr image pull "$CONTAINER_IMAGE" - sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true -} - -start_jaeger() -{ - local jaeger_docker_image="jaegertracing/all-in-one:latest" - - sudo docker rm -f "${jaeger_docker_container_name}" - - # Defaults - see https://www.jaegertracing.io/docs/getting-started/ - sudo docker run -d --runtime runc --name "${jaeger_docker_container_name}" \ - -e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \ - -p 5775:5775/udp \ - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p "${jaeger_ui_port}:${jaeger_ui_port}" \ - -p 14268:14268 \ - -p 9411:9411 \ - "$jaeger_docker_image" - - sudo mkdir -m 0750 -p "$TRACE_LOG_DIR" -} - -stop_jaeger() -{ - sudo docker stop "${jaeger_docker_container_name}" - sudo docker rm -f "${jaeger_docker_container_name}" -} - -get_jaeger_traces() -{ - local service="$1" - [ -z "$service" ] && die "need jaeger service name" - - local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}" - curl -s "${traces_url}" 2>/dev/null -} - -get_trace_summary() -{ - local status="$1" - [ -z "$status" ] && die "need jaeger status JSON" - - echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh' -} - -get_span_count() -{ - local status="$1" - [ -z "$status" ] && die "need jaeger status JSON" - - # This could be simplified but creating a variable holding the - # summary is useful in debug mode as the summary is displayed. 
- local trace_summary=$(get_trace_summary "$status" || true) - - [ -z "$trace_summary" ] && die "failed to get trace summary" - - local count=$(echo "${trace_summary}" | wc -l) - - [ -z "$count" ] && count=0 - - echo "$count" -} - -# Returns status from Jaeger web UI -get_jaeger_status() -{ - local service="$1" - local logdir="$2" - - [ -z "$service" ] && die "need jaeger service name" - [ -z "$logdir" ] && die "need logdir" - - local status="" - local span_count=0 - - # Find spans - status=$(get_jaeger_traces "$service" || true) - if [ -n "$status" ]; then - echo "$status" | tee "$logdir/${service}-status.json" - span_count=$(get_span_count "$status") - fi - - [ -z "$status" ] && die "failed to query Jaeger for status" - [ "$span_count" -eq 0 ] && die "failed to find any trace spans" - [ "$span_count" -le 0 ] && die "invalid span count" - - get_trace_summary "$status" > "$logdir/$trace_summary_file" -} - -# Check Jaeger spans for the specified service. -check_jaeger_output() -{ - local service="$1" - local min_spans="$2" - local logdir="$3" - - [ -z "$service" ] && die "need jaeger service name" - [ -z "$min_spans" ] && die "need minimum trace span count" - [ -z "$logdir" ] && die "need logdir" - - local status - local errors=0 - - info "Checking Jaeger status" - - status=$(get_jaeger_status "$service" "$logdir") - - #------------------------------ - # Basic sanity checks - [ -z "$status" ] && die "failed to query status via HTTP" - - local span_lines=$(echo "$status"|jq -S '.data[].spans | length') - [ -z "$span_lines" ] && die "no span status" - - # Log the spans to allow for analysis in case the test fails - echo "$status"|jq -S . 
> "$logdir/${service}-traces-formatted.json" - - local span_lines_count=$(echo "$span_lines"|wc -l) - - # Total up all span counts - local spans=$(echo "$span_lines"|paste -sd+ -|bc) - [ -z "$spans" ] && die "no spans" - - # Ensure total span count is numeric - echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'" - - info "found $spans spans (across $span_lines_count traces)" - - # Validate - [ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans" - - # Look for common errors in span data - local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true) - - if [ -n "$error_msg" ]; then - errors=$((errors+1)) - warn "Found invalid parent span errors: $error_msg" - else - errors=$((errors-1)) - [ "$errors" -lt 0 ] && errors=0 - fi - - # Crude but it works - error_or_warning_msgs=$(echo "$status" |\ - jq -S . 2>/dev/null |\ - jq '.data[].spans[].warnings' |\ - grep -E -v "\" |\ - grep -E -v "\[" |\ - grep -E -v "\]" |\ - grep -E -v "clock skew" || true) # ignore clock skew error - - if [ -n "$error_or_warning_msgs" ]; then - errors=$((errors+1)) - warn "Found errors/warnings: $error_or_warning_msgs" - else - errors=$((errors-1)) - [ "$errors" -lt 0 ] && errors=0 - fi - - [ "$errors" -eq 0 ] || die "errors detected" -} - -# Check output for spans in span_ordering_data -check_spans() -{ - local logdir="$1" - [ -z "$logdir" ] && die "need logdir" - - local errors=0 - - # Check for existence of spans in output so we do not do the more - # time consuming test of checking span ordering if it will fail - info "Checking spans: ${span_ordering_data[@]}" - local missing_spans=() - for span_ordering in "${span_ordering_data[@]}"; do - local test_spans=(`echo $span_ordering | tr ':' ' '`) - for s in "${test_spans[@]}"; do - grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" ) - done - done - if [ "${#missing_spans[@]}" -gt 0 ]; then - die "Fail: Missing spans: ${missing_spans[@]}" - fi - - 
# Check relative ordering of spans. We are not checking full trace, just - # that known calls are not out of order based on the test input. - for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data - local test_spans=(`echo $span_ordering | tr ':' ' '`) - - # create array for span IDs that match span string - local span_ids=() - for span in "${test_spans[@]}"; do - grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span" - id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces" - id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array - span_ids+=("$id_formatted") - done - - # We now have 2 parallel arrays where test_spans[n] is the string name and - # span_ids[n] has all possible span IDs for that string separated by a colon - - # Since functions can be called multiple times, we may have multiple results - # for span IDs. 
- initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`) - for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans - # construct array of all parents of first span - local retrieved_spans=() - local current_span="$initial" - [ "$current_span" != "" ] || break - - MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors - for i in `seq 1 $MAX_DEPTH`; do - retrieved_spans+=("$current_span") - current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces" - [ "$current_span" != "" ] || break - current_span=$(echo $current_span | tr -d '"') - [ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth" - done - - # Keep track of this index so we can ensure we are testing the constructed array in order - # Increment when there is a match between test case and constructed path - local retrieved_ids_index=0 - - local matches=0 - local index=0 - - # TODO: Optimize - for ((index=0; index<${#span_ids[@]}; index++)); do - for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do - grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break - done - done - - local last_initial_span_index=${#initial_span_ids[@]}-1 - if [ $matches -eq ${#span_ids[@]} ]; then - info "Pass: spans \"${test_spans[@]}\" found in jaeger output" - break - elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then - die "Fail: spans \"${test_spans[@]}\" NOT in jaeger output" - fi - # else repeat test for next initial span ID - done - done - - -} - -run_trace_forwarder() -{ - if [ $KATA_HYPERVISOR = "qemu" ]; then - tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace" - elif [ $KATA_HYPERVISOR = "clh" ]; 
then - # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify - socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') - socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') - sudo mkdir -p $(dirname "$socket_path") - - tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "sudo $forwarder_binary -l trace --socket-path $socket_path" - else - die "Unsupported hypervisor $KATA_HYPERVISOR" - fi - - info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION" - - local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\"" - waitForProcess 10 1 "$cmd" -} - -kill_trace_forwarder() -{ - tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION" -} - -setup() -{ - # containerd must be running in order to use ctr to generate traces - restart_containerd_service - - local cmds=() - # For container manager (containerd) - cmds+=('ctr') - # For jaeger - cmds+=('docker') - # For launching processes - cmds+=('tmux') - - local cmd - for cmd in "${cmds[@]}" - do - local result - result=$(command -v "$cmd" || true) - [ -n "$result" ] || die "need $cmd" - done - - run_trace_forwarder - - start_jaeger - - "${SCRIPT_PATH}/configure_tracing_for_kata.sh" enable -} - -run_test() -{ - local service="$1" - local min_spans="$2" - local logdir="$3" - - [ -z "$service" ] && die "need service name" - [ -z "$min_spans" ] && die "need minimum span count" - [ -z "$logdir" ] && die "need logdir" - - info "Running test for service '$service'" - - logdir="$logdir/$service" - mkdir -p "$logdir" - - check_jaeger_output "$service" "$min_spans" "$logdir" - check_spans "$logdir" - - info "test passed" -} - -run_tests() -{ - # List of services to check - # - # Format: "name:min-spans" - # - # Where: - # - # - 'name' is the Jaeger service name. 
- # - 'min-spans' is an integer representing the minimum number of - # trace spans this service should generate. - # - # Notes: - # - # - Uses an array to ensure predictable ordering. - # - All services listed are expected to generate traces - # when create_traces() is called a single time. - local -a services - - services+=("kata:125") - - create_traces - - logdir=$(mktemp -d) - - for service in "${services[@]}" - do - local name=$(echo "${service}"|cut -d: -f1) - local min_spans=$(echo "${service}"|cut -d: -f2) - - run_test "${name}" "${min_spans}" "${logdir}" - done - - info "all tests passed" - success="true" -} - -usage() -{ - cat <] - -Commands: - - clean - Perform cleanup phase only. - help - Show usage. - run - Only run tests (no setup or cleanup). - setup - Perform setup phase only. - -Environment variables: - - CI - if set, save logs of all tests to ${TRACE_LOG_DIR}. - DEBUG - if set, enable tracing and do not cleanup after tests. - DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service. - DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder. - -Notes: - - Runs all test phases if no arguments are specified. 
- -EOF -} - -main() -{ - local cmd="${1:-}" - - case "$cmd" in - clean) success="true"; cleanup; exit 0;; - help|-h|-help|--help) usage; exit 0;; - run) run_tests; exit 0;; - setup) setup; exit 0;; - esac - - trap cleanup EXIT - - setup - - run_tests -} - -main "$@" diff --git a/tests/integration/docker/gha-run.sh b/tests/integration/docker/gha-run.sh index 0a0100668..cab3401ea 100755 --- a/tests/integration/docker/gha-run.sh +++ b/tests/integration/docker/gha-run.sh @@ -16,7 +16,21 @@ source "${docker_dir}/../../common.bash" function install_dependencies() { info "Installing the dependencies needed for running the docker smoke test" - install_docker + # Add Docker's official GPG key: + sudo apt-get update + sudo apt-get -y install ca-certificates curl gnupg + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + # Add the repository to Apt sources: + echo \ + "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ + "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + + sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin } function run() { From fe52c0900c7eb312ac5d3e6a900d3f853373f623 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sun, 8 Oct 2023 13:55:01 +0000 Subject: [PATCH 319/339] gha: combine basic amd64 jobs into a single yaml GHA has an undocumented limitation that there can be at most 20 referenced yamls in a single yaml file. We workaround it by combining multiple jobs into a single yaml file. 
Fixes: #8161 Signed-off-by: Peng Tao (cherry picked from commit b60e0a9b5788b4eccc1801dcf67c1499f7640532) --- .github/workflows/basic-ci-amd64.yaml | 197 ++++++++++++++++++ .github/workflows/ci.yaml | 36 +--- .../run-containerd-stability-tests.yaml | 50 ----- .../workflows/run-cri-containerd-tests.yaml | 56 ----- .github/workflows/run-nydus-tests.yaml | 56 ----- .github/workflows/run-vfio-tests.yaml | 49 ----- 6 files changed, 199 insertions(+), 245 deletions(-) create mode 100644 .github/workflows/basic-ci-amd64.yaml delete mode 100644 .github/workflows/run-containerd-stability-tests.yaml delete mode 100644 .github/workflows/run-cri-containerd-tests.yaml delete mode 100644 .github/workflows/run-nydus-tests.yaml delete mode 100644 .github/workflows/run-vfio-tests.yaml diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml new file mode 100644 index 000000000..5780605ed --- /dev/null +++ b/.github/workflows/basic-ci-amd64.yaml @@ -0,0 +1,197 @@ +name: CI | Basic amd64 tests +on: + workflow_call: + inputs: + tarball-suffix: + required: false + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-cri-containerd: + strategy: + # We can set this to true whenever we're 100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. 
+ fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts + + - name: Run cri-containerd tests + run: bash tests/integration/cri-containerd/gha-run.sh run + + run-containerd-stability: + strategy: + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/stability/gha-run.sh install-kata kata-artifacts + + - name: Run containerd-stability tests + run: bash tests/stability/gha-run.sh run + + run-nydus: + strategy: + # We can set this to true whenever we're 
100% sure that + # the all the tests are not flaky, otherwise we'll fail + # all the tests due to a single flaky instance. + fail-fast: false + matrix: + containerd_version: ['lts', 'active'] + vmm: ['clh', 'qemu', 'dragonball'] + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: ${{ matrix.containerd_version }} + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/nydus/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts + + - name: Run nydus tests + run: bash tests/integration/nydus/gha-run.sh run + + run-runk: + runs-on: garm-ubuntu-2204-smaller + env: + CONTAINERD_VERSION: lts + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/integration/runk/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Install kata + run: bash tests/integration/runk/gha-run.sh install-kata kata-artifacts + + - name: Run tracing tests + run: bash tests/integration/runk/gha-run.sh run + + run-vfio: + strategy: + fail-fast: false + matrix: + vmm: ['clh', 'qemu'] + runs-on: 
garm-ubuntu-2304 + env: + GOPATH: ${{ github.workspace }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Install dependencies + run: bash tests/functional/vfio/gha-run.sh install-dependencies + + - name: get-kata-tarball + uses: actions/download-artifact@v3 + with: + name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} + path: kata-artifacts + + - name: Run vfio tests + timeout-minutes: 15 + run: bash tests/functional/vfio/gha-run.sh run diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a92f0fbbe..5c74b84f2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -209,41 +209,9 @@ jobs: commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} - run-cri-containerd-tests: + run-basic-amd64-tests: needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-cri-containerd-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - target-branch: ${{ inputs.target-branch }} - - run-containerd-stability-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-containerd-stability-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - target-branch: ${{ inputs.target-branch }} - - run-nydus-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-nydus-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - target-branch: ${{ inputs.target-branch }} - - run-runk-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-runk-tests.yaml - with: - tarball-suffix: -${{ inputs.tag }} - commit-hash: ${{ inputs.commit-hash }} - 
target-branch: ${{ inputs.target-branch }} - - run-vfio-tests: - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-vfio-tests.yaml + uses: ./.github/workflows/basic-ci-amd64.yaml with: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} diff --git a/.github/workflows/run-containerd-stability-tests.yaml b/.github/workflows/run-containerd-stability-tests.yaml deleted file mode 100644 index 76de75d9e..000000000 --- a/.github/workflows/run-containerd-stability-tests.yaml +++ /dev/null @@ -1,50 +0,0 @@ -name: CI | Run containerd stability tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-containerd-stability: - strategy: - fail-fast: false - matrix: - containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204-smaller - env: - CONTAINERD_VERSION: ${{ matrix.containerd_version }} - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/stability/gha-run.sh install-kata kata-artifacts - - - name: Run containerd-stability tests - run: bash tests/stability/gha-run.sh run diff --git a/.github/workflows/run-cri-containerd-tests.yaml b/.github/workflows/run-cri-containerd-tests.yaml deleted file mode 100644 index f42833609..000000000 --- a/.github/workflows/run-cri-containerd-tests.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: CI | Run cri-containerd tests -on: - 
workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-cri-containerd: - strategy: - # We can set this to true whenever we're 100% sure that - # the all the tests are not flaky, otherwise we'll fail - # all the tests due to a single flaky instance. - fail-fast: false - matrix: - containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2204-smaller - env: - CONTAINERD_VERSION: ${{ matrix.containerd_version }} - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Install dependencies - run: bash tests/integration/cri-containerd/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/integration/cri-containerd/gha-run.sh install-kata kata-artifacts - - - name: Run cri-containerd tests - run: bash tests/integration/cri-containerd/gha-run.sh run diff --git a/.github/workflows/run-nydus-tests.yaml b/.github/workflows/run-nydus-tests.yaml deleted file mode 100644 index 71ee0fe86..000000000 --- a/.github/workflows/run-nydus-tests.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: CI | Run nydus tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-nydus: - strategy: - # We can set this to true whenever we're 100% sure that - # the all the tests are not flaky, otherwise we'll 
fail - # all the tests due to a single flaky instance. - fail-fast: false - matrix: - containerd_version: ['lts', 'active'] - vmm: ['clh', 'qemu', 'dragonball'] - runs-on: garm-ubuntu-2204-smaller - env: - CONTAINERD_VERSION: ${{ matrix.containerd_version }} - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Install dependencies - run: bash tests/integration/nydus/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Install kata - run: bash tests/integration/nydus/gha-run.sh install-kata kata-artifacts - - - name: Run nydus tests - run: bash tests/integration/nydus/gha-run.sh run diff --git a/.github/workflows/run-vfio-tests.yaml b/.github/workflows/run-vfio-tests.yaml deleted file mode 100644 index 4542ec1a3..000000000 --- a/.github/workflows/run-vfio-tests.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: CI | Run vfio tests -on: - workflow_call: - inputs: - tarball-suffix: - required: false - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-vfio: - strategy: - fail-fast: false - matrix: - vmm: ['clh', 'qemu'] - runs-on: garm-ubuntu-2304 - env: - GOPATH: ${{ github.workspace }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Install dependencies - run: bash 
tests/functional/vfio/gha-run.sh install-dependencies - - - name: get-kata-tarball - uses: actions/download-artifact@v3 - with: - name: kata-static-tarball-amd64${{ inputs.tarball-suffix }} - path: kata-artifacts - - - name: Run vfio tests - timeout-minutes: 15 - run: bash tests/functional/vfio/gha-run.sh run From 621e6e6d8c5826e3a3b6b686f426c38bce832a22 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sun, 8 Oct 2023 14:04:52 +0000 Subject: [PATCH 320/339] gha: combine coco jobs into a single yaml So that we don't risk exceeding the GHA 20 rerefenced yaml files limit that easy. Signed-off-by: Peng Tao (cherry picked from commit 954d40cce5b4075d2c82615e23b2e11601786e55) --- .github/workflows/ci.yaml | 37 +--- .github/workflows/run-k8s-tests-on-sev.yaml | 65 ------- .github/workflows/run-k8s-tests-on-snp.yaml | 65 ------- .github/workflows/run-k8s-tests-on-tdx.yaml | 64 ------- .github/workflows/run-kata-coco-tests.yaml | 176 ++++++++++++++++++ .../run-kata-deploy-tests-on-tdx.yaml | 54 ------ 6 files changed, 178 insertions(+), 283 deletions(-) delete mode 100644 .github/workflows/run-k8s-tests-on-sev.yaml delete mode 100644 .github/workflows/run-k8s-tests-on-snp.yaml delete mode 100644 .github/workflows/run-k8s-tests-on-tdx.yaml create mode 100644 .github/workflows/run-kata-coco-tests.yaml delete mode 100644 .github/workflows/run-kata-deploy-tests-on-tdx.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5c74b84f2..71fc7ec56 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -113,17 +113,6 @@ jobs: target-branch: ${{ inputs.target-branch }} secrets: inherit - run-kata-deploy-tests-on-tdx: - needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] - uses: ./.github/workflows/run-kata-deploy-tests-on-tdx.yaml - with: - registry: ghcr.io - repo: ${{ github.repository_owner }}/kata-deploy-ci - tag: ${{ inputs.tag }}-amd64 - commit-hash: ${{ inputs.commit-hash }} - pr-number: ${{ 
inputs.pr-number }} - target-branch: ${{ inputs.target-branch }} - run-kata-monitor-tests: needs: build-kata-static-tarball-amd64 uses: ./.github/workflows/run-kata-monitor-tests.yaml @@ -168,31 +157,9 @@ jobs: target-branch: ${{ inputs.target-branch }} secrets: inherit - run-k8s-tests-on-sev: + run-kata-coco-tests: needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] - uses: ./.github/workflows/run-k8s-tests-on-sev.yaml - with: - registry: ghcr.io - repo: ${{ github.repository_owner }}/kata-deploy-ci - tag: ${{ inputs.tag }}-amd64 - commit-hash: ${{ inputs.commit-hash }} - pr-number: ${{ inputs.pr-number }} - target-branch: ${{ inputs.target-branch }} - - run-k8s-tests-on-snp: - needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] - uses: ./.github/workflows/run-k8s-tests-on-snp.yaml - with: - registry: ghcr.io - repo: ${{ github.repository_owner }}/kata-deploy-ci - tag: ${{ inputs.tag }}-amd64 - commit-hash: ${{ inputs.commit-hash }} - pr-number: ${{ inputs.pr-number }} - target-branch: ${{ inputs.target-branch }} - - run-k8s-tests-on-tdx: - needs: [publish-kata-deploy-payload-amd64, build-and-publish-tee-confidential-unencrypted-image] - uses: ./.github/workflows/run-k8s-tests-on-tdx.yaml + uses: ./.github/workflows/run-kata-coco-tests.yaml with: registry: ghcr.io repo: ${{ github.repository_owner }}/kata-deploy-ci diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml deleted file mode 100644 index a720e2486..000000000 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ /dev/null @@ -1,65 +0,0 @@ -name: CI | Run kubernetes tests on SEV -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - pr-number: - required: true - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: 
string - default: "" - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-sev - runs-on: sev - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - PR_NUMBER: ${{ inputs.pr-number }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBECONFIG: /home/kata/.kube/config - KUBERNETES: "vanilla" - USING_NFD: "false" - K8S_TEST_HOST_TYPE: "baremetal" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-sev - - - name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-sev diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml deleted file mode 100644 index 33ae57d3a..000000000 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ /dev/null @@ -1,65 +0,0 @@ -name: CI | Run kubernetes tests on SEV-SNP -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - pr-number: - required: true - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-snp - runs-on: sev-snp - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - PR_NUMBER: ${{ inputs.pr-number }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBECONFIG: /home/kata/.kube/config - KUBERNETES: "vanilla" - USING_NFD: 
"false" - K8S_TEST_HOST_TYPE: "baremetal" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp - - - name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml deleted file mode 100644 index 940fd0a39..000000000 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: CI | Run kubernetes tests on TDX -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - pr-number: - required: true - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-k8s-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-tdx - runs-on: tdx - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - PR_NUMBER: ${{ inputs.pr-number }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBERNETES: "k3s" - USING_NFD: "true" - K8S_TEST_HOST_TYPE: "baremetal" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Deploy Kata - timeout-minutes: 10 - run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx - - - 
name: Run tests - timeout-minutes: 30 - run: bash tests/integration/kubernetes/gha-run.sh run-tests - - - name: Delete kata-deploy - if: always() - run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml new file mode 100644 index 000000000..2021d10ac --- /dev/null +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -0,0 +1,176 @@ +name: CI | Run kata coco tests +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + +jobs: + run-kata-deploy-tests-on-tdx: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Run tests + run: bash tests/functional/kata-deploy/gha-run.sh run-tests + + run-k8s-tests-on-tdx: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-tdx + runs-on: tdx + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: "k3s" + USING_NFD: "true" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target 
branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-tdx + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-tdx + + run-k8s-tests-on-sev: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-sev + runs-on: sev + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-sev + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-sev + + run-k8s-tests-sev-snp: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-snp + runs-on: sev-snp + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{ inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBECONFIG: /home/kata/.kube/config + KUBERNETES: "vanilla" + USING_NFD: "false" + K8S_TEST_HOST_TYPE: "baremetal" + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + + - name: 
Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-snp + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-tests + + - name: Delete kata-deploy + if: always() + run: bash tests/integration/kubernetes/gha-run.sh cleanup-snp diff --git a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml b/.github/workflows/run-kata-deploy-tests-on-tdx.yaml deleted file mode 100644 index 6b439cea8..000000000 --- a/.github/workflows/run-kata-deploy-tests-on-tdx.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: CI | Run kata-deploy tests on TDX -on: - workflow_call: - inputs: - registry: - required: true - type: string - repo: - required: true - type: string - tag: - required: true - type: string - pr-number: - required: true - type: string - commit-hash: - required: false - type: string - target-branch: - required: false - type: string - default: "" - -jobs: - run-kata-deploy-tests: - strategy: - fail-fast: false - matrix: - vmm: - - qemu-tdx - runs-on: tdx - env: - DOCKER_REGISTRY: ${{ inputs.registry }} - DOCKER_REPO: ${{ inputs.repo }} - DOCKER_TAG: ${{ inputs.tag }} - PR_NUMBER: ${{ inputs.pr-number }} - KATA_HYPERVISOR: ${{ matrix.vmm }} - KUBERNETES: "k3s" - USING_NFD: "true" - steps: - - uses: actions/checkout@v3 - with: - ref: ${{ inputs.commit-hash }} - fetch-depth: 0 - - - name: Rebase atop of the latest target branch - run: | - ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" - env: - TARGET_BRANCH: ${{ inputs.target-branch }} - - - name: Run tests - run: bash tests/functional/kata-deploy/gha-run.sh run-tests From 33791f09447ace722698d5c20ff2a8f40f559056 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Thu, 5 Oct 2023 23:50:49 -0600 Subject: [PATCH 321/339] metrics: stops kata components and 
k8s deployment when test finishes This PR adds a trap whenever the scrip exits, it deletes the iperf k8s deployment and k8s services, and deletes the kata components. This way, when the script finishes, it verifies that there are indeed no kata components still running. Fixes: #8126 Signed-off-by: David Esparza (cherry picked from commit bba34910df0027c4f0b1407d6bdadfe93ec84f35) --- .../k8s-network-metrics-iperf3.sh | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh index 9d434e912..88beb895c 100755 --- a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3.sh @@ -26,6 +26,8 @@ source "${SCRIPT_PATH}/../../lib/common.bash" iperf_file=$(mktemp iperfresults.XXXXXXXXXX) TEST_NAME="${TEST_NAME:-network-iperf3}" COLLECT_ALL="${COLLECT_ALL:-false}" +IPERF_DEPLOYMENT="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-deployment.yaml" +IPERF_DAEMONSET="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-daemonset.yaml" function remove_tmp_file() { rm -rf "${iperf_file}" @@ -177,56 +179,55 @@ function iperf3_start_deployment() { cmds=("bc" "jq") check_cmds "${cmds[@]}" - init_env - # Check no processes are left behind check_processes - export service="iperf3-server" - export deployment="iperf3-server-deployment" - wait_time=20 sleep_time=2 # Create deployment - kubectl create -f "${SCRIPT_PATH}/runtimeclass_workloads/iperf3-deployment.yaml" + kubectl create -f "${IPERF_DEPLOYMENT}" # Check deployment creation - local cmd="kubectl wait --for=condition=Available deployment/${deployment}" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl wait --for=condition=Available deployment/iperf3-server-deployment" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Create DaemonSet - kubectl create -f 
"${SCRIPT_PATH}/runtimeclass_workloads/iperf3-daemonset.yaml" - - # Expose deployment - kubectl expose deployment/"${deployment}" + kubectl create -f "${IPERF_DAEMONSET}" # Get the names of the server pod export server_pod_name=$(kubectl get pods -o name | grep server | cut -d '/' -f2) # Verify the server pod is working - local cmd="kubectl get pod $server_pod_name -o yaml | grep 'phase: Running'" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl get pod ${server_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Get the names of client pod export client_pod_name=$(kubectl get pods -o name | grep client | cut -d '/' -f2) # Verify the client pod is working - local cmd="kubectl get pod $client_pod_name -o yaml | grep 'phase: Running'" - waitForProcess "$wait_time" "$sleep_time" "$cmd" + local cmd="kubectl get pod ${client_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" # Get the ip address of the server pod - export server_ip_add=$(kubectl get pod "$server_pod_name" -o jsonpath='{.status.podIP}') + export server_ip_add=$(kubectl get pod "${server_pod_name}" -o jsonpath='{.status.podIP}') } function iperf3_deployment_cleanup() { - kubectl delete pod "$server_pod_name" "$client_pod_name" - kubectl delete ds iperf3-clients - kubectl delete deployment "$deployment" - kubectl delete service "$deployment" + info "iperf: deleting deployments and services" + kubectl delete pod "${server_pod_name}" "${client_pod_name}" + kubectl delete -f "${IPERF_DAEMONSET}" + kubectl delete -f "${IPERF_DEPLOYMENT}" + kill_kata_components && sleep 1 + kill_kata_components check_processes + info "End of iperf3 test" } +# The deployment must be removed in +# any case the script terminates. 
+trap iperf3_deployment_cleanup EXIT + function help() { echo "$(cat << EOF Usage: $0 "[options]" @@ -309,8 +310,8 @@ function main() { export COLLECT_ALL=true && iperf3_bandwidth && iperf3_jitter && iperf3_cpu && iperf3_parallel fi + info "iperf3: saving test results" metrics_json_save - iperf3_deployment_cleanup } main "$@" From c3b91ed39498e08ce62481ae5f4dfadb7b91c459 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 9 Aug 2023 16:05:13 -0300 Subject: [PATCH 322/339] ci: k8s: create k8s clusters with kcli Adapted the gha-run.sh script to create a Kubernetes cluster locally using the kcli tool. Use `./gha-run.sh create-cluster-kcli` to create it, and `./gha-run.sh delete-cluster-kcli` to delete. Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 89bef7d0366555a7011c555428a6bbfd1ba1d1f9) --- tests/gha-run-k8s-common.sh | 43 +++++++++++++++++++++++++ tests/integration/kubernetes/gha-run.sh | 2 ++ 2 files changed, 45 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 44b02f32e..4f7de35e1 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -107,6 +107,11 @@ function delete_cluster() { --yes } +function delete_cluster_kcli() { + CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}" + kcli delete -y kube "$CLUSTER_NAME" +} + function get_nodes_and_pods_info() { kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true @@ -165,6 +170,44 @@ function deploy_k3s() { cp /etc/rancher/k3s/k3s.yaml ~/.kube/config } +function create_cluster_kcli() { + CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}" + + delete_cluster_kcli || true + + kcli create kube "${KUBE_TYPE:-generic}" \ + -P domain="kata.com" \ + -P pool="${LIBVIRT_POOL:-default}" \ + -P ctlplanes="${CLUSTER_CONTROL_NODES:-1}" \ + -P workers="${CLUSTER_WORKERS:-1}" \ + -P 
network="${LIBVIRT_NETWORK:-default}" \ + -P image="${CLUSTER_IMAGE:-ubuntu2004}" \ + -P sdn=flannel \ + -P nfs=false \ + -P disk_size="${CLUSTER_DISK_SIZE:-20}" \ + "${CLUSTER_NAME}" + + export KUBECONFIG="$HOME/.kcli/clusters/$CLUSTER_NAME/auth/kubeconfig" + + local cmd="kubectl get nodes | grep '.*worker.*\'" + echo "Wait at least one worker be Ready" + if ! waitForProcess "330" "30" "$cmd"; then + echo "ERROR: worker nodes not ready." + kubectl get nodes + return 1 + fi + + # Ensure that system pods are running or completed. + cmd="[ \$(kubectl get pods -A --no-headers | grep -v 'Running\|Completed' | wc -l) -eq 0 ]" + echo "Wait system pods be running or completed" + if ! waitForProcess "90" "30" "$cmd"; then + echo "ERROR: not all pods are Running or Completed." + kubectl get pods -A + kubectl get pods -A + return 1 + fi +} + function deploy_rke2() { curl -sfL https://get.rke2.io | sudo sh - diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 360628a6b..43b97eeb0 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -202,6 +202,7 @@ function main() { install-azure-cli) install_azure_cli ;; login-azure) login_azure ;; create-cluster) create_cluster ;; + create-cluster-kcli) create_cluster_kcli ;; configure-snapshotter) configure_snapshotter ;; setup-crio) setup_crio ;; deploy-k8s) deploy_k8s ;; @@ -219,6 +220,7 @@ function main() { cleanup-tdx) cleanup "tdx" ;; cleanup-garm) cleanup "garm" ;; delete-cluster) cleanup "aks" ;; + delete-cluster-kcli) delete_cluster_kcli ;; *) >&2 echo "Invalid argument"; exit 2 ;; esac } From 308ce26438b7c8040e9811e7624c90b4c8c66ffe Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 9 Aug 2023 16:22:19 -0300 Subject: [PATCH 323/339] ci: k8s: set default image for deploy_kata() On CI workflows the variables DOCKER_REGISTRY, DOCKER_REPO and DOCKER_TAG are exported to match the built image. 
However, when running the script outside of CI context, a developer might just use the latest image which in this case will be `quay.io/kata-containers/kata-deploy-ci:kata-containers-latest`. Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit cbb9aa15b6d21e2c98b0e2b60dbf0359ae3c765c) --- tests/integration/kubernetes/gha-run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 43b97eeb0..65b7d38d1 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -12,6 +12,10 @@ kubernetes_dir="$(dirname "$(readlink -f "$0")")" source "${kubernetes_dir}/../../gha-run-k8s-common.sh" tools_dir="${repo_root_dir}/tools" +DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} +DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} +DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} + function configure_devmapper() { sudo mkdir -p /var/lib/containerd/devmapper sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img From 6b76d21568d36e111c1a5e1dfd5f855a73b7ae25 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 9 Aug 2023 16:38:58 -0300 Subject: [PATCH 324/339] ci: k8s: add cleanup-kcli() to gha-run.sh The cleanup-kcli() behaves like other clean up for bare-metal (e.g. sev, tdx...etc) except that KUBECONFIG should be exported. 
Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit d2be8eef1a43f285e4821a9807b509644ad78168) --- tests/integration/kubernetes/gha-run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 65b7d38d1..cf17fa639 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -160,6 +160,9 @@ function cleanup() { test_type="${2:-k8s}" ensure_yq + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + echo "Gather information about the nodes and pods before cleaning up the node" get_nodes_and_pods_info @@ -219,6 +222,7 @@ function main() { deploy-kata-tdx) deploy_kata "tdx" ;; deploy-kata-garm) deploy_kata "garm" ;; run-tests) run_tests ;; + cleanup-kcli) cleanup "kcli" ;; cleanup-sev) cleanup "sev" ;; cleanup-snp) cleanup "snp" ;; cleanup-tdx) cleanup "tdx" ;; From 56cebfb4857a77fe77ed06cd84d4cf5689428f89 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 9 Aug 2023 18:18:10 -0300 Subject: [PATCH 325/339] ci: k8s: add deploy-kata-kcli() to gh-run.sh The cleanup-kcli() behaves like other deploy kata for bare-metal (e.g. sev, tdx...etc) except that KUBECONFIG should be exported. 
Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit c2ef1f0fb089c5cce3e2c126b6c544e7e8166a58) --- tests/integration/kubernetes/gha-run.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index cf17fa639..8835c23ea 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -95,7 +95,10 @@ function deploy_kata() { platform="${1}" ensure_yq - # Emsure we're in the default namespace + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + + # Ensure we're in the default namespace kubectl config set-context --current --namespace=default sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" @@ -217,6 +220,7 @@ function main() { install-kubectl) install_kubectl ;; get-cluster-credentials) get_cluster_credentials ;; deploy-kata-aks) deploy_kata "aks" ;; + deploy-kata-kcli) deploy_kata "kcli" ;; deploy-kata-sev) deploy_kata "sev" ;; deploy-kata-snp) deploy_kata "snp" ;; deploy-kata-tdx) deploy_kata "tdx" ;; From 5cd2e947dc781bc36ebf002b3a5bb1d6351805df Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 19:28:58 -0300 Subject: [PATCH 326/339] ci: k8s: run_tests() for kcli The only difference to the other platforms is that it needs to export KUBECONFIG. 
Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit d54e6d9cdaae6fff1bbb04719f0383be880484ab) --- tests/integration/kubernetes/gha-run.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 8835c23ea..b0c1bdce2 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -145,6 +145,11 @@ function deploy_kata() { } function run_tests() { + platform="${1:-}" + + [ "$platform" = "kcli" ] && \ + export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + # Delete any spurious tests namespace that was left behind kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true @@ -226,6 +231,7 @@ function main() { deploy-kata-tdx) deploy_kata "tdx" ;; deploy-kata-garm) deploy_kata "garm" ;; run-tests) run_tests ;; + run-tests-kcli) run_tests "kcli" ;; cleanup-kcli) cleanup "kcli" ;; cleanup-sev) cleanup "sev" ;; cleanup-snp) cleanup "snp" ;; From 6fb40ad47dd9b4b2ec9b24518352c7269a4dfa4e Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 9 Aug 2023 18:40:03 -0300 Subject: [PATCH 327/339] kata-deploy: re-format kata-[deploy|cleanup].yaml The .tests/integration/kubernetes/gh-run.sh script run `yq write` a couple of times to edit the kata-[deploy|cleanup].yaml, resulting on the file being formatted again. This is annoying because leaves the git tree dirty. 
Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 4af78be13aa276cfadd4f4b859f62bdf45e5bac7) --- .../kata-cleanup/base/kata-cleanup.yaml | 52 ++++++------- .../kata-deploy/base/kata-deploy.yaml | 76 +++++++++---------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml index 3d9006572..7cb875689 100644 --- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml +++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml @@ -6,39 +6,39 @@ metadata: namespace: kube-system spec: selector: - matchLabels: - name: kubelet-kata-cleanup + matchLabels: + name: kubelet-kata-cleanup template: metadata: - labels: - name: kubelet-kata-cleanup + labels: + name: kubelet-kata-cleanup spec: serviceAccountName: kata-deploy-sa hostPID: true nodeSelector: - katacontainers.io/kata-runtime: cleanup + katacontainers.io/kata-runtime: cleanup containers: - - name: kube-kata-cleanup - image: quay.io/kata-containers/kata-deploy:latest - imagePullPolicy: Always - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: DEBUG - value: "false" - - name: SHIMS - value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" - - name: DEFAULT_SHIM - value: "qemu" - - name: CREATE_RUNTIMECLASSES - value: "false" - - name: CREATE_DEFAULT_RUNTIMECLASS - value: "false" - securityContext: - privileged: true + - name: kube-kata-cleanup + image: quay.io/kata-containers/kata-deploy:latest + imagePullPolicy: Always + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset"] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DEBUG + value: "false" + - name: SHIMS + value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" + - name: 
DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES + value: "false" + - name: CREATE_DEFAULT_RUNTIMECLASS + value: "false" + securityContext: + privileged: true updateStrategy: rollingUpdate: maxUnavailable: 1 diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml index c10061d90..3b4e8888a 100644 --- a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -6,50 +6,50 @@ metadata: namespace: kube-system spec: selector: - matchLabels: - name: kata-deploy + matchLabels: + name: kata-deploy template: metadata: - labels: - name: kata-deploy + labels: + name: kata-deploy spec: serviceAccountName: kata-deploy-sa hostPID: true containers: - - name: kube-kata - image: quay.io/kata-containers/kata-deploy:latest - imagePullPolicy: Always - lifecycle: - preStop: - exec: - command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: DEBUG - value: "false" - - name: SHIMS - value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" - - name: DEFAULT_SHIM - value: "qemu" - - name: CREATE_RUNTIMECLASSES - value: "false" - - name: CREATE_DEFAULT_RUNTIMECLASS - value: "false" - securityContext: - privileged: true - volumeMounts: - - name: crio-conf - mountPath: /etc/crio/ - - name: containerd-conf - mountPath: /etc/containerd/ - - name: kata-artifacts - mountPath: /opt/kata/ - - name: local-bin - mountPath: /usr/local/bin/ + - name: kube-kata + image: quay.io/kata-containers/kata-deploy:latest + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install"] + 
env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DEBUG + value: "false" + - name: SHIMS + value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" + - name: DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES + value: "false" + - name: CREATE_DEFAULT_RUNTIMECLASS + value: "false" + securityContext: + privileged: true + volumeMounts: + - name: crio-conf + mountPath: /etc/crio/ + - name: containerd-conf + mountPath: /etc/containerd/ + - name: kata-artifacts + mountPath: /opt/kata/ + - name: local-bin + mountPath: /usr/local/bin/ volumes: - name: crio-conf hostPath: From c4b0f1f31baf1ec2ba5acd299232abac8d846f9c Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 10:41:57 -0300 Subject: [PATCH 328/339] ci: k8s: shellcheck fixes to gha-run.sh Fixed a couple of warns shellcheck emitted and disabled others: * SC2154 (var is referenced but not assigned) * SC2086 (Double quote to prevent globbing and word splitting) Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 200e542921a87f9f99d10c9bfce749d53917edc3) --- tests/integration/kubernetes/gha-run.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index b0c1bdce2..a13ba541a 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -10,6 +10,7 @@ set -o pipefail kubernetes_dir="$(dirname "$(readlink -f "$0")")" source "${kubernetes_dir}/../../gha-run-k8s-common.sh" +# shellcheck disable=2154 tools_dir="${repo_root_dir}/tools" DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} @@ -115,7 +116,7 @@ function deploy_kata() { echo "::group::Final kata-deploy.yaml that is used in the test" cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" - cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" | grep 
"${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image" echo "::endgroup::" kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" @@ -154,7 +155,7 @@ function run_tests() { kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true # Create a new namespace for the tests and switch to it - kubectl apply -f ${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml + kubectl apply -f "${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml" kubectl config set-context --current --namespace=kata-containers-k8s-tests pushd "${kubernetes_dir}" @@ -191,6 +192,7 @@ function cleanup() { cleanup_spec="-f "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"" fi + # shellcheck disable=2086 kubectl delete ${deploy_spec} kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod @@ -199,10 +201,12 @@ function cleanup() { sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" - cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" | grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image" + # shellcheck disable=2086 kubectl apply ${cleanup_spec} sleep 180s + # shellcheck disable=2086 kubectl delete ${cleanup_spec} kubectl delete -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" } From 7fceb21362ca6b0d2dd31a72a24b44acc5b6abc4 Mon Sep 17 
00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 14:30:54 -0300 Subject: [PATCH 329/339] ci: k8s: configurable deploy kata timeout The deploy-kata() of gha-run.sh will wait for 10 minutes for the kata deploy installation finish. This allow users of the script to overwrite that value by exporting the KATA_DEPLOY_WAIT_TIMEOUT environment variable. Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 6677a61fe410d287309f58427150b8237928a8da) --- tests/integration/kubernetes/gha-run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index a13ba541a..731f0740d 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -16,6 +16,7 @@ tools_dir="${repo_root_dir}/tools" DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} +KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m} function configure_devmapper() { sudo mkdir -p /var/lib/containerd/devmapper @@ -125,7 +126,7 @@ function deploy_kata() { else kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" fi - kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + kubectl -n kube-system wait --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}" --for=condition=Ready -l name=kata-deploy pod # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, # which may cause issues like not having the node properly labeled or the artefacts From 5f2c7c78ffdbf10fa087782cef68000000f7382a Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 15:31:29 -0300 Subject: [PATCH 330/339] ci: k8s: set KATA_HYPERVISOR default value Let KATA_HYPERVISOR be qemu by default in gh-run.sh as this variable is required to tweak some configurations of 
kata-deploy. Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 68f083c4d082c3fe4f5585a46eb4d722171e44ee) --- tests/integration/kubernetes/gha-run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 731f0740d..33002e4ff 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -17,6 +17,7 @@ DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m} +KATA_HYPERVISOR=${KATA_HYPERVISOR:-qemu} function configure_devmapper() { sudo mkdir -p /var/lib/containerd/devmapper From 0eaf81c1a2706bb47e6c86df095d26a4c6e9a2ee Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 17:30:22 -0300 Subject: [PATCH 331/339] tests: add get_one_kata_node() to tests_common.sh The introduced get_one_kata_node() returns the first node that has the kata-runtime=true label, i.e., supposedly a node with kata installed. This is useful for tests that should run on a determined worker node on a multi-nodes cluster. Fixes #7619 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 61c9c17bff5511dc419701398bf77aab0eb79b9f) --- tests/integration/kubernetes/tests_common.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index f932bd656..079080338 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -38,6 +38,14 @@ get_pod_config_dir() { info "k8s configured to use runtimeclass" } +# Return the first worker found that is kata-runtime labeled. 
+get_one_kata_node() { + local resource_name + resource_name="$(kubectl get node -l katacontainers.io/kata-runtime=true -o name | head -1)" + # Remove leading "/node" + echo "${resource_name/"node/"}" +} + # Runs a command in the host filesystem. exec_host() { node="$(kubectl get node -o name)" From a54bdd00d592a2f3dfba2ec0427a3c1dc959b241 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 17:48:25 -0300 Subject: [PATCH 332/339] tests: exec_host() now gets the node name The exec_host() simply fails on cluster with multi-nodes because `kubectl get node -o name" will return a list o names. Moreover, it will return control nodes names which usually don't have kata installed. Fixes #7619 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 3a00fc910122b024ef7e5775ec2793976ad56102) --- tests/integration/kubernetes/tests_common.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index 079080338..3c8876e13 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -47,10 +47,14 @@ get_one_kata_node() { } # Runs a command in the host filesystem. +# +# Parameters: +# $1 - the node name +# exec_host() { - node="$(kubectl get node -o name)" + node="$1" # `kubectl debug` always returns 0, so we hack it to return the right exit code. - command="$@" + command="${@:2}" command+='; echo -en \\n$?' 
# We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051 # tl;dr: When testing with CRI-O we're facing the foillowing error: @@ -61,7 +65,7 @@ exec_host() { # [bats-exec-test:38] INFO: k8s configured to use runtimeclass # bash: line 1: $'\r': command not found # ``` - output="$(kubectl debug -qit "${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')" + output="$(kubectl debug -qit "node/${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')" kubectl get pods -o name | grep node-debugger | xargs kubectl delete > /dev/null exit_code="$(echo "${output}" | tail -1)" echo "$(echo "${output}" | head -n -1)" From 58ad83330053e3eebd0956c28edded066770c845 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 18:42:19 -0300 Subject: [PATCH 333/339] tests: run k8s-file-volume on a given node This test can give false-positive on a multi-node cluster. Changed it to use the new get_one_kata_node() and the modified exec_host() to run the setup commands on a given node (that has kata installed) and ensure the test pod is scheduled at that same node. 
Fixes #7619 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 666993da8d75c0c268304442bca82e33ea3e956f) --- tests/integration/kubernetes/k8s-file-volume.bats | 8 +++++--- .../runtimeclass_workloads/pod-file-volume.yaml | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats index d849db2ed..37ccd85f8 100644 --- a/tests/integration/kubernetes/k8s-file-volume.bats +++ b/tests/integration/kubernetes/k8s-file-volume.bats @@ -14,7 +14,8 @@ setup() { [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" pod_name="test-file-volume" container_name="busybox-file-volume-container" - tmp_file=$(exec_host mktemp /tmp/file-volume-test-foo.XXXXX) + node="$(get_one_kata_node)" + tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX) mount_path="/tmp/foo.txt" file_body="test" get_pod_config_dir @@ -22,11 +23,12 @@ setup() { @test "Test readonly volume for pods" { # Write test body to temp file - exec_host "echo "$file_body" > $tmp_file" + exec_host "$node" "echo "$file_body" > $tmp_file" # Create test yaml sed -e "s|HOST_FILE|$tmp_file|" ${pod_config_dir}/pod-file-volume.yaml > ${pod_config_dir}/test-pod-file-volume.yaml sed -i "s|MOUNT_PATH|$mount_path|" ${pod_config_dir}/test-pod-file-volume.yaml + sed -i "s|NODE|$node|" ${pod_config_dir}/test-pod-file-volume.yaml # Create pod kubectl create -f "${pod_config_dir}/test-pod-file-volume.yaml" @@ -43,6 +45,6 @@ teardown() { [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}" [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}" kubectl delete pod "$pod_name" - exec_host rm -f $tmp_file + exec_host "$node" rm -f $tmp_file rm -f ${pod_config_dir}/test-pod-file-volume.yaml.yaml } diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml 
b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml index 4784b1477..e7a194f42 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml @@ -11,6 +11,7 @@ spec: terminationGracePeriodSeconds: 0 runtimeClassName: kata restartPolicy: Never + nodeName: NODE volumes: - name: shared-file hostPath: From c4456c21d92afb71dc51592807c4bd1157889748 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 18:45:04 -0300 Subject: [PATCH 334/339] tests: run k8s-volume on a given node This test can give false-positive on a multi-node cluster. Changed it to use the new get_one_kata_node() and the modified exec_host() to run the setup commands on a given node (that has kata installed) and ensure the test pod is scheduled at that same node. Fixes #7619 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit c30c3ff1853c8d8ccb86240fd7fb6fd67d9021a0) --- tests/integration/kubernetes/k8s-volume.bats | 19 ++++++++++++------- .../runtimeclass_workloads/pv-pod.yaml | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/integration/kubernetes/k8s-volume.bats b/tests/integration/kubernetes/k8s-volume.bats index 705794443..7bb69f95d 100644 --- a/tests/integration/kubernetes/k8s-volume.bats +++ b/tests/integration/kubernetes/k8s-volume.bats @@ -15,13 +15,17 @@ setup() { get_pod_config_dir - tmp_file=$(exec_host mktemp -d /tmp/data.XXXX) + node=$(get_one_kata_node) + tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX) + pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml) pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml) msg="Hello from Kubernetes" - exec_host "echo $msg > $tmp_file/index.html" + exec_host "$node" "echo $msg > $tmp_file/index.html" pod_name="pv-pod" # Define temporary file at yaml - sed -e "s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pod_yaml" + sed -e 
"s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pv_yaml" + sed -e "s|NODE|${node}|g" "${pod_config_dir}/pv-pod.yaml" > "$pod_yaml" + } @test "Create Persistent Volume" { @@ -31,7 +35,7 @@ setup() { volume_claim="pv-claim" # Create the persistent volume - kubectl create -f "$pod_yaml" + kubectl create -f "$pv_yaml" # Check the persistent volume is Available cmd="kubectl get pv $volume_name | grep Available" @@ -45,7 +49,7 @@ setup() { waitForProcess "$wait_time" "$sleep_time" "$cmd" # Create pod - kubectl create -f "${pod_config_dir}/pv-pod.yaml" + kubectl create -f "$pod_yaml" # Check pod creation kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name" @@ -62,8 +66,9 @@ teardown() { kubectl describe "pod/$pod_name" kubectl delete pod "$pod_name" + rm -f "$pod_yaml" kubectl delete pvc "$volume_claim" kubectl delete pv "$volume_name" - rm -f "$pod_yaml" - exec_host rm -rf "$tmp_file" + rm -f "$pv_yaml" + exec_host "$node" rm -rf "$tmp_file" } diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml index 6a165b971..c3686a981 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml +++ b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml @@ -10,6 +10,7 @@ metadata: spec: terminationGracePeriodSeconds: 0 runtimeClassName: kata + nodeName: NODE volumes: - name: pv-storage persistentVolumeClaim: From a195539307982f95d2fb90675b912fc4cfaad384 Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Wed, 20 Sep 2023 13:36:34 -0300 Subject: [PATCH 335/339] ci: k8s: set KUBERNETES default value The KUBERNETES variable is mostly used by kata-deploy whether to apply k3s specific deployments or not. It is used to select the type of kubernetes to be installed (k3s, k0s, rancher...etc) and it is always set on CI. Running the script locally we want to set a value by default to avoid `KUBERNETES: unbound variable` errors. 
Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit e669282c25f3a928fe51e251079475a40b96cd9c) --- tests/integration/kubernetes/gha-run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 33002e4ff..c72e35be7 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -18,6 +18,7 @@ DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m} KATA_HYPERVISOR=${KATA_HYPERVISOR:-qemu} +KUBERNETES="${KUBERNETES:-}" function configure_devmapper() { sudo mkdir -p /var/lib/containerd/devmapper From 602c56c0d7397159a576d1d1335469823908b004 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 15:23:03 +0000 Subject: [PATCH 336/339] tests: Enable soak parallel test This PR enables the soak parallel test for stability test. Fixes #8153 Signed-off-by: Gabriela Cervantes (cherry picked from commit 0f04d527d9cb0f0a981bda377ed7853f34d78802) --- tests/stability/gha-run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index 66e6f21c0..e05ae5aa4 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -8,6 +8,7 @@ set -o errexit set -o nounset set -o pipefail +set -x kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" @@ -16,8 +17,8 @@ source "${stability_dir}/../common.bash" function run() { info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" - # export ITERATIONS=2 MAX_CONTAINERS=20 - # bash "${stability_dir}/soak_parallel_rm.sh" + export ITERATIONS=2 MAX_CONTAINERS=20 + bash "${stability_dir}/soak_parallel_rm.sh" } function main() { From a818f628d7dc5f8cf099153951b000c8a5167624 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 
15:27:28 +0000 Subject: [PATCH 337/339] tests: Add soak parallel stability test This PR adds the soak parallel stability test. Signed-off-by: Gabriela Cervantes (cherry picked from commit dec3951ca55fbb1c2e3dd23551a815fcdebe7d11) --- tests/stability/soak_parallel_rm.sh | 208 ++++++++++++++++++++++++++++ versions.yaml | 8 ++ 2 files changed, 216 insertions(+) create mode 100755 tests/stability/soak_parallel_rm.sh diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh new file mode 100755 index 000000000..6bf74ea61 --- /dev/null +++ b/tests/stability/soak_parallel_rm.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# +# Copyright (c) 2017-2018, 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test will run a number of parallel containers, and then try to +# 'rm -f' them all at the same time. It will check after each run and +# rm that we have the expected number of containers, shims, +# qemus and runtimes active +# The goals are two fold: +# - spot any stuck or non-started components +# - catch any hang ups + +cidir=$(dirname "$0") +source "${cidir}/../metrics/lib/common.bash" +source "/etc/os-release" || source "/usr/lib/os-release" +set -x + +# How many times will we run the test loop... +ITERATIONS="${ITERATIONS:-5}" + +# the system 'free available' level where we stop running the tests, as otherwise +# the system can crawl to a halt, and/or start refusing to launch new VMs anyway +# We choose 2G, as that is one of the default VM sizes for Kata +MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}" + +# do we need a command argument for this payload? +COMMAND="${COMMAND:-tail -f /dev/null}" + +# Runtime path +RUNTIME_PATH=$(command -v $RUNTIME) + +# The place where virtcontainers keeps its active pod info +# This is ultimately what 'kata-runtime list' uses to get its info, but +# we can also check it for sanity directly +VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}" + +# let's cap the test. 
If you want to run until you hit the memory limit +# then just set this to a very large number +MAX_CONTAINERS="${MAX_CONTAINERS:-110}" + +KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +check_vsock_active() { + vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}') + vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}') + if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then + return 0 + else + return 1 + fi +} + +count_containers() { + sudo ctr c list -q | wc -l +} + +check_all_running() { + local goterror=0 + + echo "Checking ${how_many} containers have all relevant components" + + # check what docker thinks + how_many_running=$(count_containers) + + if (( ${how_many_running} != ${how_many} )); then + echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" + ((goterror++)) + fi + + # Only check for Kata components if we are using a Kata runtime + if (( $check_kata_components )); then + + # check we have the right number of shims + how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l) + # one shim process per container... 
+ if (( ${how_many_running} != ${how_many_shims} )); then + echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" + ((goterror++)) + fi + + # check we have the right number of vm's + if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then + how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l) + if (( ${how_many_running} != ${how_many_vms} )); then + echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" + ((goterror++)) + fi + fi + + # if this is kata-runtime, check how many pods virtcontainers thinks we have + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + if [ -d "${VC_POD_DIR}" ]; then + num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l) + + if (( ${how_many_running} != ${num_vc_pods} )); then + echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" + ((goterror++)) + fi + fi + fi + fi + + if (( goterror != 0 )); then + show_system_ctr_state + die "Got $goterror errors, quitting" + fi +} + +# reported system 'available' memory +get_system_avail() { + echo $(free -b | head -2 | tail -1 | awk '{print $7}') +} + +go() { + echo "Running..." 
+ + how_many=0 + + while true; do { + check_all_running + + local i + for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do + containers+=($(random_name)) + sudo ctr run --runtime=${CTR_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND} + ((how_many++)) + done + + if (( ${how_many} >= ${MAX_CONTAINERS} )); then + echo "And we have hit the max ${how_many} containers" + return + fi + + how_much=$(get_system_avail) + if (( ${how_much} < ${MEM_CUTOFF} )); then + echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" + return + fi + } + done +} + +count_mounts() { + echo $(mount | wc -l) +} + +check_mounts() { + final_mount_count=$(count_mounts) + + if [[ $final_mount_count < $initial_mount_count ]]; then + echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" + fi +} + +init() { + restart_containerd_service + extract_kata_env + clean_env_ctr + + # remember how many mount points we had before we do anything + # and then sanity check we end up with no new ones dangling at the end + initial_mount_count=$(count_mounts) + + # Only check Kata items if we are using a Kata runtime + if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then + echo "Checking Kata runtime" + check_kata_components=1 + else + echo "Not a Kata runtime, not checking for Kata components" + check_kata_components=0 + fi + + versions_file="${cidir}/../versions.yaml" + nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version") + nginx_image="docker.io/library/nginx:$nginx_version" + + # Pull nginx image + sudo ctr image pull ${nginx_image} + if [ $? 
!= 0 ]; then + die "Unable to retry docker image ${nginx_image}" + fi +} + +spin() { + local i + for ((i=1; i<= ITERATIONS; i++)); do { + echo "Start iteration $i of $ITERATIONS" + #spin them up + go + #check we are in a sane state + check_all_running + #shut them all down + clean_env_ctr + #Note there should be none running + how_many=0 + #and check they all died + check_all_running + #and that we have no dangling mounts + check_mounts + } + done + +} + +init +spin diff --git a/versions.yaml b/versions.yaml index 3aff0b999..2d23f2113 100644 --- a/versions.yaml +++ b/versions.yaml @@ -379,3 +379,11 @@ plugins: available on a Kubernetes host. url: "https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin" version: "b7f6d3e0679796e907ecca88cfab0e32e326850d" + +docker_images: + description: "Docker hub images used for testing" + + nginx: + description: "Proxy server for HTTP, HTTPS, SMTP, POP3 and IMAP protocols" + url: "https://hub.docker.com/_/nginx/" + version: "1.15-alpine" From ef49db59f77a5b556d9f0bcac9012d0e5299915f Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 6 Oct 2023 18:41:43 +0000 Subject: [PATCH 338/339] gha: Add general dependencies to stability tests This PR adds the general dependencies to stability tests. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit 84e3d884e4c7dfde4e5bec38920af8952a9df60f) --- tests/stability/gha-run.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index e05ae5aa4..586e45015 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -14,6 +14,27 @@ kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" source "${stability_dir}/../common.bash" +function install_dependencies() { + info "Installing the dependencies needed for running the containerd-stability tests" + + declare -a system_deps=( + jq + ) + + sudo apt-get update + sudo apt-get -y install "${system_deps[@]}" + + ensure_yq + + declare -a github_deps + github_deps[0]="cri_containerd:$(get_from_kata_deps "externals.containerd.${CONTAINERD_VERSION}")" + + for github_dep in "${github_deps[@]}"; do + IFS=":" read -r -a dep <<< "${github_dep}" + install_${dep[0]} "${dep[1]}" + done +} + function run() { info "Running soak parallel stability tests using ${KATA_HYPERVISOR} hypervisor" @@ -24,6 +45,7 @@ function run() { function main() { action="${1:-}" case "${action}" in + install-dependencies) install_dependencies ;; install-kata) install_kata ;; enabling-hypervisor) enabling_hypervisor ;; run) run ;; From 7ff98daecffa8b106abd2628c488779416ed4a76 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 9 Oct 2023 17:09:32 +0000 Subject: [PATCH 339/339] gha: Add install dependencies for stability tests This PR adds the install dependencies for stability tests. 
Signed-off-by: Gabriela Cervantes (cherry picked from commit e786b2b019dab49716e2ab40b7cb57b19686859d) --- .github/workflows/basic-ci-amd64.yaml | 3 ++ tests/stability/gha-run.sh | 1 - tests/stability/soak_parallel_rm.sh | 44 +++++++++++++-------------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index 5780605ed..6ac0a5240 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -78,6 +78,9 @@ jobs: env: TARGET_BRANCH: ${{ inputs.target-branch }} + - name: Install dependencies + run: bash tests/stability/gha-run.sh install-dependencies + - name: get-kata-tarball uses: actions/download-artifact@v3 with: diff --git a/tests/stability/gha-run.sh b/tests/stability/gha-run.sh index 586e45015..01672534a 100755 --- a/tests/stability/gha-run.sh +++ b/tests/stability/gha-run.sh @@ -8,7 +8,6 @@ set -o errexit set -o nounset set -o pipefail -set -x kata_tarball_dir="${2:-kata-artifacts}" stability_dir="$(dirname "$(readlink -f "$0")")" diff --git a/tests/stability/soak_parallel_rm.sh b/tests/stability/soak_parallel_rm.sh index 6bf74ea61..c05c2fade 100755 --- a/tests/stability/soak_parallel_rm.sh +++ b/tests/stability/soak_parallel_rm.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2017-2018, 2020 Intel Corporation +# Copyright (c) 2017-2023 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 # @@ -42,7 +42,7 @@ MAX_CONTAINERS="${MAX_CONTAINERS:-110}" KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" -check_vsock_active() { +function check_vsock_active() { vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}') vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}') if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then @@ -52,20 +52,20 @@ check_vsock_active() { fi } -count_containers() { +function count_containers() { sudo ctr c list -q | wc -l } -check_all_running() { +function 
check_all_running() { local goterror=0 - echo "Checking ${how_many} containers have all relevant components" + info "Checking ${how_many} containers have all relevant components" # check what docker thinks how_many_running=$(count_containers) if (( ${how_many_running} != ${how_many} )); then - echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" + info "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping" ((goterror++)) fi @@ -76,7 +76,7 @@ check_all_running() { how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l) # one shim process per container... if (( ${how_many_running} != ${how_many_shims} )); then - echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" + info "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping" ((goterror++)) fi @@ -84,7 +84,7 @@ check_all_running() { if [[ "$KATA_HYPERVISOR" != "dragonball" ]]; then how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l) if (( ${how_many_running} != ${how_many_vms} )); then - echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" + info "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping" ((goterror++)) fi fi @@ -95,7 +95,7 @@ check_all_running() { num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l) if (( ${how_many_running} != ${num_vc_pods} )); then - echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" + info "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)" ((goterror++)) fi fi @@ -109,12 +109,12 @@ check_all_running() { } # reported system 'available' memory -get_system_avail() { +function get_system_avail() { echo $(free -b | head -2 | tail -1 | awk '{print $7}') } -go() { - echo "Running..." +function go() { + info "Running..." 
how_many=0 @@ -129,32 +129,32 @@ go() { done if (( ${how_many} >= ${MAX_CONTAINERS} )); then - echo "And we have hit the max ${how_many} containers" + info "And we have hit the max ${how_many} containers" return fi how_much=$(get_system_avail) if (( ${how_much} < ${MEM_CUTOFF} )); then - echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" + info "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})" return fi } done } -count_mounts() { +function count_mounts() { echo $(mount | wc -l) } -check_mounts() { +function check_mounts() { final_mount_count=$(count_mounts) if [[ $final_mount_count < $initial_mount_count ]]; then - echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" + info "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})" fi } -init() { +function init() { restart_containerd_service extract_kata_env clean_env_ctr @@ -165,10 +165,10 @@ init() { # Only check Kata items if we are using a Kata runtime if [[ "$RUNTIME" == "containerd-shim-kata-v2" ]]; then - echo "Checking Kata runtime" + info "Checking Kata runtime" check_kata_components=1 else - echo "Not a Kata runtime, not checking for Kata components" + info "Not a Kata runtime, not checking for Kata components" check_kata_components=0 fi @@ -183,10 +183,10 @@ init() { fi } -spin() { +function spin() { local i for ((i=1; i<= ITERATIONS; i++)); do { - echo "Start iteration $i of $ITERATIONS" + info "Start iteration $i of $ITERATIONS" #spin them up go #check we are in a sane state