Mirror of https://github.com/aljazceru/kata-containers.git, synced 2025-12-18 23:04:20 +01:00
runtime-rs: support memory resize

Fixes: #6875

Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
@@ -11,7 +11,6 @@ use std::{
     sync::Arc,
 };
 
-use agent::{Agent, OnlineCPUMemRequest};
 use anyhow::{Context, Ok, Result};
 use hypervisor::Hypervisor;
 use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
@@ -52,7 +51,6 @@ impl CpuResource {
         linux_cpus: Option<&LinuxCpu>,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<()> {
         self.update_container_cpu_resources(cid, linux_cpus, op)
             .await
@@ -67,13 +65,13 @@ impl CpuResource {
         }
 
         let curr_vcpus = self
-            .do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
+            .do_update_cpu_resources(vcpu_required, op, hypervisor)
             .await?;
         self.update_current_vcpu(curr_vcpus).await;
         Ok(())
     }
 
-    async fn current_vcpu(&self) -> u32 {
+    pub(crate) async fn current_vcpu(&self) -> u32 {
         let current_vcpu = self.current_vcpu.read().await;
         *current_vcpu
     }
@@ -148,7 +146,6 @@ impl CpuResource {
         new_vcpus: u32,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<u32> {
         let old_vcpus = self.current_vcpu().await;
 
@@ -164,25 +161,11 @@ impl CpuResource {
         // the number of vcpus would not be lower than the default size
         let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);
 
-        let (old, new) = hypervisor
+        let (_, new) = hypervisor
             .resize_vcpu(old_vcpus, new_vcpus)
             .await
             .context("resize vcpus")?;
 
-        if old < new {
-            let add = new - old;
-            info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);
-
-            agent
-                .online_cpu_mem(OnlineCPUMemRequest {
-                    wait: false,
-                    nb_cpus: new,
-                    cpu_only: true,
-                })
-                .await
-                .context("online vcpus")?;
-        }
-
         Ok(new)
     }
 }
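Reviewer note: after this change, CpuResource only clamps the request and resizes through the hypervisor; onlining the added vCPUs inside the guest moves to the resource manager. A minimal, dependency-free sketch of that shape follows; VcpuResizer, FakeVmm, and the numbers are illustrative stand-ins for the real async Hypervisor trait, not its actual API.

// Illustrative stand-in for the hypervisor side of resize_vcpu; the real
// trait in runtime-rs is async and returns a Result.
trait VcpuResizer {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32);
}

struct FakeVmm;

impl VcpuResizer for FakeVmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32) {
        // Pretend the VMM grants exactly what was requested.
        (old, new)
    }
}

// Mirrors the shape of do_update_cpu_resources after this commit: the vCPU
// count never drops below the configured default, and guest-side onlining
// is left to the caller.
fn do_update_cpu_resources(
    vmm: &dyn VcpuResizer,
    old_vcpus: u32,
    requested: u32,
    default_vcpu: u32,
) -> u32 {
    let new_vcpus = requested.max(default_vcpu);
    let (_, new) = vmm.resize_vcpu(old_vcpus, new_vcpus);
    new
}

fn main() {
    let vmm = FakeVmm;
    // A request below the default (1 < 2) is clamped back up to 2.
    assert_eq!(do_update_cpu_resources(&vmm, 2, 1, 2), 2);
    // A request above the default passes through.
    assert_eq!(do_update_cpu_resources(&vmm, 2, 4, 2), 4);
}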
@@ -19,6 +19,7 @@ use kata_types::{
 struct InitialSize {
     vcpu: u32,
     mem_mb: u32,
+    orig_toml_default_mem: u32,
 }
 
 // generate initial resource(vcpu and memory in MiB) from spec's information
@@ -66,7 +67,11 @@ impl TryFrom<&oci::Spec> for InitialSize {
             sl!(),
             "(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
         );
-        Ok(Self { vcpu, mem_mb })
+        Ok(Self {
+            vcpu,
+            mem_mb,
+            orig_toml_default_mem: 0,
+        })
     }
 }
 
@@ -93,7 +98,7 @@ impl InitialSizeManager {
         })
     }
 
-    pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> {
+    pub fn setup_config(&mut self, config: &mut TomlConfig) -> Result<()> {
         // update this data to the hypervisor config for later use by hypervisor
         let hypervisor_name = &config.runtime.hypervisor_name;
         let hv = config
@@ -104,6 +109,7 @@ impl InitialSizeManager {
         if self.resource.vcpu > 0 {
             hv.cpu_info.default_vcpus = self.resource.vcpu as i32
         }
+        self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
         if self.resource.mem_mb > 0 {
             // since the memory overhead introduced by kata-agent and system components
             // will really affect the amount of memory the user can use, so we choose to
@@ -114,6 +120,10 @@ impl InitialSizeManager {
         }
         Ok(())
     }
+
+    pub fn get_orig_toml_default_mem(&self) -> u32 {
+        self.resource.orig_toml_default_mem
+    }
 }
 
 fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
@@ -173,7 +183,11 @@ mod tests {
                 quota: None,
                 memory: None,
             },
-            result: InitialSize { vcpu: 0, mem_mb: 0 },
+            result: InitialSize {
+                vcpu: 0,
+                mem_mb: 0,
+                orig_toml_default_mem: 0,
+            },
         },
         TestData {
             desc: "normal resource limit",
@@ -186,6 +200,7 @@ mod tests {
             result: InitialSize {
                 vcpu: 3,
                 mem_mb: 512,
+                orig_toml_default_mem: 0,
             },
         },
     ]
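Reviewer note: the point of orig_toml_default_mem is ordering — the TOML's default_memory is recorded before the spec/annotation-derived size adjusts it, so the new memory-resize path can still add the original baseline on top of per-container totals. A small sketch of that ordering follows; MemoryInfo and the plain assignment inside the if-block are simplifications of mine (the real overhead adjustment is not shown in this hunk), not the actual config types.

// Simplified stand-in for the hypervisor memory settings carried in TomlConfig.
struct MemoryInfo {
    default_memory: u32, // MiB
}

struct InitialSize {
    mem_mb: u32,
    orig_toml_default_mem: u32,
}

impl InitialSize {
    // Mirrors the ordering in setup_config: remember the TOML baseline
    // *before* the spec-derived size touches default_memory.
    fn setup_config(&mut self, hv_mem: &mut MemoryInfo) {
        self.orig_toml_default_mem = hv_mem.default_memory;
        if self.mem_mb > 0 {
            // The real setup_config adjusts default_memory here to account for
            // agent/system overhead; that formula is not part of this hunk, so
            // this sketch uses a plain assignment.
            hv_mem.default_memory = self.mem_mb;
        }
    }
}

fn main() {
    let mut hv_mem = MemoryInfo { default_memory: 2048 };
    let mut size = InitialSize { mem_mb: 512, orig_toml_default_mem: 0 };
    size.setup_config(&mut hv_mem);
    // The original 2048 MiB baseline stays available for later resize math.
    assert_eq!(size.orig_toml_default_mem, 2048);
    println!("boot memory: {} MiB", hv_mem.default_memory);
}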
src/runtime-rs/crates/resource/src/cpu_mem/mem.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::HashMap;
use std::sync::Arc;

use anyhow::{Context, Ok, Result};
use hypervisor::Hypervisor;
use kata_types::config::TomlConfig;
use oci::LinuxResources;
use tokio::sync::RwLock;

use crate::cpu_mem::initial_size::InitialSizeManager;
use crate::ResourceUpdateOp;

// MIB_TO_BYTES_SHIFT the number to shift needed to convert MiB to Bytes
pub const MIB_TO_BYTES_SHIFT: i32 = 20;

#[derive(Default, Debug, Clone)]
pub struct MemResource {
    /// Current memory
    pub(crate) current_mem: Arc<RwLock<u32>>,

    /// Default memory
    pub(crate) orig_toml_default_mem: u32,

    /// MemResource of each container
    pub(crate) container_mem_resources: Arc<RwLock<HashMap<String, LinuxResources>>>,

    /// Use guest swap
    pub(crate) use_guest_swap: bool,
}

impl MemResource {
    pub fn new(config: Arc<TomlConfig>, init_size_manager: InitialSizeManager) -> Result<Self> {
        let hypervisor_name = config.runtime.hypervisor_name.clone();
        let hypervisor_config = config
            .hypervisor
            .get(&hypervisor_name)
            .context("failed to get hypervisor")?;

        Ok(Self {
            current_mem: Arc::new(RwLock::new(hypervisor_config.memory_info.default_memory)),
            container_mem_resources: Arc::new(RwLock::new(HashMap::new())),
            use_guest_swap: hypervisor_config.memory_info.enable_guest_swap,
            orig_toml_default_mem: init_size_manager.get_orig_toml_default_mem(),
        })
    }

    pub(crate) async fn update_mem_resources(
        &self,
        cid: &str,
        linux_resources: Option<&LinuxResources>,
        op: ResourceUpdateOp,
        hypervisor: &dyn Hypervisor,
    ) -> Result<()> {
        self.update_container_mem_resources(cid, linux_resources, op)
            .await
            .context("update container memory resources")?;
        // the unit here is MB
        let (mut mem_sb_mb, need_pod_swap, swap_sb_mb) = self
            .total_mems(self.use_guest_swap)
            .await
            .context("failed to calculate total memory requirement for containers")?;
        mem_sb_mb += self.orig_toml_default_mem;
        if need_pod_swap {
            mem_sb_mb += swap_sb_mb;
        }
        info!(sl!(), "calculate mem_sb_mb {}", mem_sb_mb);

        let curr_mem = self
            .do_update_mem_resource(mem_sb_mb, swap_sb_mb, hypervisor)
            .await
            .context("failed to update_mem_resource")?;

        self.update_current_mem(curr_mem).await;
        Ok(())
    }

    async fn update_current_mem(&self, new_mem: u32) {
        let mut current_mem = self.current_mem.write().await;
        *current_mem = new_mem;
    }

    async fn total_mems(&self, use_guest_swap: bool) -> Result<(u32, bool, u32)> {
        // sb stands for sandbox
        let mut mem_sandbox = 0;
        let mut need_pod_swap = false;
        let mut swap_sandbox = 0;

        let resources = self.container_mem_resources.read().await;

        for (_, r) in resources.iter() {
            for l in &r.hugepage_limits {
                mem_sandbox += l.limit;
            }

            if let Some(memory) = &r.memory {
                // set current_limit to 0 if memory limit is not set to container
                let current_limit = memory.limit.map_or(0, |limit| {
                    mem_sandbox += limit as u64;
                    info!(sl!(), "memory sb: {}, memory limit: {}", mem_sandbox, limit);
                    limit
                });

                if let Some(swappiness) = memory.swappiness {
                    if swappiness > 0 && use_guest_swap {
                        if let Some(swap) = memory.swap {
                            if swap > current_limit {
                                swap_sandbox = swap.saturating_sub(current_limit);
                            }
                        }
                        // if current_limit is 0, the container will have access to the entire memory available on the host system
                        // so we add swap for this
                        else if current_limit == 0 {
                            need_pod_swap = true;
                        } else {
                            swap_sandbox += current_limit;
                        }
                    }
                }
            }
        }

        Ok((
            (mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
            need_pod_swap,
            (swap_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
        ))
    }

    // update container_cpu_resources field
    async fn update_container_mem_resources(
        &self,
        cid: &str,
        linux_resources: Option<&LinuxResources>,
        op: ResourceUpdateOp,
    ) -> Result<()> {
        if let Some(r) = linux_resources {
            let mut resources = self.container_mem_resources.write().await;
            match op {
                ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
                    resources.insert(cid.to_owned(), r.clone());
                }
                ResourceUpdateOp::Del => {
                    resources.remove(cid);
                }
            }
        }
        Ok(())
    }

    async fn do_update_mem_resource(
        &self,
        new_mem: u32,
        _swap_sz_mb: u32,
        hypervisor: &dyn Hypervisor,
    ) -> Result<u32> {
        info!(sl!(), "requesting vmm to update memory to {:?}", new_mem);
        let (new_memory, _mem_config) = hypervisor
            .resize_memory(new_mem)
            .await
            .context("resize memory")?;

        Ok(new_memory)
    }
}
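Reviewer note: total_mems is the arithmetic heart of the new file — sum every container's memory limit and hugepage limits in bytes, flag pod-level swap for unlimited containers when guest swap is in play, then shift right by MIB_TO_BYTES_SHIFT (2^20) to hand the hypervisor a figure in MiB. A condensed, dependency-free sketch follows; ContainerMem is a stand-in for oci::LinuxResources, and the swappiness check is folded into a single use_guest_swap flag.

// Number of bits to shift to convert bytes to MiB (2^20 bytes per MiB),
// matching MIB_TO_BYTES_SHIFT in mem.rs.
const MIB_TO_BYTES_SHIFT: i32 = 20;

// Simplified stand-in for the per-container memory settings kept in
// container_mem_resources (the real type is oci::LinuxResources).
struct ContainerMem {
    limit_bytes: Option<u64>,
    hugepage_limit_bytes: u64,
}

// Returns (sandbox_mem_mib, need_pod_swap). The caller still adds the
// original TOML default on top, as in `mem_sb_mb += orig_toml_default_mem`.
fn total_mems(containers: &[ContainerMem], use_guest_swap: bool) -> (u32, bool) {
    let mut mem_sandbox: u64 = 0;
    let mut need_pod_swap = false;

    for c in containers {
        mem_sandbox += c.hugepage_limit_bytes;
        match c.limit_bytes {
            Some(limit) => mem_sandbox += limit,
            None => {
                // An unlimited container can use all host memory; with guest
                // swap enabled the real code asks for pod-level swap here
                // (the per-container swappiness check is omitted in this sketch).
                if use_guest_swap {
                    need_pod_swap = true;
                }
            }
        }
    }

    ((mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32, need_pod_swap)
}

fn main() {
    let containers = [
        ContainerMem { limit_bytes: Some(512 << MIB_TO_BYTES_SHIFT), hugepage_limit_bytes: 0 },
        ContainerMem { limit_bytes: Some(256 << MIB_TO_BYTES_SHIFT), hugepage_limit_bytes: 0 },
    ];
    let (mib, need_swap) = total_mems(&containers, true);
    assert_eq!(mib, 768);
    assert!(!need_swap);
}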
@@ -6,3 +6,4 @@
 
 pub mod cpu;
 pub mod initial_size;
+pub mod mem;
@@ -19,6 +19,7 @@ use persist::sandbox_persist::Persist;
 use tokio::sync::RwLock;
 use tracing::instrument;
 
+use crate::cpu_mem::initial_size::InitialSizeManager;
 use crate::network::NetworkConfig;
 use crate::resource_persist::ResourceState;
 use crate::ResourceUpdateOp;
@@ -47,13 +48,15 @@ impl ResourceManager {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // Regist resource logger for later use.
         logging::register_subsystem_logger("runtimes", "resource");
 
         Ok(Self {
             inner: Arc::new(RwLock::new(
-                ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
+                ResourceManagerInner::new(sid, agent, hypervisor, toml_config, init_size_manager)
+                    .await?,
             )),
         })
     }
@@ -6,7 +6,7 @@
 
 use std::{sync::Arc, thread};
 
-use agent::{types::Device, Agent, Storage};
+use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
 use anyhow::{anyhow, Context, Ok, Result};
 use async_trait::async_trait;
 use hypervisor::{
@@ -25,7 +25,7 @@ use tokio::{runtime, sync::RwLock};
 
 use crate::{
     cgroups::{CgroupArgs, CgroupsResource},
-    cpu_mem::cpu::CpuResource,
+    cpu_mem::{cpu::CpuResource, initial_size::InitialSizeManager, mem::MemResource},
     manager::ManagerArgs,
     network::{self, Network, NetworkConfig},
     resource_persist::ResourceState,
@@ -48,6 +48,7 @@ pub(crate) struct ResourceManagerInner {
     pub volume_resource: VolumeResource,
     pub cgroups_resource: CgroupsResource,
     pub cpu_resource: CpuResource,
+    pub mem_resource: MemResource,
 }
 
 impl ResourceManagerInner {
@@ -56,6 +57,7 @@ impl ResourceManagerInner {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // create device manager
         let dev_manager = DeviceManager::new(hypervisor.clone())
@@ -64,6 +66,7 @@ impl ResourceManagerInner {
 
         let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
         let cpu_resource = CpuResource::new(toml_config.clone())?;
+        let mem_resource = MemResource::new(toml_config.clone(), init_size_manager)?;
         Ok(Self {
             sid: sid.to_string(),
             toml_config,
@@ -76,6 +79,7 @@ impl ResourceManagerInner {
             volume_resource: VolumeResource::new(),
             cgroups_resource,
             cpu_resource,
+            mem_resource,
         })
    }
 
@@ -427,15 +431,23 @@ impl ResourceManagerInner {
 
         // if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
         if !self.toml_config.runtime.static_sandbox_resource_mgmt {
+            // update cpu
             self.cpu_resource
-                .update_cpu_resources(
-                    cid,
-                    linux_cpus,
-                    op,
-                    self.hypervisor.as_ref(),
-                    self.agent.as_ref(),
-                )
+                .update_cpu_resources(cid, linux_cpus, op, self.hypervisor.as_ref())
                 .await?;
+            // update memory
+            self.mem_resource
+                .update_mem_resources(cid, linux_resources, op, self.hypervisor.as_ref())
+                .await?;
+
+            self.agent
+                .online_cpu_mem(OnlineCPUMemRequest {
+                    wait: false,
+                    nb_cpus: self.cpu_resource.current_vcpu().await,
+                    cpu_only: false,
+                })
+                .await
+                .context("online vcpus")?;
         }
 
         // we should firstly update the vcpus and mems, and then update the host cgroups
@@ -516,6 +528,7 @@ impl Persist for ResourceManagerInner {
             .await?,
             toml_config: Arc::new(TomlConfig::default()),
             cpu_resource: CpuResource::default(),
+            mem_resource: MemResource::default(),
         })
     }
 }
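Reviewer note: with the per-resource pieces above, the non-static update path now runs three steps in order — resize vCPUs through the hypervisor, resize memory through the hypervisor, then make a single agent call that onlines both in the guest. The sketch below shows only that ordering; Vmm, GuestAgent, and the fake implementations are illustrative stand-ins for the real async Hypervisor and Agent traits.

// Stand-ins for the hypervisor and agent interfaces; the real traits in
// runtime-rs are async and return anyhow::Result.
trait Vmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32);
    fn resize_memory(&self, new_mem_mb: u32) -> u32;
}

trait GuestAgent {
    // cpu_only=false asks the guest to online both the added CPUs and the
    // added memory, matching the single OnlineCPUMemRequest in the diff.
    fn online_cpu_mem(&self, nb_cpus: u32, cpu_only: bool);
}

// Mirrors the ordering introduced by this commit: hotplug at the VMM level
// first (CPU, then memory), then one guest-side online call for both.
fn update_sandbox_resources(
    vmm: &dyn Vmm,
    agent: &dyn GuestAgent,
    old_vcpus: u32,
    wanted_vcpus: u32,
    wanted_mem_mb: u32,
) -> (u32, u32) {
    let (_, new_vcpus) = vmm.resize_vcpu(old_vcpus, wanted_vcpus);
    let new_mem_mb = vmm.resize_memory(wanted_mem_mb);
    agent.online_cpu_mem(new_vcpus, false);
    (new_vcpus, new_mem_mb)
}

struct FakeVmm;
impl Vmm for FakeVmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32) { (old, new) }
    fn resize_memory(&self, new_mem_mb: u32) -> u32 { new_mem_mb }
}

struct FakeAgent;
impl GuestAgent for FakeAgent {
    fn online_cpu_mem(&self, nb_cpus: u32, cpu_only: bool) {
        println!("online_cpu_mem: nb_cpus={nb_cpus}, cpu_only={cpu_only}");
    }
}

fn main() {
    let (vcpus, mem) = update_sandbox_resources(&FakeVmm, &FakeAgent, 1, 4, 2048);
    assert_eq!((vcpus, mem), (4, 2048));
}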