Mirror of https://github.com/aljazceru/kata-containers.git, synced 2025-12-18 23:04:20 +01:00
runtime-rs: support memory resize

Fixes: #6875

Signed-off-by: Zhongtao Hu <zhongtaohu.tim@linux.alibaba.com>
@@ -11,7 +11,6 @@ use std::{
     sync::Arc,
 };
 
-use agent::{Agent, OnlineCPUMemRequest};
 use anyhow::{Context, Ok, Result};
 use hypervisor::Hypervisor;
 use kata_types::{config::TomlConfig, cpu::LinuxContainerCpuResources};
@@ -52,7 +51,6 @@ impl CpuResource {
         linux_cpus: Option<&LinuxCpu>,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<()> {
         self.update_container_cpu_resources(cid, linux_cpus, op)
             .await
@@ -67,13 +65,13 @@ impl CpuResource {
         }
 
         let curr_vcpus = self
-            .do_update_cpu_resources(vcpu_required, op, hypervisor, agent)
+            .do_update_cpu_resources(vcpu_required, op, hypervisor)
             .await?;
         self.update_current_vcpu(curr_vcpus).await;
         Ok(())
     }
 
-    async fn current_vcpu(&self) -> u32 {
+    pub(crate) async fn current_vcpu(&self) -> u32 {
         let current_vcpu = self.current_vcpu.read().await;
         *current_vcpu
     }
@@ -148,7 +146,6 @@ impl CpuResource {
         new_vcpus: u32,
         op: ResourceUpdateOp,
         hypervisor: &dyn Hypervisor,
-        agent: &dyn Agent,
     ) -> Result<u32> {
         let old_vcpus = self.current_vcpu().await;
 
@@ -164,25 +161,11 @@ impl CpuResource {
         // the number of vcpus would not be lower than the default size
         let new_vcpus = cmp::max(new_vcpus, self.default_vcpu);
 
-        let (old, new) = hypervisor
+        let (_, new) = hypervisor
             .resize_vcpu(old_vcpus, new_vcpus)
             .await
             .context("resize vcpus")?;
 
-        if old < new {
-            let add = new - old;
-            info!(sl!(), "request to onlineCpuMem with {:?} cpus", add);
-
-            agent
-                .online_cpu_mem(OnlineCPUMemRequest {
-                    wait: false,
-                    nb_cpus: new,
-                    cpu_only: true,
-                })
-                .await
-                .context("online vcpus")?;
-        }
-
         Ok(new)
     }
 }
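Reviewer note: after this change, CpuResource only clamps the request and resizes through the hypervisor; onlining the added vCPUs inside the guest moves to the resource manager. A minimal, dependency-free sketch of that shape follows; VcpuResizer, FakeVmm, and the numbers are illustrative stand-ins for the real async Hypervisor trait, not its actual API.

// Illustrative stand-in for the hypervisor side of resize_vcpu; the real
// trait in runtime-rs is async and returns a Result.
trait VcpuResizer {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32);
}

struct FakeVmm;

impl VcpuResizer for FakeVmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32) {
        // Pretend the VMM grants exactly what was requested.
        (old, new)
    }
}

// Mirrors the shape of do_update_cpu_resources after this commit: the vCPU
// count never drops below the configured default, and guest-side onlining
// is left to the caller.
fn do_update_cpu_resources(
    vmm: &dyn VcpuResizer,
    old_vcpus: u32,
    requested: u32,
    default_vcpu: u32,
) -> u32 {
    let new_vcpus = requested.max(default_vcpu);
    let (_, new) = vmm.resize_vcpu(old_vcpus, new_vcpus);
    new
}

fn main() {
    let vmm = FakeVmm;
    // A request below the default (1 < 2) is clamped back up to 2.
    assert_eq!(do_update_cpu_resources(&vmm, 2, 1, 2), 2);
    // A request above the default passes through.
    assert_eq!(do_update_cpu_resources(&vmm, 2, 4, 2), 4);
}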
@@ -19,6 +19,7 @@ use kata_types::{
 struct InitialSize {
     vcpu: u32,
     mem_mb: u32,
+    orig_toml_default_mem: u32,
 }
 
 // generate initial resource(vcpu and memory in MiB) from spec's information
@@ -66,7 +67,11 @@ impl TryFrom<&oci::Spec> for InitialSize {
             sl!(),
             "(from PodSandbox's annotation / SingleContainer's spec) initial size: vcpu={}, mem_mb={}", vcpu, mem_mb
         );
-        Ok(Self { vcpu, mem_mb })
+        Ok(Self {
+            vcpu,
+            mem_mb,
+            orig_toml_default_mem: 0,
+        })
     }
 }
 
@@ -93,7 +98,7 @@ impl InitialSizeManager {
         })
     }
 
-    pub fn setup_config(&self, config: &mut TomlConfig) -> Result<()> {
+    pub fn setup_config(&mut self, config: &mut TomlConfig) -> Result<()> {
         // update this data to the hypervisor config for later use by hypervisor
         let hypervisor_name = &config.runtime.hypervisor_name;
         let hv = config
@@ -104,6 +109,7 @@ impl InitialSizeManager {
         if self.resource.vcpu > 0 {
             hv.cpu_info.default_vcpus = self.resource.vcpu as i32
         }
+        self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
         if self.resource.mem_mb > 0 {
             // since the memory overhead introduced by kata-agent and system components
             // will really affect the amount of memory the user can use, so we choose to
@@ -114,6 +120,10 @@ impl InitialSizeManager {
         }
         Ok(())
     }
+
+    pub fn get_orig_toml_default_mem(&self) -> u32 {
+        self.resource.orig_toml_default_mem
+    }
 }
 
 fn get_nr_vcpu(resource: &LinuxContainerCpuResources) -> u32 {
@@ -173,7 +183,11 @@ mod tests {
                 quota: None,
                 memory: None,
             },
-            result: InitialSize { vcpu: 0, mem_mb: 0 },
+            result: InitialSize {
+                vcpu: 0,
+                mem_mb: 0,
+                orig_toml_default_mem: 0,
+            },
         },
         TestData {
             desc: "normal resource limit",
@@ -186,6 +200,7 @@ mod tests {
             result: InitialSize {
                 vcpu: 3,
                 mem_mb: 512,
+                orig_toml_default_mem: 0,
             },
         },
     ]
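Reviewer note: the point of orig_toml_default_mem is ordering — the TOML's default_memory is recorded before the spec/annotation-derived size adjusts it, so the new memory-resize path can still add the original baseline on top of per-container totals. A small sketch of that ordering follows; MemoryInfo and the plain assignment inside the if-block are simplifications of mine (the real overhead adjustment is not shown in this hunk), not the actual config types.

// Simplified stand-in for the hypervisor memory settings carried in TomlConfig.
struct MemoryInfo {
    default_memory: u32, // MiB
}

struct InitialSize {
    mem_mb: u32,
    orig_toml_default_mem: u32,
}

impl InitialSize {
    // Mirrors the ordering in setup_config: remember the TOML baseline
    // *before* the spec-derived size touches default_memory.
    fn setup_config(&mut self, hv_mem: &mut MemoryInfo) {
        self.orig_toml_default_mem = hv_mem.default_memory;
        if self.mem_mb > 0 {
            // The real setup_config adjusts default_memory here to account for
            // agent/system overhead; that formula is not part of this hunk, so
            // this sketch uses a plain assignment.
            hv_mem.default_memory = self.mem_mb;
        }
    }
}

fn main() {
    let mut hv_mem = MemoryInfo { default_memory: 2048 };
    let mut size = InitialSize { mem_mb: 512, orig_toml_default_mem: 0 };
    size.setup_config(&mut hv_mem);
    // The original 2048 MiB baseline stays available for later resize math.
    assert_eq!(size.orig_toml_default_mem, 2048);
    println!("boot memory: {} MiB", hv_mem.default_memory);
}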
src/runtime-rs/crates/resource/src/cpu_mem/mem.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
// Copyright (c) 2019-2023 Alibaba Cloud
// Copyright (c) 2019-2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::collections::HashMap;
use std::sync::Arc;

use anyhow::{Context, Ok, Result};
use hypervisor::Hypervisor;
use kata_types::config::TomlConfig;
use oci::LinuxResources;
use tokio::sync::RwLock;

use crate::cpu_mem::initial_size::InitialSizeManager;
use crate::ResourceUpdateOp;

// MIB_TO_BYTES_SHIFT the number to shift needed to convert MiB to Bytes
pub const MIB_TO_BYTES_SHIFT: i32 = 20;

#[derive(Default, Debug, Clone)]
pub struct MemResource {
    /// Current memory
    pub(crate) current_mem: Arc<RwLock<u32>>,

    /// Default memory
    pub(crate) orig_toml_default_mem: u32,

    /// MemResource of each container
    pub(crate) container_mem_resources: Arc<RwLock<HashMap<String, LinuxResources>>>,

    /// Use guest swap
    pub(crate) use_guest_swap: bool,
}

impl MemResource {
    pub fn new(config: Arc<TomlConfig>, init_size_manager: InitialSizeManager) -> Result<Self> {
        let hypervisor_name = config.runtime.hypervisor_name.clone();
        let hypervisor_config = config
            .hypervisor
            .get(&hypervisor_name)
            .context("failed to get hypervisor")?;

        Ok(Self {
            current_mem: Arc::new(RwLock::new(hypervisor_config.memory_info.default_memory)),
            container_mem_resources: Arc::new(RwLock::new(HashMap::new())),
            use_guest_swap: hypervisor_config.memory_info.enable_guest_swap,
            orig_toml_default_mem: init_size_manager.get_orig_toml_default_mem(),
        })
    }

    pub(crate) async fn update_mem_resources(
        &self,
        cid: &str,
        linux_resources: Option<&LinuxResources>,
        op: ResourceUpdateOp,
        hypervisor: &dyn Hypervisor,
    ) -> Result<()> {
        self.update_container_mem_resources(cid, linux_resources, op)
            .await
            .context("update container memory resources")?;
        // the unit here is MB
        let (mut mem_sb_mb, need_pod_swap, swap_sb_mb) = self
            .total_mems(self.use_guest_swap)
            .await
            .context("failed to calculate total memory requirement for containers")?;
        mem_sb_mb += self.orig_toml_default_mem;
        if need_pod_swap {
            mem_sb_mb += swap_sb_mb;
        }
        info!(sl!(), "calculate mem_sb_mb {}", mem_sb_mb);

        let curr_mem = self
            .do_update_mem_resource(mem_sb_mb, swap_sb_mb, hypervisor)
            .await
            .context("failed to update_mem_resource")?;

        self.update_current_mem(curr_mem).await;
        Ok(())
    }

    async fn update_current_mem(&self, new_mem: u32) {
        let mut current_mem = self.current_mem.write().await;
        *current_mem = new_mem;
    }

    async fn total_mems(&self, use_guest_swap: bool) -> Result<(u32, bool, u32)> {
        // sb stands for sandbox
        let mut mem_sandbox = 0;
        let mut need_pod_swap = false;
        let mut swap_sandbox = 0;

        let resources = self.container_mem_resources.read().await;

        for (_, r) in resources.iter() {
            for l in &r.hugepage_limits {
                mem_sandbox += l.limit;
            }

            if let Some(memory) = &r.memory {
                // set current_limit to 0 if memory limit is not set to container
                let current_limit = memory.limit.map_or(0, |limit| {
                    mem_sandbox += limit as u64;
                    info!(sl!(), "memory sb: {}, memory limit: {}", mem_sandbox, limit);
                    limit
                });

                if let Some(swappiness) = memory.swappiness {
                    if swappiness > 0 && use_guest_swap {
                        if let Some(swap) = memory.swap {
                            if swap > current_limit {
                                swap_sandbox = swap.saturating_sub(current_limit);
                            }
                        }
                        // if current_limit is 0, the container will have access to the entire memory available on the host system
                        // so we add swap for this
                        else if current_limit == 0 {
                            need_pod_swap = true;
                        } else {
                            swap_sandbox += current_limit;
                        }
                    }
                }
            }
        }

        Ok((
            (mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
            need_pod_swap,
            (swap_sandbox >> MIB_TO_BYTES_SHIFT) as u32,
        ))
    }

    // update container_cpu_resources field
    async fn update_container_mem_resources(
        &self,
        cid: &str,
        linux_resources: Option<&LinuxResources>,
        op: ResourceUpdateOp,
    ) -> Result<()> {
        if let Some(r) = linux_resources {
            let mut resources = self.container_mem_resources.write().await;
            match op {
                ResourceUpdateOp::Add | ResourceUpdateOp::Update => {
                    resources.insert(cid.to_owned(), r.clone());
                }
                ResourceUpdateOp::Del => {
                    resources.remove(cid);
                }
            }
        }
        Ok(())
    }

    async fn do_update_mem_resource(
        &self,
        new_mem: u32,
        _swap_sz_mb: u32,
        hypervisor: &dyn Hypervisor,
    ) -> Result<u32> {
        info!(sl!(), "requesting vmm to update memory to {:?}", new_mem);
        let (new_memory, _mem_config) = hypervisor
            .resize_memory(new_mem)
            .await
            .context("resize memory")?;

        Ok(new_memory)
    }
}
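Reviewer note: total_mems is the arithmetic heart of the new file — sum every container's memory limit and hugepage limits in bytes, flag pod-level swap for unlimited containers when guest swap is in play, then shift right by MIB_TO_BYTES_SHIFT (2^20) to hand the hypervisor a figure in MiB. A condensed, dependency-free sketch follows; ContainerMem is a stand-in for oci::LinuxResources, and the swappiness check is folded into a single use_guest_swap flag.

// Number of bits to shift to convert bytes to MiB (2^20 bytes per MiB),
// matching MIB_TO_BYTES_SHIFT in mem.rs.
const MIB_TO_BYTES_SHIFT: i32 = 20;

// Simplified stand-in for the per-container memory settings kept in
// container_mem_resources (the real type is oci::LinuxResources).
struct ContainerMem {
    limit_bytes: Option<u64>,
    hugepage_limit_bytes: u64,
}

// Returns (sandbox_mem_mib, need_pod_swap). The caller still adds the
// original TOML default on top, as in `mem_sb_mb += orig_toml_default_mem`.
fn total_mems(containers: &[ContainerMem], use_guest_swap: bool) -> (u32, bool) {
    let mut mem_sandbox: u64 = 0;
    let mut need_pod_swap = false;

    for c in containers {
        mem_sandbox += c.hugepage_limit_bytes;
        match c.limit_bytes {
            Some(limit) => mem_sandbox += limit,
            None => {
                // An unlimited container can use all host memory; with guest
                // swap enabled the real code asks for pod-level swap here
                // (the per-container swappiness check is omitted in this sketch).
                if use_guest_swap {
                    need_pod_swap = true;
                }
            }
        }
    }

    ((mem_sandbox >> MIB_TO_BYTES_SHIFT) as u32, need_pod_swap)
}

fn main() {
    let containers = [
        ContainerMem { limit_bytes: Some(512 << MIB_TO_BYTES_SHIFT), hugepage_limit_bytes: 0 },
        ContainerMem { limit_bytes: Some(256 << MIB_TO_BYTES_SHIFT), hugepage_limit_bytes: 0 },
    ];
    let (mib, need_swap) = total_mems(&containers, true);
    assert_eq!(mib, 768);
    assert!(!need_swap);
}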
@@ -6,3 +6,4 @@
 
 pub mod cpu;
 pub mod initial_size;
+pub mod mem;
@@ -19,6 +19,7 @@ use persist::sandbox_persist::Persist;
 use tokio::sync::RwLock;
 use tracing::instrument;
 
+use crate::cpu_mem::initial_size::InitialSizeManager;
 use crate::network::NetworkConfig;
 use crate::resource_persist::ResourceState;
 use crate::ResourceUpdateOp;
@@ -47,13 +48,15 @@ impl ResourceManager {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // Regist resource logger for later use.
         logging::register_subsystem_logger("runtimes", "resource");
 
         Ok(Self {
             inner: Arc::new(RwLock::new(
-                ResourceManagerInner::new(sid, agent, hypervisor, toml_config).await?,
+                ResourceManagerInner::new(sid, agent, hypervisor, toml_config, init_size_manager)
+                    .await?,
             )),
         })
     }
@@ -6,7 +6,7 @@
 
 use std::{sync::Arc, thread};
 
-use agent::{types::Device, Agent, Storage};
+use agent::{types::Device, Agent, OnlineCPUMemRequest, Storage};
 use anyhow::{anyhow, Context, Ok, Result};
 use async_trait::async_trait;
 use hypervisor::{
@@ -25,7 +25,7 @@ use tokio::{runtime, sync::RwLock};
 
 use crate::{
     cgroups::{CgroupArgs, CgroupsResource},
-    cpu_mem::cpu::CpuResource,
+    cpu_mem::{cpu::CpuResource, initial_size::InitialSizeManager, mem::MemResource},
     manager::ManagerArgs,
     network::{self, Network, NetworkConfig},
     resource_persist::ResourceState,
@@ -48,6 +48,7 @@ pub(crate) struct ResourceManagerInner {
     pub volume_resource: VolumeResource,
     pub cgroups_resource: CgroupsResource,
     pub cpu_resource: CpuResource,
+    pub mem_resource: MemResource,
 }
 
 impl ResourceManagerInner {
@@ -56,6 +57,7 @@ impl ResourceManagerInner {
         agent: Arc<dyn Agent>,
         hypervisor: Arc<dyn Hypervisor>,
         toml_config: Arc<TomlConfig>,
+        init_size_manager: InitialSizeManager,
     ) -> Result<Self> {
         // create device manager
         let dev_manager = DeviceManager::new(hypervisor.clone())
@@ -64,6 +66,7 @@ impl ResourceManagerInner {
 
         let cgroups_resource = CgroupsResource::new(sid, &toml_config)?;
         let cpu_resource = CpuResource::new(toml_config.clone())?;
+        let mem_resource = MemResource::new(toml_config.clone(), init_size_manager)?;
         Ok(Self {
             sid: sid.to_string(),
             toml_config,
@@ -76,6 +79,7 @@ impl ResourceManagerInner {
             volume_resource: VolumeResource::new(),
             cgroups_resource,
             cpu_resource,
+            mem_resource,
         })
    }
 
@@ -427,15 +431,23 @@ impl ResourceManagerInner {
 
         // if static_sandbox_resource_mgmt, we will not have to update sandbox's cpu or mem resource
         if !self.toml_config.runtime.static_sandbox_resource_mgmt {
+            // update cpu
             self.cpu_resource
-                .update_cpu_resources(
-                    cid,
-                    linux_cpus,
-                    op,
-                    self.hypervisor.as_ref(),
-                    self.agent.as_ref(),
-                )
+                .update_cpu_resources(cid, linux_cpus, op, self.hypervisor.as_ref())
                 .await?;
+            // update memory
+            self.mem_resource
+                .update_mem_resources(cid, linux_resources, op, self.hypervisor.as_ref())
+                .await?;
+
+            self.agent
+                .online_cpu_mem(OnlineCPUMemRequest {
+                    wait: false,
+                    nb_cpus: self.cpu_resource.current_vcpu().await,
+                    cpu_only: false,
+                })
+                .await
+                .context("online vcpus")?;
         }
 
         // we should firstly update the vcpus and mems, and then update the host cgroups
@@ -516,6 +528,7 @@ impl Persist for ResourceManagerInner {
             .await?,
             toml_config: Arc::new(TomlConfig::default()),
             cpu_resource: CpuResource::default(),
+            mem_resource: MemResource::default(),
         })
     }
 }
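Reviewer note: with the per-resource pieces above, the non-static update path now runs three steps in order — resize vCPUs through the hypervisor, resize memory through the hypervisor, then make a single agent call that onlines both in the guest. The sketch below shows only that ordering; Vmm, GuestAgent, and the fake implementations are illustrative stand-ins for the real async Hypervisor and Agent traits.

// Stand-ins for the hypervisor and agent interfaces; the real traits in
// runtime-rs are async and return anyhow::Result.
trait Vmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32);
    fn resize_memory(&self, new_mem_mb: u32) -> u32;
}

trait GuestAgent {
    // cpu_only=false asks the guest to online both the added CPUs and the
    // added memory, matching the single OnlineCPUMemRequest in the diff.
    fn online_cpu_mem(&self, nb_cpus: u32, cpu_only: bool);
}

// Mirrors the ordering introduced by this commit: hotplug at the VMM level
// first (CPU, then memory), then one guest-side online call for both.
fn update_sandbox_resources(
    vmm: &dyn Vmm,
    agent: &dyn GuestAgent,
    old_vcpus: u32,
    wanted_vcpus: u32,
    wanted_mem_mb: u32,
) -> (u32, u32) {
    let (_, new_vcpus) = vmm.resize_vcpu(old_vcpus, wanted_vcpus);
    let new_mem_mb = vmm.resize_memory(wanted_mem_mb);
    agent.online_cpu_mem(new_vcpus, false);
    (new_vcpus, new_mem_mb)
}

struct FakeVmm;
impl Vmm for FakeVmm {
    fn resize_vcpu(&self, old: u32, new: u32) -> (u32, u32) { (old, new) }
    fn resize_memory(&self, new_mem_mb: u32) -> u32 { new_mem_mb }
}

struct FakeAgent;
impl GuestAgent for FakeAgent {
    fn online_cpu_mem(&self, nb_cpus: u32, cpu_only: bool) {
        println!("online_cpu_mem: nb_cpus={nb_cpus}, cpu_only={cpu_only}");
    }
}

fn main() {
    let (vcpus, mem) = update_sandbox_resources(&FakeVmm, &FakeAgent, 1, 4, 2048);
    assert_eq!((vcpus, mem), (4, 2048));
}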