Files
kata-containers/kernel/patches/0003-memory-hotplug-by-probe.patch
Jianyong Wu e654dbd836 kernel: Add memory hotplug(add) support for arm64
As memory hotplug for arm64 via ACPI is not yet ready in qemu, we use
"probe" instead. You can refer to [1] for more information about
"probe". Memory hotplug by "probe" in kata works as follows:
first, add memory through qemu QMP; second, echo the start physical address
of that memory to /sys/devices/system/memory/probe, which is done
by kata-agent; third, execute the online operation, after which the newly
added memory can be used.

All functions in this patch will be called after "echo" op. It can be
divided into two parts:
1. create page table for that memory;
2. add that memory to memblock.

In this patch, NUMA must be turned off because not all arm64 machines
support NUMA.
As the newly added memory is placed between 2T and 6T, which is decided
by qemu, and physical and virtual addresses are mapped one-to-one
when the pgd is created for that memory, ARM64_VA_BITS must be set to 48.
Some other configs must also be turned on, especially "ARCH_MEMORY_PROBE".

We have tested this patch together with another patch that performs
the echo operation. It works well when "-m" is used on the command line to
start a kata-container on an aarch64 machine.

This patch is derived from work by Maciej Bielski. You can refer to [2] for
full information about it.

[1] https://www.kernel.org/doc/Documentation/memory-hotplug.txt
[2] https://lkml.org/lkml/2017/11/23/183

Fixes: #309

Signed-off-by: Jianyong Wu  <jianyong.wu@arm.com>
Signed-off-by: Jia He <justin.he@arm.com>
Signed-off-by: Penny Zheng <penny.zheng@arm.com>
2019-01-18 02:52:18 -05:00

220 lines
6.8 KiB
Diff

From 20ecfb98b99dafcd74d44c065de15adcc366ca5d Mon Sep 17 00:00:00 2001
From: Jianyong Wu <jianyong.wu@arm.com>
Date: Wed, 2 Jan 2019 03:55:49 -0500
Subject: [PATCH] memory hotplug
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
---
arch/arm64/Kconfig | 13 ++++++
arch/arm64/include/asm/mmu.h | 3 ++
arch/arm64/mm/init.c | 86 ++++++++++++++++++++++++++++++++++++
arch/arm64/mm/mmu.c | 40 +++++++++++++++++
4 files changed, 142 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0df64a6a56d4..99a0c77a39f3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -641,6 +641,15 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+# NOTE(review): presumably tells the generic memory-hotplug code that arm64
+# supplies its own add_pages() (added to arch/arm64/mm/init.c by this
+# patch) -- confirm against the generic hotplug headers.
+config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
+# Memory hotplug is only enabled when NUMA is off: this patch does not
+# handle NUMA because not all arm64 machines support it.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y
+ depends on !NUMA
+
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
@@ -653,6 +662,10 @@ config NUMA
local memory of the CPU and add some more
NUMA awareness to the kernel.
+# Needed for the "probe" hot-add flow this patch relies on (writing the
+# start physical address to /sys/devices/system/memory/probe).
+config ARCH_MEMORY_PROBE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)"
range 1 10
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 0d34bf0a89c7..2b3fa4d12db0 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -40,5 +40,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
extern void mark_linear_text_alias_ro(void);
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Build kernel page tables for a hot-added physical memory range.
+ * @start: physical base address of the range
+ * @size:  size of the range in bytes
+ */
+extern void hotplug_paging(phys_addr_t start, phys_addr_t size);
+#endif
#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..141576031a78 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -722,3 +722,89 @@ static int __init register_mem_limit_dumper(void)
return 0;
}
__initcall(register_mem_limit_dumper);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * add_pages() - arm64 wrapper around __add_pages() for hot-added memory.
+ * @nid:           node id, passed through to __add_pages()
+ * @start_pfn:     first page frame number of the range being added
+ * @nr_pages:      number of page frames in the range
+ * @want_memblock: passed through to __add_pages()
+ *
+ * Temporarily marks the first page of the range NOMAP around the call to
+ * __add_pages() and, on success, flags that page as reserved afterwards.
+ *
+ * Returns the value returned by __add_pages().
+ */
+int add_pages(int nid, unsigned long start_pfn,
+	      unsigned long nr_pages, bool want_memblock)
+{
+	int ret;
+	u64 start_addr = start_pfn << PAGE_SHIFT;
+
+	/*
+	 * Mark the first page in the range as unusable. This is needed
+	 * because __add_section (within __add_pages) wants pfn_valid
+	 * of it to be false, and in arm64 pfn_valid() is implemented by
+	 * just checking the nomap flag for existing blocks.
+	 *
+	 * A small trick here is that __add_section() requires only
+	 * phys_start_pfn (that is the first pfn of a section) to be
+	 * invalid. Regardless of whether it was assumed (by the function
+	 * author) that all pfns within a section are either all valid
+	 * or all invalid, it allows to avoid looping twice (once here,
+	 * second when memblock_clear_nomap() is called) through all
+	 * pfns of the section and modify only one pfn. Thanks to that,
+	 * further, in __add_zone() only this very first pfn is skipped
+	 * and corresponding page is not flagged reserved. Therefore it
+	 * is enough to correct this setup only for it.
+	 *
+	 * When arch_add_memory() returns the walk_memory_range() function
+	 * is called and passed with online_memory_block() callback,
+	 * which execution finally reaches the memory_block_action()
+	 * function, where also only the first pfn of a memory block is
+	 * checked to be reserved. Above, it was first pfn of a section,
+	 * here it is a block but
+	 * (drivers/base/memory.c):
+	 *     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	 * (include/linux/memory.h):
+	 *     #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
+	 * so we can consider block and section equivalently
+	 */
+	memblock_mark_nomap(start_addr, 1 << PAGE_SHIFT);
+	ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+
+	/*
+	 * Make the pages usable after they have been added.
+	 * This will make pfn_valid return true. The flag is cleared
+	 * whether or not __add_pages() succeeded, so the memblock state
+	 * is always restored.
+	 */
+	memblock_clear_nomap(start_addr, 1 << PAGE_SHIFT);
+
+	/*
+	 * This is a hack to avoid having to mix arch specific code
+	 * into arch independent code. SetPageReserved is supposed
+	 * to be called by __add_zone (within __add_section, within
+	 * __add_pages). However, when it is called there, it assumes that
+	 * pfn_valid returns true. For the way pfn_valid is implemented
+	 * in arm64 (a check on the nomap flag), the only way to make
+	 * this evaluate true inside __add_zone is to clear the nomap
+	 * flags of blocks in architecture independent code.
+	 *
+	 * To avoid this, we set the Reserved flag here after we cleared
+	 * the nomap flag in the line above.
+	 *
+	 * Only do so when __add_pages() succeeded: on failure the struct
+	 * page for start_pfn may never have been set up, so it must not
+	 * be touched.
+	 */
+	if (!ret)
+		SetPageReserved(pfn_to_page(start_pfn));
+
+	return ret;
+}
+
+/*
+ * arch_add_memory() - map and register a hot-added physical memory range.
+ * @nid:           node id, forwarded to add_pages()
+ * @start:         physical start address of the range
+ * @size:          size of the range in bytes
+ * @want_memblock: forwarded to add_pages()
+ *
+ * Rejects ranges that end above the sparsemem limit, builds the page
+ * tables for the range via hotplug_paging() and then hands the pfn range
+ * to add_pages().
+ *
+ * Returns -EINVAL if the range exceeds MAX_PHYSMEM_BITS, otherwise the
+ * value returned by add_pages().
+ */
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+{
+	int ret;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
+
+	if (end_pfn > max_sparsemem_pfn) {
+		/* Terminate the message so it is not merged with the next log line. */
+		pr_err("end_pfn too big\n");
+		/* A bare -1 would be reported to the caller as -EPERM. */
+		return -EINVAL;
+	}
+
+	/* The linear mapping must exist before the pages are added. */
+	hotplug_paging(start, size);
+
+	ret = add_pages(nid, start_pfn, nr_pages, want_memblock);
+
+	/*
+	 * NOTE(review): the mapping created by hotplug_paging() is not torn
+	 * down when add_pages() fails; no unmap helper is visible here.
+	 */
+	if (ret)
+		pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
+			__func__, ret);
+
+	return ret;
+}
+#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f1eb15e0e864..667a0de4cbaf 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -21,6 +21,7 @@
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/stop_machine.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
@@ -40,6 +41,7 @@
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <linux/stop_machine.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
@@ -615,6 +617,44 @@ void __init paging_init(void)
SWAPPER_DIR_SIZE - PAGE_SIZE);
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot added memory.
+ */
+
+/*
+ * Physical memory range passed from hotplug_paging() to __hotplug_paging()
+ * through the stop_machine() data pointer.
+ */
+struct mem_range {
+ /* physical start address of the range */
+ phys_addr_t base;
+ /* length of the range in bytes */
+ phys_addr_t size;
+};
+
+/*
+ * stop_machine() callback that maps one hot-added range into swapper_pg_dir.
+ * @data: pointer to the struct mem_range describing the range
+ *
+ * The range is mapped with PAGE_KERNEL permissions at the virtual address
+ * that __phys_to_virt() yields for its base. Block and contiguous mappings
+ * are disabled when debug_pagealloc is enabled.
+ *
+ * NOTE(review): pgd_pgtable_alloc is invoked from within stop_machine();
+ * verify that its allocations are safe in that context.
+ *
+ * Always returns 0.
+ */
+static int __hotplug_paging(void *data)
+{
+	struct mem_range *range = data;
+	int map_flags = debug_pagealloc_enabled() ?
+			(NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS) : 0;
+
+	__create_pgd_mapping(swapper_pg_dir, range->base,
+			     __phys_to_virt(range->base), range->size,
+			     PAGE_KERNEL, pgd_pgtable_alloc, map_flags);
+
+	return 0;
+}
+
+/*
+ * hotplug_paging() - create kernel page-table entries for hot-added memory.
+ * @start: physical base address of the new range
+ * @size:  size of the new range in bytes
+ *
+ * The mapping itself is done by __hotplug_paging(), run via stop_machine().
+ *
+ * This function has external linkage (it is declared in asm/mmu.h and
+ * called from mm/init.c), so it is deliberately not marked inline.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+	struct mem_range section = {
+		.base = start,
+		.size = size,
+	};
+
+	/* The callback always returns 0; the result is not checked here. */
+	stop_machine(__hotplug_paging, &section, NULL);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
/*
* Check whether a kernel address is valid (derived from arch/x86/).
*/
--
2.17.1