[Linux Kernel] Kernel 분석(v5.14.16) - Kmalloc & vmalloc

BlueMoon-1st 2022. 8. 14. 14:17

2022. 8. 14. 14:17

Kernel에서는 주로 매우 작은 크기의 메모리를 할당한다. 때문에 매번 page 단위로 메모리를 할당하게 되면 메모리 낭비가 심해지고 성능 면에서의 비효율성이 극대화될 수 있다.

위의 문제를 해결하기 위해서 Linux에서는 크게 두 가지의 메모리 할당방법을 지원하고 kernel에서 메모리를 할당 할 때 이 두 가지 방법을 주로 사용한다.

하나는 kmalloc, 다른 하나는 vmalloc이다.

1. kmalloc

DMA 등 physical 메모리와 1:1로 매핑되어야 하는 곳에 주로 사용하며, 기본적으로 메모리를 물리적으로 연속되게 할당한다. 또한 GFP_ATOMIC 같은 flag를 사용하면 sleep되지 않고 할당할 수 있는 특징이 있다(GFP_KERNEL을 사용하면 sleep될 수 있다).

이러한 특징으로 DMA 용도를 포함해 ZONE_NORMAL, ZONE_DMA 영역에서 사용하며 vmalloc보다 더 빠른 성능을 자랑한다.(vmalloc은 가상 주소 매핑을 따로 만들어야 하지만, kmalloc은 사실상 physical과 1:1로 매핑된 영역을 사용하므로 당연히 더 빠르다)

ex) DMA, 인터럽트 핸들러

1) flag

kernel에서 메모리를 할당할 때는 flag를 통해서 원하는 메모리 할당을 유도할 수 있다.

ex) __GFP_DMA : ZONE_DMA 영역에 할당을 요청

ZONE 영역을 정할 수 있고 page, 워터마크, 회수, 액션 등 복합적인 플래그가 존재한다.

include/linux/gfp.h

/*
 * Plain integer GFP bitmasks. Do not use this directly.
 *
 * Every non-zero value below is a distinct single bit (0x01u through
 * 0x2000000u), so the masks can be OR-ed together without overlapping.
 */
#define ___GFP_DMA                0x01u
#define ___GFP_HIGHMEM                0x02u
#define ___GFP_DMA32                0x04u
#define ___GFP_MOVABLE                0x08u
#define ___GFP_RECLAIMABLE        0x10u
#define ___GFP_HIGH                0x20u
#define ___GFP_IO                0x40u
#define ___GFP_FS                0x80u
#define ___GFP_ZERO                0x100u
#define ___GFP_ATOMIC                0x200u
#define ___GFP_DIRECT_RECLAIM        0x400u
#define ___GFP_KSWAPD_RECLAIM        0x800u
#define ___GFP_WRITE                0x1000u
#define ___GFP_NOWARN                0x2000u
#define ___GFP_RETRY_MAYFAIL        0x4000u
#define ___GFP_NOFAIL                0x8000u
#define ___GFP_NORETRY                0x10000u
#define ___GFP_MEMALLOC                0x20000u
#define ___GFP_COMP                0x40000u
#define ___GFP_NOMEMALLOC        0x80000u
#define ___GFP_HARDWALL                0x100000u
#define ___GFP_THISNODE                0x200000u
#define ___GFP_ACCOUNT                0x400000u
#define ___GFP_ZEROTAGS                0x800000u
#define ___GFP_SKIP_KASAN_POISON        0x1000000u
/*
 * ___GFP_NOLOCKDEP only occupies a bit when CONFIG_LOCKDEP is defined;
 * otherwise it is 0, so OR-ing it into a mask changes nothing.
 */
#ifdef CONFIG_LOCKDEP
#define ___GFP_NOLOCKDEP        0x2000000u
#else
#define ___GFP_NOLOCKDEP        0
#endif
ref : https://elixir.bootlin.com/linux/v5.14.16/source/include/linux/gfp.h

2) 할당

include/linux/slab.h

/**
 * kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: gfp_t value forwarded unchanged to whichever helper performs
 *         the allocation.
 *
 * Dispatch, as visible in this function:
 *  - @size is a compile-time constant and exceeds KMALLOC_MAX_CACHE_SIZE:
 *    kmalloc_large().
 *  - @size is a compile-time constant and CONFIG_SLOB is not defined:
 *    ZERO_SIZE_PTR when kmalloc_index() gives 0, otherwise
 *    kmem_cache_alloc_trace() on the matching kmalloc_caches[][] entry.
 *  - everything else: __kmalloc().
 *
 * Return: whatever the selected helper returns, or ZERO_SIZE_PTR.
 **/
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
/* Fast paths are only taken when the compiler knows size at build time. */
if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
unsigned int index;
#endif
/* Requests above KMALLOC_MAX_CACHE_SIZE bypass the kmalloc caches. */
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
index = kmalloc_index(size);

/* Index 0 is answered with ZERO_SIZE_PTR rather than a cache object. */
if (!index)
return ZERO_SIZE_PTR;

/* Cache is picked by kmalloc_type(flags) (row) and the size index (column). */
return kmem_cache_alloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
#endif
}
/*
 * Reached for a non-constant size, and also for a constant size not
 * above KMALLOC_MAX_CACHE_SIZE when CONFIG_SLOB is defined.
 */
return __kmalloc(size, flags);
}
ref : https://elixir.bootlin.com/linux/v5.14.16/source/include/linux/slab.h

위 할당 코드를 보면 크게 3가지 방법으로 할당을 진행한다.

가장 먼저 큰 사이즈(size > KMALLOC_MAX_CACHE_SIZE)에 대해서는 kmalloc_large()를 호출하여 2^order를 이용한 buddy를 활용하고

size가 상수이고 CONFIG_SLOB가 아닌 경우(#ifndef CONFIG_SLOB)에는 kmalloc_index()를 통해서 상수 size에 맞는 kmalloc cache의 index를 뽑아내고, 해당 index의 cache(kmalloc_caches[type][index])에서 할당을 진행한다.

/*
 * __kmalloc_index - translate an allocation size into a size-class index.
 * @size: requested size in bytes.
 * @size_is_constant: when true (and the compiler conditions checked at the
 *                    end of the function hold), an unsupported size is
 *                    reported with BUILD_BUG_ON_MSG() instead of BUG().
 *
 * Return:
 *  - 0 when @size is 0;
 *  - KMALLOC_SHIFT_LOW when @size <= KMALLOC_MIN_SIZE;
 *  - 1 for 65..96 bytes (only if KMALLOC_MIN_SIZE <= 32) and 2 for
 *    129..192 bytes (only if KMALLOC_MIN_SIZE <= 64);
 *  - otherwise the smallest n in 3..25 with @size <= 2^n
 *    (8 bytes -> 3, ..., 32 MiB -> 25).
 * Sizes above 32 MiB never return normally.
 */
static __always_inline unsigned int __kmalloc_index(size_t size,
    bool size_is_constant)
{
if (!size)
return 0;

if (size <= KMALLOC_MIN_SIZE)
return KMALLOC_SHIFT_LOW;

/*
 * The two non-power-of-two classes (96 and 192 bytes) must be tested
 * before the power-of-two ladder below, which would otherwise place
 * these sizes in the 128 and 256 byte classes.
 */
if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
return 1;
if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
return 2;
if (size <=          8) return 3;
if (size <=         16) return 4;
if (size <=         32) return 5;
if (size <=         64) return 6;
if (size <=        128) return 7;
if (size <=        256) return 8;
if (size <=        512) return 9;
if (size <=       1024) return 10;
if (size <=   2 * 1024) return 11;
if (size <=   4 * 1024) return 12;
if (size <=   8 * 1024) return 13;
if (size <= 16 * 1024) return 14;
if (size <= 32 * 1024) return 15;
if (size <= 64 * 1024) return 16;
if (size <= 128 * 1024) return 17;
if (size <= 256 * 1024) return 18;
if (size <= 512 * 1024) return 19;
if (size <= 1024 * 1024) return 20;
if (size <= 2 * 1024 * 1024) return 21;
if (size <= 4 * 1024 * 1024) return 22;
if (size <= 8 * 1024 * 1024) return 23;
if (size <= 16 * 1024 * 1024) return 24;
if (size <= 32 * 1024 * 1024) return 25;

/*
 * No class matched (size > 32 MiB). Fail at build time when the size is
 * a constant and the compiler/config conditions allow it; otherwise
 * fail at run time.
 */
if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000)
    && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
else
BUG();

/* Will never be reached. Needed because the compiler may complain */
return -1;
}
ref : https://elixir.bootlin.com/linux/v5.14.16/source/include/linux/slab.h#L379

위와 같이 상수 size에 대하여 분기문을 통해서 가장 최적화된 index를 출력해준다. (가장 메모리 낭비가 적은 크기의 index를 사용해서 할당을 진행하는 것)

요청한 크기가 상수이므로 상수에따른 적합한 free index를 활용하는 방법이다.

그리고 마지막은 동적으로 할당해주는 방법이다. size가 상수가 아닌 경우(크기와 무관하게 __builtin_constant_p(size)가 거짓인 경우)가 여기에 해당한다.

이 경우에는 __kmalloc()을 호출해서, 런타임에 size에 맞춰 physical 메모리 할당을 진행한다.

2. vmalloc

vmalloc은 가상메모리 공간을 사용하는 방법으로, 가상메모리상에서는 연속적으로 할당하나 physical 메모리는 연속적이지 않을 수 있다. 때문에 메모리를 더 효율적으로 사용할 수 있는 장점이 있다. 하지만 가상 주소를 물리 주소로 변환하기 위한 page table 설정과 TLB 등의 동작이 부가적으로 필요하고, 이로 인해 효율성 저하가 발생한다.

때문에 kernel단에서는 주로 vmalloc보다는 kmalloc을 사용해서 할당을 진행한다.

vmalloc은 kmalloc과 달리 가상메모리를 이용하기 때문에, kmalloc에 할당, 해제만 있는 것과는 달리 vmalloc에는 할당, 해제, 매핑, 언매핑이 API로 존재한다.

1) 할당

vmalloc으로 할당하기 위해서는 가장 먼저 init 과정을 거치며, init을 통해 object 할당과 vmem_list 설정을 한다. vmem은 효율성을 위해 레드블랙트리 알고리즘을 사용하며, init 과정에서 기존 object들을 insert하여 사용 가능한 레드블랙트리를 만드는 정도까지의 작업을 init으로 보면 된다.

이후 vmalloc()을 통해서 할당이 진행된다. vmalloc struct(vm_struct)를 만들고 area를 할당받아서 레드블랙트리에 insert를 진행한다.

kmalloc과 마찬가지로 flag를 통해서 원하는 메모리 할당을 유도할 수 있다.

할당한 메모리들은 가상메모리 공간에서 연속적으로 존재해야 하기 때문에 mapping의 과정을 따로 거치게 된다.

mm/vmalloc.c

/**
 * __vmalloc_node_range - allocate virtually contiguous memory
 * @size:                  allocation size
 * @align:                  desired alignment
 * @start:                  vm area range start
 * @end:                  vm area range end
 * @gfp_mask:                  flags for the page level allocator
 * @prot:                  protection mask for the allocated pages
 * @vm_flags:                  additional vm area flags (e.g. %VM_NO_GUARD)
 * @node:                  node to use for allocation or NUMA_NO_NODE
 * @caller:                  caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags. Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 *
 * When a mapping granule larger than PAGE_SHIFT was selected and either
 * step of the allocation fails, the allocation is retried once with
 * PAGE_SHIFT and the caller's original @size and @align.
 *
 * Return: the address of the area or %NULL on failure
 */
void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller)
{
struct vm_struct *area;
void *addr;
/*
 * Remember the caller's values: size/align may be enlarged for the
 * huge-mapping attempt below and are restored from these on fallback.
 */
unsigned long real_size = size;
unsigned long real_align = align;
/* Mapping granule as a shift; starts at the base page size. */
unsigned int shift = PAGE_SHIFT;

if (WARN_ON_ONCE(!size))
return NULL;

/* Refuse requests that need more pages than totalram_pages() reports. */
if ((size >> PAGE_SHIFT) > totalram_pages()) {
warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, exceeds total pages",
real_size);
return NULL;
}

if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
unsigned long size_per_node;

/*
* Try huge pages. Only try for PAGE_KERNEL allocations,
* others like modules don't yet expect huge pages in
* their allocations due to apply_to_page_range not
* supporting them.
*/

/* With no node preference, judge the size as split across online nodes. */
size_per_node = size;
if (node == NUMA_NO_NODE)
size_per_node /= num_online_nodes();
if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
shift = PMD_SHIFT;
else
shift = arch_vmap_pte_supported_shift(size_per_node);

/* Round alignment and size up to the chosen mapping granule. */
align = max(real_align, 1UL << shift);
size = ALIGN(real_size, 1UL << shift);
}

again:
/* The area is requested with real_size but the current align and shift. */
area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
VM_UNINITIALIZED | vm_flags, start, end, node,
gfp_mask, caller);
if (!area) {
warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, vm_struct allocation failed",
real_size);
goto fail;
}

addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
if (!addr)
goto fail;

/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
* Now, it is fully initialized, so remove this flag here.
*/
clear_vm_uninitialized_flag(area);

/* kmemleak_vmalloc() is given the page-aligned size. */
size = PAGE_ALIGN(size);
kmemleak_vmalloc(area, size, gfp_mask);

return addr;

fail:
/*
 * A larger-than-base-page attempt failed: reset to PAGE_SHIFT and the
 * caller's original align/size, then retry. A failure with
 * shift == PAGE_SHIFT falls through and returns NULL.
 */
if (shift > PAGE_SHIFT) {
shift = PAGE_SHIFT;
align = real_align;
size = real_size;
goto again;
}

return NULL;
}
ref : https://elixir.bootlin.com/linux/v5.14.16/source/mm/vmalloc.c

vmalloc 할당의 메인 동작을 수행하는 function이다. 먼저 __get_vm_area_node()를 통해서 새로운 할당이 들어갈 수 있는 가상 주소 area(vm_struct)를 구하고, 이후 __vmalloc_area_node()를 통해서 할당과 매핑을 진행한다.

/* Excerpt: area->pages receives the result of kmalloc_node() for
 * array_size / nested_gfp / node (array_size is presumably the byte size
 * of the page-pointer array; confirm against mm/vmalloc.c). */
area->pages = kmalloc_node(array_size, nested_gfp, node);

출처: <https://elixir.bootlin.com/linux/v5.14.16/source/mm/vmalloc.c#L2828>

위와 같이 kmalloc_node를 통해서 area의 page 포인터 배열(area->pages)을 할당한다(실제 page들은 이후 page allocator에서 할당되어 이 배열에 채워진다). kmalloc_node의 할당 방법은 kmalloc과 마찬가지로 size에 맞는 index를 구해서 해당 index의 cache(freelist)에서 할당을 진행한다.

staticintvmap_pages_range(unsignedlongaddr,unsignedlongend,pgprot_t prot,structpage**pages,unsignedintpage_shift){interr;err=vmap_pages_range_noflush(addr,end,prot,pages,page_shift);flush_cache_vmap(addr,end);returnerr;}

출처: <https://elixir.bootlin.com/linux/v5.14.16/source/mm/vmalloc.c#L603>

그리고 매핑은 위 함수를 호출해서 진행한다. area->pages 할당을 통해서 셋업된 area의 page들을 virtual memory 주소 범위(addr ~ end)에 연결해주는 역할을 한다.

- ref

코드로 알아보는 ARM 리눅스 커널

저작자표시 변경금지 (새창열림)

'OS > Linux' 카테고리의 다른 글

[Linux Kernel] Kernel 분석(v5.14.16) - 슬랩 (0)	2022.06.11
[Linux Kernel] Kernel 분석(v5.14.16) - 워터마크 (0)	2022.05.29
[Linux Kernel] Kernel 분석(v5.14.16) - NUMA, Zone Allocation (2) (0)	2022.05.09
[Linux Kernel] Kernel 분석(v5.14.16) - NUMA, Zone Allocation (1) (0)	2022.05.01
[Linux Kernel] Kernel 분석(v5.14.16) - pcp (0)	2022.03.26

Blue-Moon의 정리노트!!