4 * Copyright (C) 1999 Ingo Molnar
5 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
7 * simple boot-time physical memory area allocator and
8 * free memory collector. It's used to deal with reserved
9 * system memory and memory holes as well.
14 #include <lwk/bootmem.h>
15 #include <lwk/params.h>
19 #include <lwk/bitops.h>
/*
 * File-scope allocator state.
 */
23 * Set to true once bootmem allocator has been destroyed.
/* ("destoyed" is a pre-existing typo in the identifier; kept because the
 * name is used elsewhere in this file, e.g. in __alloc_bootmem_core and
 * free_all_bootmem) */
25 static bool bootmem_destoyed = false;
28 * Access to this subsystem has to be serialized externally.
29 * (this is true for the boot process anyway)
34 * Amount of system memory to reserve for use by the kernel. The first
35 * kmem_size bytes of system memory [0, kmem_size) will be added to the
36 * kernel memory pool. The remainder of system memory is left untouched by
37 * the kernel and is available for use by applications.
39 static unsigned long kmem_size = (1024 * 1024 * 8); /* default is first 8 MB */
/* presumably registers kmem_size as a boot command-line parameter of type
 * ulong -- confirm against <lwk/params.h> */
40 param(kmem_size, ulong);
/* The single statically-allocated bootmem descriptor; the init_bootmem()/
 * reserve_bootmem()/free_bootmem() wrappers below all operate on it. */
46 static bootmem_data_t __initdata bootmem_data;
49 * List of bootmem_data structures, each describing a section of
/* populated by link_bootmem() (sorted by node_boot_start) and walked by
 * __alloc_bootmem_nopanic() */
52 static LIST_HEAD(bdata_list);
55 * Returns the number of _pages_ that will be allocated for the boot bitmap.
58 bootmem_bootmap_pages(unsigned long pages)
60 unsigned long mapsize;
62 mapsize = (pages+7)/8;
63 mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
64 mapsize >>= PAGE_SHIFT;
70 * Links a newly created bootmem_data structure to the bdata_list.
73 link_bootmem(bootmem_data_t *bdata)
76 if (list_empty(&bdata_list)) {
77 list_add(&bdata->list, &bdata_list);
81 list_for_each_entry(ent, &bdata_list, list) {
82 if (bdata->node_boot_start < ent->node_boot_start) {
83 list_add_tail(&bdata->list, &ent->list);
87 list_add_tail(&bdata->list, &bdata_list);
92 * Called once to set up the allocator itself.
94 static unsigned long __init
96 bootmem_data_t *bdata,
97 unsigned long mapstart,
102 unsigned long mapsize = ((end - start)+7)/8;
104 mapsize = ALIGN(mapsize, sizeof(long));
105 bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
106 bdata->node_boot_start = (start << PAGE_SHIFT);
107 bdata->node_low_pfn = end;
111 * Initially all pages are reserved - setup_arch() has to
112 * register free RAM areas explicitly.
114 memset(bdata->node_bootmem_map, 0xff, mapsize);
120 * Marks a particular physical memory range as unallocatable. Usable RAM
121 * might be used for boot-time allocations - or it might get added
122 * to the free page pool later on.
/*
 * NOTE(review): the return type, the addr/size parameter lines, the loop
 * variable declaration and the closing braces are elided in this fragment.
 */
125 reserve_bootmem_core(
126 bootmem_data_t *bdata,
131 unsigned long sidx, eidx;
135 * round up, partially reserved pages are considered
/* the whole range must lie inside this node's memory */
139 BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
140 BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
/* bitmap indices covering [addr, addr+size), rounded outward so that
 * partially covered pages are reserved too */
142 sidx = PFN_DOWN(addr - bdata->node_boot_start);
143 eidx = PFN_UP(addr + size - bdata->node_boot_start);
/* set each page's bit; test_and_set_bit() returns the previous value,
 * so a true result means the page was already reserved */
145 for (i = sidx; i < eidx; i++) {
146 if (test_and_set_bit(i, bdata->node_bootmem_map)) {
147 #ifdef CONFIG_DEBUG_BOOTMEM
148 printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
155 * Frees a section of bootmemory.
/*
 * Clears the bitmap bits covering the byte range [addr, addr+size) of the
 * given node.  The range is rounded INWARD: a partially covered page at
 * either end stays reserved.
 * NOTE(review): the parameter lines, declarations of i/start/sidx, the
 * double-free handling inside the loop, and closing braces are elided in
 * this fragment.
 */
159 bootmem_data_t *bdata,
167 * round down end of usable mem, partially free pages are
168 * considered reserved.
171 unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
172 unsigned long end = (addr + size)/PAGE_SIZE;
175 BUG_ON(end > bdata->node_low_pfn);
/* freeing below the search hint: pull the hint back so the next
 * allocation can rediscover this space */
177 if (addr < bdata->last_success)
178 bdata->last_success = addr;
181 * Round up the beginning of the address.
183 start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
184 sidx = start - (bdata->node_boot_start/PAGE_SIZE);
/* clear each page's bit; a bit that was already clear indicates a double
 * free (the handling line is elided in this fragment) */
186 for (i = sidx; i < eidx; i++) {
187 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
193 * We 'merge' subsequent allocations to save space. We might 'lose'
194 * some fraction of a page if allocations cannot be satisfied due to
195 * size constraints on boxes where there is physical RAM space
196 * fragmentation - in these cases (mostly large memory boxes) this
199 * On low memory boxes we get it right in 100% of the cases.
201 * alignment has to be a power of 2 value.
203 * NOTE: This function is _not_ reentrant.
/*
 * Core allocator: searches bdata's bitmap for a run of free pages that
 * satisfies size/align/goal/limit, marks the run reserved, zeroes it, and
 * returns a kernel-virtual pointer to it.
 * NOTE(review): this fragment is missing many of the original lines
 * (parameter list tail, the search-restart/failure path, several closing
 * braces); the comments below describe only what the visible lines show.
 */
206 __alloc_bootmem_core(
207 struct bootmem_data *bdata,
214 unsigned long offset, remaining_size, areasize, preferred;
215 unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
/* refuse to allocate once free_all_bootmem() has torn the allocator down */
218 if (bootmem_destoyed)
219 panic("The bootmem allocator has been destroyed.");
222 printk("__alloc_bootmem_core(): zero-sized request\n");
/* alignment must be a power of two */
225 BUG_ON(align & (align-1));
/* node lies entirely at or above 'limit': cannot satisfy from here */
227 if (limit && bdata->node_boot_start >= limit)
/* clip the search to the limit, when one was given */
231 if (limit && end_pfn > limit)
/* number of bitmap slots (pages) belonging to this node */
234 eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
/* pages of padding needed when the node itself is not 'align'-aligned */
237 (bdata->node_boot_start & (align - 1UL)) != 0)
238 offset = (align - (bdata->node_boot_start & (align - 1UL)));
239 offset >>= PAGE_SHIFT;
242 * We try to allocate bootmem pages above 'goal'
243 * first, then we try to allocate lower pages.
245 if (goal && (goal >= bdata->node_boot_start) &&
246 ((goal >> PAGE_SHIFT) < end_pfn)) {
247 preferred = goal - bdata->node_boot_start;
/* last_success caches where the previous search ended successfully;
 * resume there unless 'limit' forbids it */
249 if (bdata->last_success >= preferred)
250 if (!limit || (limit && limit > bdata->last_success))
251 preferred = bdata->last_success;
255 preferred = ALIGN(preferred, align) >> PAGE_SHIFT;
/* pages needed to hold 'size' bytes */
257 areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
/* step the scan by the alignment, in pages (at least one page) */
258 incr = align >> PAGE_SHIFT ? : 1;
261 for (i = preferred; i < eidx; i += incr) {
/* jump ahead to the next free page */
263 i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
267 if (test_bit(i, bdata->node_bootmem_map))
/* verify that the whole areasize-page run starting at i is free */
269 for (j = i + 1; j < i + areasize; ++j) {
272 if (test_bit (j, bdata->node_bootmem_map))
281 if (preferred > offset) {
/* record the hit (as a byte offset into the node) for the next search */
288 bdata->last_success = start << PAGE_SHIFT;
289 BUG_ON(start >= eidx);
292 * Is the next page of the previous allocation-end the start
293 * of this allocation's buffer? If yes then we can 'merge'
294 * the previous partial page with this allocation.
296 if (align < PAGE_SIZE &&
297 bdata->last_offset && bdata->last_pos+1 == start) {
/* continue filling the partial page left by the previous allocation */
298 offset = ALIGN(bdata->last_offset, align);
299 BUG_ON(offset > PAGE_SIZE);
300 remaining_size = PAGE_SIZE-offset;
/* request fits entirely inside the leftover partial page */
301 if (size < remaining_size) {
303 /* last_pos unchanged */
304 bdata->last_offset = offset+size;
305 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
306 bdata->node_boot_start);
/* request spills past the partial page; consume whole pages after it */
308 remaining_size = size - remaining_size;
309 areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
310 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
311 bdata->node_boot_start);
312 bdata->last_pos = start+areasize-1;
313 bdata->last_offset = remaining_size;
315 bdata->last_offset &= ~PAGE_MASK;
/* no merge possible: the allocation begins on a fresh page boundary */
317 bdata->last_pos = start + areasize - 1;
318 bdata->last_offset = size & ~PAGE_MASK;
319 ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
323 * Reserve the area now:
325 for (i = start; i < start+areasize; i++)
/* a bit already set here would mean a double allocation; the handling on
 * the elided line presumably BUG()s -- confirm against the full file */
326 if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
/* callers receive zeroed memory */
328 memset(ret, 0, size);
/*
 * Tears down the boot allocator for one node and hands its memory to the
 * runtime pools:
 *   pass 1: free pages below kmem_max_idx go to the kernel pool (kmem);
 *   pass 2: every page is registered with the physical-memory tracker as
 *           KMEM, UMEM, or still-allocated BOOTMEM;
 *   pass 3: the pages holding the bitmap itself are released and recorded.
 * NOTE(review): many original lines are elided in this fragment (return
 * type, declarations of pfn/map/vaddr, per-bit tests, pmem registration
 * calls, totals updates, and closing braces).
 */
333 free_all_bootmem_core(struct bootmem_data *bdata)
337 unsigned long i, m, count;
338 unsigned long bootmem_total=0, kmem_total=0, umem_total=0;
339 unsigned long kmem_max_idx, max_idx;
341 struct pmem_region rgn;
343 BUG_ON(!bdata->node_bootmem_map);
/* bitmap index of the first page NOT handed to kmem; kmem covers the
 * physical range [0, kmem_size) */
345 kmem_max_idx = (kmem_size >> PAGE_SHIFT) - (bdata->node_boot_start >> PAGE_SHIFT);
346 max_idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
347 BUG_ON(kmem_max_idx > max_idx);
349 /* Create the initial kernel managed memory pool (kmem) */
351 pfn = bdata->node_boot_start >> PAGE_SHIFT; /* first extant page of node */
352 map = bdata->node_bootmem_map;
/* pass 1: scan the bitmap one unsigned long at a time */
353 for (i = 0; i < kmem_max_idx; ) {
/* invert: v now has a 1 bit for every FREE page in this chunk */
354 unsigned long v = ~map[i / BITS_PER_LONG];
357 vaddr = (unsigned long) __va(pfn << PAGE_SHIFT);
/* walk the bits of this chunk; m is the mask of the current bit */
358 for (m = 1; m && i < kmem_max_idx; m<<=1, vaddr+=PAGE_SIZE, i++) {
361 kmem_add_memory(vaddr, PAGE_SIZE);
367 pfn += BITS_PER_LONG;
372 * At this point, kmem_alloc() will work. The physical memory tracking
373 * code relies on kmem_alloc(), so it cannot be initialized until now.
375 * Tell the physical memory tracking subsystem about the kernel-managed
376 * pool and the remaining memory that will be managed by user-space.
378 pfn = bdata->node_boot_start >> PAGE_SHIFT; /* first extant page of node */
379 map = bdata->node_bootmem_map;
/* template region: type/allocated/lgroup are the fields filled in below */
380 pmem_region_unset_all(&rgn);
381 rgn.type_is_set = true;
382 rgn.allocated_is_set = true;
383 rgn.lgroup_is_set = true;
/* pass 2: classify and register every page of the node */
384 for (i = 0; i < max_idx; ) {
385 unsigned long v = ~map[i / BITS_PER_LONG];
386 unsigned long paddr = (unsigned long) pfn << PAGE_SHIFT;
388 for (m = 1; m && i < max_idx; m<<=1, paddr+=PAGE_SIZE, i++) {
390 rgn.end = paddr + PAGE_SIZE;
/* free page below the kmem boundary: kernel-managed, already handed
 * to kmem in pass 1 */
393 if (i < kmem_max_idx) {
394 rgn.type = PMEM_TYPE_KMEM;
395 rgn.allocated = true;
/* free page above the boundary: left for user-space management */
399 rgn.type = PMEM_TYPE_UMEM;
400 rgn.allocated = false;
/* page still marked in the bitmap: an outstanding bootmem allocation */
405 rgn.type = PMEM_TYPE_BOOTMEM;
406 rgn.allocated = true;
415 pfn += BITS_PER_LONG;
419 * Now free the allocator bitmap itself, it's not
422 vaddr = (unsigned long)bdata->node_bootmem_map;
424 pmem_region_unset_all(&rgn);
425 rgn.type_is_set = true;
426 rgn.allocated_is_set = true;
427 rgn.lgroup_is_set = true;
/* pass 3: the bitmap spans (covered_pages/8) bytes, rounded up to pages */
428 for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,vaddr+=PAGE_SIZE) {
431 rgn.start = __pa(vaddr);
432 rgn.end = rgn.start + PAGE_SIZE;
/* NOTE(review): 'i' indexes bitmap pages here, while kmem_max_idx is a
 * PFN-based index; confirm this comparison is intended in the full file */
434 if (i < kmem_max_idx) {
435 kmem_add_memory(vaddr, PAGE_SIZE);
436 rgn.type = PMEM_TYPE_KMEM;
437 rgn.allocated = true;
440 rgn.type = PMEM_TYPE_UMEM;
441 rgn.allocated = false;
449 /* Mark the bootmem allocator as dead */
450 bdata->node_bootmem_map = NULL;
/* summary report (printk prefix/argument lines elided in this fragment) */
453 "The boot-strap bootmem allocator has been destroyed:\n");
455 " %lu bytes released to the kernel-managed memory pool (kmem)\n",
456 kmem_total << PAGE_SHIFT);
458 " %lu bytes released to the user-managed memory pool (umem)\n",
459 umem_total << PAGE_SHIFT);
463 * Initialize boot memory allocator.
466 init_bootmem(unsigned long start, unsigned long pages)
468 return init_bootmem_core(&bootmem_data, start, 0, pages);
472 * Reserve a portion of the boot memory.
473 * This prevents the reserved memory from being allocated.
476 reserve_bootmem(unsigned long addr, unsigned long size)
478 reserve_bootmem_core(&bootmem_data, addr, size);
482 * Return a portion of boot memory to the free pool.
483 * Note that the region freed is the set of pages covering
484 * the byte range [addr, addr+size).
487 free_bootmem(unsigned long addr, unsigned long size)
489 free_bootmem_core(&bootmem_data, addr, size);
493 free_all_bootmem(void)
495 free_all_bootmem_core(&bootmem_data);
496 bootmem_destoyed = true;
500 __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
502 bootmem_data_t *bdata;
505 list_for_each_entry(bdata, &bdata_list, list)
506 if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
512 * Allocate a chunk of memory from the boot memory allocator.
514 * size = number of bytes requested
515 * align = required alignment
516 * goal = hint specifying address to start search.
519 __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
521 void *mem = __alloc_bootmem_nopanic(size,align,goal);
525 * Whoops, we cannot satisfy the allocation request.
527 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
528 panic("Out of memory");
533 * Allocates a block of memory of the specified size.
536 alloc_bootmem(unsigned long size)
538 return __alloc_bootmem(size, SMP_CACHE_BYTES, 0);
542 * Allocates a block of memory of the specified size and alignment.
545 alloc_bootmem_aligned(unsigned long size, unsigned long align)
547 return __alloc_bootmem(size, align, 0);
551 * Initializes the kernel memory subsystem.
/*
 * Forces kmem_size to a power of two, creates the kernel memory pool over
 * the first kmem_size bytes of memory, and hands off to architecture-
 * specific memory setup.
 * NOTE(review): the return type, braces, a printk() prefix/argument line,
 * and any statements between kmem_create_zone() and arch_memsys_init()
 * are elided in this fragment.
 */
554 mem_subsys_init(void)
556 /* We like powers of two */
557 if (!is_power_of_2(kmem_size)) {
/* round up rather than fail: boot continues with a larger kernel pool */
558 printk(KERN_WARNING "kmem_size must be a power of two.");
559 kmem_size = roundup_pow_of_two(kmem_size);
563 "First %lu bytes of system memory reserved for the kernel.\n",
566 /* Initialize the kernel memory pool */
/* PAGE_OFFSET is presumably the kernel-virtual mapping of physical
 * address 0 -- confirm against the architecture headers */
567 kmem_create_zone(PAGE_OFFSET, kmem_size);
569 arch_memsys_init(kmem_size);