Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
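
For example, to track one of the release branches instead, list the remote branches and check out the one you want (the release branch name below is only illustrative; use whatever git branch -r actually reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2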


Merge branch 'devel'
diff --git a/kitten/mm/bootmem.c b/kitten/mm/bootmem.c
new file mode 100644
index 0000000..6220cf1
--- /dev/null
+++ b/kitten/mm/bootmem.c
@@ -0,0 +1,571 @@
+/*
+ *  lwk/mm/bootmem.c
+ *
+ *  Copyright (C) 1999 Ingo Molnar
+ *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ *
+ *  simple boot-time physical memory area allocator and
+ *  free memory collector. It's used to deal with reserved
+ *  system memory and memory holes as well.
+ */
+
+#include <lwk/init.h>
+#include <lwk/pfn.h>
+#include <lwk/bootmem.h>
+#include <lwk/params.h>
+#include <lwk/log2.h>
+#include <lwk/pmem.h>
+#include <lwk/kmem.h>
+#include <lwk/bitops.h>
+#include <arch/io.h>
+
+/**
+ * Set to true once bootmem allocator has been destroyed.
+ */
+static bool bootmem_destroyed = false;
+
+/**
+ * Access to this subsystem has to be serialized externally.
+ * (this is true for the boot process anyway)
+ */
+
+
+/**
+ * Amount of system memory to reserve for use by the kernel. The first
+ * kmem_size bytes of system memory [0, kmem_size) will be added to the
+ * kernel memory pool. The remainder of system memory is left untouched by
+ * the kernel and is available for use by applications.
+ */
+static unsigned long kmem_size = (1024 * 1024 * 8);  /* default is first 8 MB */
+param(kmem_size, ulong);
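+
+/*
+ * Example (an assumption about the boot-parameter syntax, not confirmed by
+ * this file): if Kitten parses "name=value" kernel arguments the way Linux
+ * does, passing "kmem_size=16777216" on the boot command line would reserve
+ * the first 16 MB for the kernel pool instead of the default 8 MB.
+ */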
+
+
+/**
+ * Boot memory descriptor for the region managed by the boot-time
+ * allocator; initialized by init_bootmem().
+ */
+static bootmem_data_t __initdata bootmem_data;
+
+/**
+ * List of bootmem_data structures, each describing a section of
+ * physical memory.
+ */
+static LIST_HEAD(bdata_list);
+
+/**
+ * Returns the number of _pages_ that will be allocated for the boot bitmap.
+ */
+unsigned long __init
+bootmem_bootmap_pages(unsigned long pages)
+{
+       unsigned long mapsize;
+
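+       /*
+        * One bit per page: round the page count up to whole bytes, round
+        * the byte count up to a whole number of pages, then convert
+        * bytes to pages.
+        */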
+       mapsize = (pages+7)/8;
+       mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
+       mapsize >>= PAGE_SHIFT;
+
+       return mapsize;
+}
+
+/**
+ * Links a newly created bootmem_data structure to the bdata_list.
+ */
+static void __init
+link_bootmem(bootmem_data_t *bdata)
+{
+       bootmem_data_t *ent;
+       if (list_empty(&bdata_list)) {
+               list_add(&bdata->list, &bdata_list);
+               return;
+       }
+       /* insert in order */
+       list_for_each_entry(ent, &bdata_list, list) {
+               if (bdata->node_boot_start < ent->node_boot_start) {
+                       list_add_tail(&bdata->list, &ent->list);
+                       return;
+               }
+       }
+       list_add_tail(&bdata->list, &bdata_list);
+       return;
+}
+
+/**
+ * Called once to set up the allocator itself.
+ */
+static unsigned long __init
+init_bootmem_core(
+       bootmem_data_t  *bdata,
+       unsigned long   mapstart,
+       unsigned long   start,
+       unsigned long   end
+)
+{
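+       /* mapstart, start, and end are all page frame numbers (PFNs). */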
+       unsigned long mapsize = ((end - start)+7)/8;
+
+       mapsize = ALIGN(mapsize, sizeof(long));
+       bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
+       bdata->node_boot_start = (start << PAGE_SHIFT);
+       bdata->node_low_pfn = end;
+       link_bootmem(bdata);
+
+       /*
+        * Initially all pages are reserved - setup_arch() has to
+        * register free RAM areas explicitly.
+        */
+       memset(bdata->node_bootmem_map, 0xff, mapsize);
+
+       return mapsize;
+}
+
+/**
+ * Marks a particular physical memory range as unallocatable. Usable RAM
+ * might be used for boot-time allocations - or it might get added
+ * to the free page pool later on.
+ */
+static void __init
+reserve_bootmem_core(
+       bootmem_data_t  *bdata,
+       unsigned long   addr,
+       unsigned long   size
+)
+{
+       unsigned long sidx, eidx;
+       unsigned long i;
+
+       /*
+        * round up, partially reserved pages are considered
+        * fully reserved.
+        */
+       BUG_ON(!size);
+       BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
+       BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
+
+       sidx = PFN_DOWN(addr - bdata->node_boot_start);
+       eidx = PFN_UP(addr + size - bdata->node_boot_start);
+
+       for (i = sidx; i < eidx; i++) {
+               if (test_and_set_bit(i, bdata->node_bootmem_map)) {
+#ifdef CONFIG_DEBUG_BOOTMEM
+                       printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
+#endif
+               }
+       }
+}
+
+/**
+ * Frees a section of bootmemory.
+ */
+static void __init
+free_bootmem_core(
+       bootmem_data_t  *bdata,
+       unsigned long   addr,
+       unsigned long   size
+)
+{
+       unsigned long i;
+       unsigned long start;
+       /*
+        * round down end of usable mem, partially free pages are
+        * considered reserved.
+        */
+       unsigned long sidx;
+       unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
+       unsigned long end = (addr + size)/PAGE_SIZE;
+
+       BUG_ON(!size);
+       BUG_ON(end > bdata->node_low_pfn);
+
+       if (addr < bdata->last_success)
+               bdata->last_success = addr;
+
+       /*
+        * Round up the beginning of the address.
+        */
+       start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
+       sidx = start - (bdata->node_boot_start/PAGE_SIZE);
+
+       for (i = sidx; i < eidx; i++) {
+               if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
+                       BUG();
+       }
+}
+
+/**
+ * We 'merge' subsequent allocations to save space. We might 'lose'
+ * some fraction of a page if allocations cannot be satisfied due to
+ * size constraints on boxes where there is physical RAM space
+ * fragmentation - in these cases (mostly large memory boxes) this
+ * is not a problem.
+ *
+ * On low memory boxes we get it right in 100% of the cases.
+ *
+ * alignment has to be a power of 2 value.
+ *
+ * NOTE:  This function is _not_ reentrant.
+ */
+void * __init
+__alloc_bootmem_core(
+       struct bootmem_data     *bdata,
+       unsigned long           size,
+       unsigned long           align,
+       unsigned long           goal,
+       unsigned long           limit
+)
+{
+       unsigned long offset, remaining_size, areasize, preferred;
+       unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
+       void *ret;
+
+       if (bootmem_destroyed)
+               panic("The bootmem allocator has been destroyed.");
+
+       if(!size) {
+               printk("__alloc_bootmem_core(): zero-sized request\n");
+               BUG();
+       }
+       BUG_ON(align & (align-1));
+
+       if (limit && bdata->node_boot_start >= limit)
+               return NULL;
+
+       limit >>= PAGE_SHIFT;
+       if (limit && end_pfn > limit)
+               end_pfn = limit;
+
+       eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+       offset = 0;
+       if (align &&
+           (bdata->node_boot_start & (align - 1UL)) != 0)
+               offset = (align - (bdata->node_boot_start & (align - 1UL)));
+       offset >>= PAGE_SHIFT;
+
+       /*
+        * We try to allocate bootmem pages above 'goal'
+        * first, then we try to allocate lower pages.
+        */
+       if (goal && (goal >= bdata->node_boot_start) && 
+           ((goal >> PAGE_SHIFT) < end_pfn)) {
+               preferred = goal - bdata->node_boot_start;
+
+               if (bdata->last_success >= preferred)
+                       if (!limit || (limit && limit > bdata->last_success))
+                               preferred = bdata->last_success;
+       } else
+               preferred = 0;
+
+       preferred = ALIGN(preferred, align) >> PAGE_SHIFT;
+       preferred += offset;
+       areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+       incr = align >> PAGE_SHIFT ? : 1;
+
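+       /*
+        * Scan the bitmap for a run of 'areasize' consecutive free pages,
+        * starting at the preferred index and stepping by 'incr' pages so
+        * that each candidate start stays properly aligned.
+        */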
+restart_scan:
+       for (i = preferred; i < eidx; i += incr) {
+               unsigned long j;
+               i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+               i = ALIGN(i, incr);
+               if (i >= eidx)
+                       break;
+               if (test_bit(i, bdata->node_bootmem_map))
+                       continue;
+               for (j = i + 1; j < i + areasize; ++j) {
+                       if (j >= eidx)
+                               goto fail_block;
+                       if (test_bit (j, bdata->node_bootmem_map))
+                               goto fail_block;
+               }
+               start = i;
+               goto found;
+       fail_block:
+               i = ALIGN(j, incr);
+       }
+
+       if (preferred > offset) {
+               preferred = offset;
+               goto restart_scan;
+       }
+       return NULL;
+
+found:
+       bdata->last_success = start << PAGE_SHIFT;
+       BUG_ON(start >= eidx);
+
+       /*
+        * Is the next page of the previous allocation-end the start
+        * of this allocation's buffer? If yes then we can 'merge'
+        * the previous partial page with this allocation.
+        */
+       if (align < PAGE_SIZE &&
+           bdata->last_offset && bdata->last_pos+1 == start) {
+               offset = ALIGN(bdata->last_offset, align);
+               BUG_ON(offset > PAGE_SIZE);
+               remaining_size = PAGE_SIZE-offset;
+               if (size < remaining_size) {
+                       areasize = 0;
+                       /* last_pos unchanged */
+                       bdata->last_offset = offset+size;
+                       ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+                                               bdata->node_boot_start);
+               } else {
+                       remaining_size = size - remaining_size;
+                       areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
+                       ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
+                                               bdata->node_boot_start);
+                       bdata->last_pos = start+areasize-1;
+                       bdata->last_offset = remaining_size;
+               }
+               bdata->last_offset &= ~PAGE_MASK;
+       } else {
+               bdata->last_pos = start + areasize - 1;
+               bdata->last_offset = size & ~PAGE_MASK;
+               ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+       }
+
+       /*
+        * Reserve the area now:
+        */
+       for (i = start; i < start+areasize; i++)
+               if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+                       BUG();
+       memset(ret, 0, size);
+       return ret;
+}
+
+static void __init
+free_all_bootmem_core(struct bootmem_data *bdata)
+{
+       unsigned long pfn;
+       unsigned long vaddr;
+       unsigned long i, m, count;
+       unsigned long bootmem_total=0, kmem_total=0, umem_total=0;
+       unsigned long kmem_max_idx, max_idx;
+       unsigned long *map; 
+       struct pmem_region rgn;
+
+       BUG_ON(!bdata->node_bootmem_map);
+
+       kmem_max_idx = (kmem_size >> PAGE_SHIFT) - (bdata->node_boot_start >> PAGE_SHIFT);
+       max_idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+       BUG_ON(kmem_max_idx > max_idx);
+
+       /* Create the initial kernel managed memory pool (kmem) */
+       count = 0;
+       pfn = bdata->node_boot_start >> PAGE_SHIFT;  /* first extant page of node */
+       map = bdata->node_bootmem_map;
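+       /*
+        * Walk the bitmap one long at a time. The bitmap marks reserved
+        * pages with 1, so in the inverted word 'v' a set bit means the
+        * corresponding page is free and can be handed to kmem.
+        */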
+       for (i = 0; i < kmem_max_idx; ) {
+               unsigned long v = ~map[i / BITS_PER_LONG];
+
+               if (v) {
+                       vaddr = (unsigned long) __va(pfn << PAGE_SHIFT);
+                       for (m = 1; m && i < kmem_max_idx; m<<=1, vaddr+=PAGE_SIZE, i++) {
+                               if (v & m) {
+                                       count++;
+                                       kmem_add_memory(vaddr, PAGE_SIZE);
+                               }
+                       }
+               } else {
+                       i+=BITS_PER_LONG;
+               }
+               pfn += BITS_PER_LONG;
+       }
+       BUG_ON(count == 0);
+
+       /*
+        * At this point, kmem_alloc() will work. The physical memory tracking
+        * code relies on kmem_alloc(), so it cannot be initialized until now.
+        *
+        * Tell the physical memory tracking subsystem about the kernel-managed
+        * pool and the remaining memory that will be managed by user-space.
+        */
+       pfn = bdata->node_boot_start >> PAGE_SHIFT;  /* first extant page of node */
+       map = bdata->node_bootmem_map;
+       pmem_region_unset_all(&rgn);
+       rgn.type_is_set = true;
+       rgn.allocated_is_set = true;
+       rgn.lgroup_is_set = true;
+       for (i = 0; i < max_idx; ) {
+               unsigned long v = ~map[i / BITS_PER_LONG];
+               unsigned long paddr = (unsigned long) pfn << PAGE_SHIFT;
+
+               for (m = 1; m && i < max_idx; m<<=1, paddr+=PAGE_SIZE, i++) {
+                       rgn.start = paddr;
+                       rgn.end   = paddr + PAGE_SIZE;
+
+                       if (v & m) {
+                               if (i < kmem_max_idx) {
+                                       rgn.type = PMEM_TYPE_KMEM;
+                                       rgn.allocated = true;
+                                       rgn.lgroup = 0;
+                                       ++kmem_total;
+                               } else {
+                                       rgn.type = PMEM_TYPE_UMEM;
+                                       rgn.allocated = false;
+                                       rgn.lgroup = 0;
+                                       ++umem_total;
+                               }
+                       } else {
+                               rgn.type = PMEM_TYPE_BOOTMEM;
+                               rgn.allocated = true;
+                               rgn.lgroup = 0;
+                               ++bootmem_total;
+                       }
+
+                       if (pmem_add(&rgn))
+                               BUG();
+               }
+
+               pfn += BITS_PER_LONG;
+       }
+
+       /*
+        * Now free the allocator bitmap itself, it's not
+        * needed anymore:
+        */
+       vaddr = (unsigned long)bdata->node_bootmem_map;
+       count = 0;
+       pmem_region_unset_all(&rgn);
+       rgn.type_is_set = true;
+       rgn.allocated_is_set = true;
+       rgn.lgroup_is_set = true;
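+       /*
+        * The loop bound below is the number of pages occupied by the
+        * bootmem bitmap itself (one bit per tracked page, rounded up
+        * to whole pages).
+        */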
+       for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,vaddr+=PAGE_SIZE) {
+               count++;
+
+               rgn.start = __pa(vaddr);
+               rgn.end   = rgn.start + PAGE_SIZE;
+
+               if (i < kmem_max_idx) {
+                       kmem_add_memory(vaddr, PAGE_SIZE);
+                       rgn.type = PMEM_TYPE_KMEM;
+                       rgn.allocated = true;
+                       rgn.lgroup = 0;
+               } else {
+                       rgn.type = PMEM_TYPE_UMEM;
+                       rgn.allocated = false;
+                       rgn.lgroup = 0;
+               }
+
+               pmem_add(&rgn);
+       }
+       BUG_ON(count == 0);
+
+       /* Mark the bootmem allocator as dead */
+       bdata->node_bootmem_map = NULL;
+
+       printk(KERN_DEBUG
+              "The boot-strap bootmem allocator has been destroyed:\n");
+       printk(KERN_DEBUG
+              "  %lu bytes released to the kernel-managed memory pool (kmem)\n",
+              kmem_total << PAGE_SHIFT);
+       printk(KERN_DEBUG
+              "  %lu bytes released to the user-managed memory pool (umem)\n",
+              umem_total << PAGE_SHIFT);
+}
+
+/**
+ * Initializes the boot memory allocator. 'start' is the page frame number
+ * at which the bootmem bitmap will be placed; the memory tracked covers
+ * page frames [0, pages).
+ */
+unsigned long __init
+init_bootmem(unsigned long start, unsigned long pages)
+{
+       return init_bootmem_core(&bootmem_data, start, 0, pages);
+}
+
+/**
+ * Reserve a portion of the boot memory.
+ * This prevents the reserved memory from being allocated.
+ */
+void __init
+reserve_bootmem(unsigned long addr, unsigned long size)
+{
+       reserve_bootmem_core(&bootmem_data, addr, size);
+}
+
+/**
+ * Return a portion of boot memory to the free pool.
+ * Note that the region freed is the set of pages covering
+ * the byte range [addr, addr+size).
+ */
+void __init
+free_bootmem(unsigned long addr, unsigned long size)
+{
+       free_bootmem_core(&bootmem_data, addr, size);
+}
+
+void __init
+free_all_bootmem(void)
+{
+       free_all_bootmem_core(&bootmem_data);
+       bootmem_destroyed = true;
+}
+
+static void * __init
+__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
+{
+       bootmem_data_t *bdata;
+       void *ptr;
+
+       list_for_each_entry(bdata, &bdata_list, list)
+               if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
+                       return(ptr);
+       return NULL;
+}
+
+/**
+ * Allocate a chunk of memory from the boot memory allocator.
+ *
+ *     size  = number of bytes requested
+ *     align = required alignment
+ *     goal  = hint specifying address to start search.
+ */
+void * __init
+__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+{
+       void *mem = __alloc_bootmem_nopanic(size,align,goal);
+       if (mem)
+               return mem;
+       /*
+        * Whoops, we cannot satisfy the allocation request.
+        */
+       printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+       panic("Out of memory");
+       return NULL;
+}
+
+/**
+ * Allocates a block of memory of the specified size.
+ */
+void * __init
+alloc_bootmem(unsigned long size)
+{
+       return __alloc_bootmem(size, SMP_CACHE_BYTES, 0);
+}
+
+/**
+ * Allocates a block of memory of the specified size and alignment.
+ */
+void * __init
+alloc_bootmem_aligned(unsigned long size, unsigned long align)
+{
+       return __alloc_bootmem(size, align, 0);
+}
+
+/**
+ * Initializes the kernel memory subsystem.
+ */
+void __init
+mem_subsys_init(void)
+{
+       /* We like powers of two */
+       if (!is_power_of_2(kmem_size)) {
+               printk(KERN_WARNING "kmem_size must be a power of two; rounding up.\n");
+               kmem_size = roundup_pow_of_two(kmem_size);
+       }
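+       /* For example, a kmem_size of 10 MB would be rounded up to 16 MB here. */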
+
+       printk(KERN_DEBUG
+              "First %lu bytes of system memory reserved for the kernel.\n",
+              kmem_size);
+
+       /* Initialize the kernel memory pool */
+       kmem_create_zone(PAGE_OFFSET, kmem_size);
+       free_all_bootmem();
+       arch_memsys_init(kmem_size);
+}
+