1 /* Copyright (c) 2007, Sandia National Laboratories */
2 /* Modified by Jack Lange, 2012 */
4 #include <linux/module.h>
5 #include <linux/slab.h>
6 #include <linux/log2.h>
10 #include <linux/proc_fs.h>
11 #include <linux/seq_file.h>
15 * Converts a block address to its block index in the specified buddy allocator.
16 * A block's index is used to find the block's tag bit, mp->tag_bits[block_id].
/* NOTE(review): the return-type/specifier line of this helper is not visible
 * in this excerpt; callers use it as returning the unsigned long index. */
19 block_to_id(struct buddy_mempool *mp, struct block *block)
/* Index = (physical offset of 'block' within the pool) >> min_order, i.e. the
 * position of the minimum-sized block that 'block' starts on.  __pa() maps the
 * kernel-virtual block pointer to its physical address; mp->base_addr is the
 * pool's physical base. */
21 unsigned long block_id =
22 ((unsigned long)__pa(block) - mp->base_addr) >> mp->zone->min_order;
/* Hard stop if the computed index falls outside this pool's tag bitmap. */
23 BUG_ON(block_id >= mp->num_blocks);
/* NOTE(review): the 'return block_id;' line is on a line not shown here. */
29 * Marks a block as free by setting its tag bit to one.
32 mark_available(struct buddy_mempool *mp, struct block *block)
/* Non-atomic __set_bit is sufficient here: callers in this file mutate pool
 * state only while holding zone->lock (see buddy_alloc / buddy_free). */
34 __set_bit(block_to_id(mp, block), mp->tag_bits);
39 * Marks a block as allocated by setting its tag bit to zero.
42 mark_allocated(struct buddy_mempool *mp, struct block *block)
/* Non-atomic __clear_bit: protected by zone->lock in all callers here. */
44 __clear_bit(block_to_id(mp, block), mp->tag_bits);
49 * Returns true if block is free, false if it is allocated.
52 is_available(struct buddy_mempool *mp, struct block *block)
/* Tag-bit convention: 1 == free, 0 == allocated (see mark_available /
 * mark_allocated above). */
54 return test_bit(block_to_id(mp, block), mp->tag_bits);
59 * Returns the address of the block's buddy block.
/* NOTE(review): the return-type line and the declarations of the locals
 * '_block' and '_buddy' (pool-relative offsets) are on lines not shown. */
62 find_buddy(struct buddy_mempool *mp, struct block *block, unsigned long order)
/* The block must lie inside this pool for the offset arithmetic to hold. */
67 BUG_ON((unsigned long)__pa(block) < mp->base_addr);
69 /* Fixup block address to be zero-relative */
70 _block = (unsigned long)__pa(block) - mp->base_addr;
/* Buddies of a 2^order-byte block differ only in bit 'order' of their
 * pool-relative offset, so XOR toggles between the pair. */
72 /* Calculate buddy in zero-relative space */
73 _buddy = _block ^ (1UL << order);
/* __va() yields void *, so the addition relies on the GCC extension that
 * treats void * arithmetic as byte arithmetic (standard in kernel code). */
75 /* Return the buddy's address */
76 return (void *)(_buddy + __va(mp->base_addr));
/* Returns the first physical address past the end of 'pool'
 * (base_addr + 2^pool_order bytes); used for range tests in the rbtree
 * lookup and insertion helpers below. */
80 static inline uintptr_t pool_end_addr(struct buddy_mempool * pool) {
81 return pool->base_addr + (1UL << pool->pool_order);
85 static struct buddy_mempool *
/* Looks up the mempool whose [base_addr, end) physical range contains 'addr'
 * by walking the zone's red-black tree of pools.  Callers treat NULL as
 * "not found" (see __buddy_remove_mempool / buddy_free).
 * NOTE(review): the tree-walk loop header, the rb_left/rb_right descent lines
 * and the return statements are on lines not shown in this excerpt. */
86 find_mempool(struct buddy_memzone * zone, uintptr_t addr) {
87 struct rb_node * n = zone->mempools.rb_node;
88 struct buddy_mempool * pool = NULL;
91 pool = rb_entry(n, struct buddy_mempool, tree_node);
/* addr below this pool's base: descend left; at/past its end: descend right;
 * otherwise this pool contains addr. */
93 if (addr < pool->base_addr) {
95 } else if (addr >= pool_end_addr(pool)) {
/* Inserts 'pool' into the zone's red-black tree, keyed by its physical
 * address range.  Pools must not overlap: the new pool goes left of any node
 * it ends at/before and right of any node it begins at/after.
 * NOTE(review): the descent-loop header, the branch bodies updating 'p', the
 * overlap-failure path and the return value are on lines not shown here;
 * buddy_add_pool checks this function's result for failure. */
108 insert_mempool(struct buddy_memzone * zone,
109 struct buddy_mempool * pool) {
110 struct rb_node ** p = &(zone->mempools.rb_node);
111 struct rb_node * parent = NULL;
112 struct buddy_mempool * tmp_pool;
116 tmp_pool = rb_entry(parent, struct buddy_mempool, tree_node);
/* New pool lies entirely below this node: descend left. */
118 if (pool_end_addr(pool) <= tmp_pool->base_addr) {
/* New pool lies entirely above this node: descend right. */
120 } else if (pool->base_addr >= pool_end_addr(tmp_pool)) {
/* Standard rbtree insertion: link at the found leaf, then rebalance. */
127 rb_link_node(&(pool->tree_node), parent, p);
128 rb_insert_color(&(pool->tree_node), &(zone->mempools));
137 /* This adds a pool of a given size to a buddy allocated zone
/*
 * buddy_add_pool() - register a 2^pool_order-byte physical memory pool with
 * 'zone' and make it available for allocation.
 *
 * base_addr:     physical base address of the pool
 * pool_order:    log2 of the pool size in bytes (must lie within the zone's
 *                [min_order, max_order] limits)
 * user_metadata: opaque caller cookie, handed back on pool removal
 *
 * NOTE(review): the 'ret' declaration, the error-path returns/cleanup and the
 * closing braces are on lines not shown in this excerpt; the visible error
 * branches log and appear to bail out with a nonzero result.
 */
140 int buddy_add_pool(struct buddy_memzone * zone,
141 unsigned long base_addr,
142 unsigned long pool_order,
143 void *user_metadata) {
144 struct buddy_mempool * mp = NULL;
145 unsigned long flags = 0;
/* Reject pools that do not fit the zone's configured order range. */
148 if (pool_order > zone->max_order) {
149 ERROR("Pool order size is larger than max allowable zone size (pool_order=%lu) (max_order=%lu)\n", pool_order, zone->max_order);
151 } else if (pool_order < zone->min_order) {
152 ERROR("Pool order is smaller than min allowable zone size (pool_order=%lu) (min_order=%lu)\n", pool_order, zone->min_order);
/* Allocate the pool descriptor on the zone's NUMA node. */
156 mp = palacios_alloc_extended(sizeof(struct buddy_mempool), GFP_KERNEL, zone->node_id);
159 ERROR("Could not allocate mempool\n");
163 mp->base_addr = base_addr;
164 mp->pool_order = pool_order;
166 mp->num_free_blocks = 0;
168 mp->user_metadata = user_metadata;
170 /* Allocate a bitmap with 1 bit per minimum-sized block */
171 mp->num_blocks = (1UL << pool_order) / (1UL << zone->min_order);
173 mp->tag_bits = palacios_alloc_extended(
174 BITS_TO_LONGS(mp->num_blocks) * sizeof(long), GFP_KERNEL, zone->node_id
177 if (!(mp->tag_bits)) {
178 ERROR("Could not allocate tag_bits\n");
/* Tag bit 1 == free, so a zeroed bitmap means "everything allocated".  The
 * buddy_free() call below then releases the whole pool as one free block of
 * pool_order, which also sets up num_free_blocks and the free list. */
184 /* Initially mark all minimum-sized blocks as allocated */
185 bitmap_zero(mp->tag_bits, mp->num_blocks);
/* Insert into the zone's pool tree under the zone lock. */
187 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
188 ret = insert_mempool(zone, mp);
189 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
/* Insert failure (e.g. overlapping range): undo the allocations above. */
192 ERROR("Error: Could not insert mempool into zone\n");
193 palacios_free(mp->tag_bits);
/* Seed the allocator: free the entire pool as a single max-order block. */
199 buddy_free(zone, base_addr, pool_order);
201 INFO("Added memory pool (addr=%p), order=%lu\n", (void *)base_addr, pool_order);
208 * Removes a mempool from a zone,
209 * assumes the zone lock is already held
/* NOTE(review): a 'force' flag parameter (consulted by the branch that prints
 * the WARNING below) sits on a parameter line not shown in this excerpt, as
 * do the error returns and the final free of the pool struct. */
211 static int __buddy_remove_mempool(struct buddy_memzone * zone,
212 unsigned long base_addr,
214 void **user_metadata)
217 struct buddy_mempool * pool = NULL;
218 struct block * block = NULL;
/* Locate the pool that owns this base address. */
221 pool = find_mempool(zone, base_addr);
224 ERROR("Could not find mempool with base address (%p)\n", (void *)base_addr);
/* The pool base, viewed as a block header, is the pool's max-order block. */
228 block = (struct block *)__va(pool->base_addr);
230 INFO("Removing Mempool %p, base=%p\n",pool,block);
232 // The largest order block in the memory pool must be free
/* If the pool still has outstanding allocations: fail, unless the (hidden)
 * 'force' flag is set, in which case only warn and proceed. */
233 if (!is_available(pool, block)) {
235 ERROR("Trying to remove an in use memory pool\n");
239 WARNING("Forcefully removing in use memory pool\n");
/* Hand the caller's cookie back before tearing the pool down. */
243 *user_metadata = pool->user_metadata;
/* Only unlink from the free list if the max-order block is actually on it. */
245 if (is_available(pool,block)) {
246 list_del(&(block->link));
248 // we may not be on the free list if we are being
249 // forcibly removed before all allocations are freed
/* Detach from the zone's pool tree and release the tag bitmap.  The free of
 * 'pool' itself and the return are on lines not shown here. */
252 rb_erase(&(pool->tree_node), &(zone->mempools));
254 palacios_free(pool->tag_bits);
/*
 * Public wrapper around __buddy_remove_mempool(): takes the zone lock,
 * removes the pool at 'base_addr' and returns its user_metadata via the out
 * parameter.  NOTE(review): the 'force' parameter line, the 'ret'
 * declaration and the final return are not visible in this excerpt.
 */
262 int buddy_remove_pool(struct buddy_memzone * zone,
263 unsigned long base_addr,
265 void **user_metadata)
267 unsigned long flags = 0;
/* Serialize against concurrent allocs/frees on this zone. */
271 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
272 ret = __buddy_remove_mempool(zone, base_addr, force, user_metadata);
273 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
283 * Allocates a block of memory of the requested size (2^order bytes).
286 * [IN] mp: Buddy system memory allocator object.
287 * [IN] order: Block size to allocate (2^order bytes).
288 * [IN] constraints: bitmmask showing restrictions for scan. currently: 0=none, or LWK_BUDDY_CONSTRAINT_4GB
290 * Success: Pointer to the start of the allocated memory block.
/* NOTE(review): return type is uintptr_t (a physical address; 0/NULL on
 * failure).  The declaration of loop variable 'j' and the success-path return
 * are on lines not shown in this excerpt. */
294 buddy_alloc(struct buddy_memzone *zone, unsigned long order, int constraints)
297 struct buddy_mempool * mp = NULL;
298 struct list_head * list = NULL;
299 struct list_head * cur = NULL;
300 struct block * block = NULL;
301 struct block * buddy_block = NULL;
302 unsigned long flags = 0;
/* Only the 4GB constraint is implemented; reject any other mask. */
304 if (constraints && constraints!=LWK_BUDDY_CONSTRAINT_4GB) {
305 ERROR("Do not know how to satisfy constraint mask 0x%x\n", constraints);
306 return (uintptr_t) NULL;
309 BUG_ON(zone == NULL);
310 BUG_ON(order > zone->max_order);
312 /* Fixup requested order to be at least the minimum supported */
313 if (order < zone->min_order) {
314 order = zone->min_order;
317 INFO("zone=%p, order=%lu\n", zone, order);
319 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
/* Search free lists from the requested order upward for a usable block. */
321 for (j = order; j <= zone->max_order; j++) {
323 INFO("Order iter=%lu\n", j);
327 list = &(zone->avail[j]);
/* Nothing free at this order: move on (continue is on a hidden line). */
329 if (list_empty(list)) {
333 list_for_each(cur, list) {
334 block = list_entry(cur, struct block, link);
337 // without a constraint, we just want the first one
341 if (constraints & LWK_BUDDY_CONSTRAINT_4GB) {
342 // under this constraint, we will only use if the entirity
343 // of the allocation within the block will be below 4 GB
/* Note: void* arithmetic below relies on the GCC byte-arithmetic extension. */
344 void *block_pa = (void*)__pa(block);
345 if ((block_pa + (1ULL<<order)) <= (void*)(0x100000000ULL)) {
346 // this block will work
349 // look for the next block
357 // uh oh, no block, look to next order
361 // have appropriate block, will allocate
/* NOTE(review): the assignment of 'mp' (the chosen block's pool) happens on a
 * hidden line before mark_allocated() is called below. */
363 list_del(&(block->link));
367 mark_allocated(mp, block);
369 INFO("pool=%p, block=%p, order=%lu, j=%lu\n", mp, block, order, j);
371 /* Trim if a higher order block than necessary was allocated */
/* NOTE(review): this split appears inside a loop (header hidden) that steps j
 * down toward 'order', returning the upper half to the free list each time. */
374 buddy_block = (struct block *)((unsigned long)block + (1UL << j));
375 buddy_block->mp = mp;
376 buddy_block->order = j;
377 mark_available(mp, buddy_block);
378 list_add(&(buddy_block->link), &(zone->avail[j]));
/* Accounting is in units of minimum-sized blocks. */
381 mp->num_free_blocks -= (1UL << (order - zone->min_order));
383 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
/* Fall-through failure path: no suitable block at any order. */
388 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
390 return (uintptr_t)NULL;
395 * Returns a block of memory to the buddy system memory allocator.
/* NOTE(review): the function signature line itself (and the 'addr' and
 * 'order' parameter lines) are not fully visible; 'addr' is a physical
 * address and error paths return on hidden lines. */
399 //! Buddy system memory allocator object.
400 struct buddy_memzone * zone,
401 //! Address of memory block to free.
403 //! Size of the memory block (2^order bytes).
407 struct block * block = NULL;
408 struct buddy_mempool * pool = NULL;
409 unsigned long flags = 0;
411 BUG_ON(zone == NULL);
412 BUG_ON(order > zone->max_order);
/* The address must be aligned to the minimum block size. */
415 if ((addr & ((1UL << zone->min_order) - 1)) != 0) {
416 ERROR("Attempting to free an invalid memory address (%p)\n", (void *)addr);
421 /* Fixup requested order to be at least the minimum supported */
422 if (order < zone->min_order) {
423 order = zone->min_order;
427 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
/* Locate the owning pool; the freed extent must fit inside it. */
429 pool = find_mempool(zone, addr);
431 if ((pool == NULL) || (order > pool->pool_order)) {
432 WARNING("Attempted to free an invalid page address (%p) - pool=%p order=%lu\n", (void *)addr,pool,order);
433 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
438 /* Overlay block structure on the memory block being freed */
439 block = (struct block *) __va(addr);
/* Double-free guard: the block must currently be marked allocated. */
441 if (is_available(pool, block)) {
442 ERROR("Error: Freeing an available block\n");
443 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
/* Accounting is in units of minimum-sized blocks. */
447 pool->num_free_blocks += (1UL << (order - zone->min_order));
449 /* Coalesce as much as possible with adjacent free buddy blocks */
450 while (order < pool->pool_order) {
451 /* Determine our buddy block's address */
452 struct block * buddy = find_buddy(pool, block, order);
454 /* Make sure buddy is available and has the same size as us */
/* Stop merging when the buddy is allocated or is split to a smaller order
 * (the 'break' statements are on hidden lines). */
455 if (!is_available(pool, buddy))
457 if (is_available(pool, buddy) && (buddy->order != order))
460 /* OK, we're good to go... buddy merge! */
461 list_del(&buddy->link);
/* NOTE(review): hidden lines here appear to pick the lower of block/buddy as
 * the merged block and bump 'order' before recording it below. */
465 block->order = order;
468 /* Add the (possibly coalesced) block to the appropriate free list */
469 block->order = order;
471 mark_available(pool, block);
472 list_add(&(block->link), &(zone->avail[order]));
474 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
483 * Dumps the state of a buddy system memory allocator object to the console.
/* seq_file 'show' callback backing the per-zone /proc entry; 's->private'
 * is the zone pointer installed by single_open() in zone_proc_open().
 * NOTE(review): the loop variable 'i', the per-order reset/increment of
 * 'num_blocks' and the returns are on lines not shown in this excerpt. */
486 zone_mem_show(struct seq_file * s, void * v) {
487 struct buddy_memzone * zone = s->private;
489 unsigned long num_blocks;
490 struct list_head * entry = NULL;
491 unsigned long flags = 0;
/* Defensive NULL-zone guard (its return is on a hidden line). */
495 seq_printf(s, "Null Zone Pointer!!\n");
499 seq_printf(s, "DUMP OF BUDDY MEMORY ZONE:\n");
500 seq_printf(s, " Zone Max Order=%lu, Min Order=%lu\n",
501 zone->max_order, zone->min_order);
/* Hold the zone lock while walking free lists and the pool tree so counts
 * are consistent. */
503 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
505 for (i = zone->min_order; i <= zone->max_order; i++) {
507 /* Count the number of memory blocks in the list */
509 list_for_each(entry, &zone->avail[i]) {
513 seq_printf(s, " order %2lu: %lu free blocks\n", i, num_blocks);
517 seq_printf(s, " %lu memory pools\n", zone->num_pools);
518 // list pools in zone
/* In-order walk of the rbtree of pools via rb_first()/rb_next(). */
520 struct rb_node * node = rb_first(&(zone->mempools));
521 struct buddy_mempool * pool = NULL;
524 pool = rb_entry(node, struct buddy_mempool, tree_node);
/* Free bytes = free minimum-sized blocks scaled by 2^min_order. */
526 seq_printf(s, " Base Addr=%p, order=%lu, size=%lu, free=%lu\n",
527 (void *)pool->base_addr, pool->pool_order, (1UL << pool->pool_order),
528 pool->num_free_blocks << zone->min_order);
531 node = rb_next(node);
535 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
/* /proc open handler: binds zone_mem_show to this file via single_open(),
 * passing the zone pointer stashed in the proc entry's ->data.
 * NOTE(review): PDE() plus direct proc_entry->data access is the legacy
 * (pre-3.10) procfs API; newer kernels use PDE_DATA()/pde_data() — confirm
 * the target kernel version. */
541 static int zone_proc_open(struct inode * inode, struct file * filp) {
542 struct proc_dir_entry * proc_entry = PDE(inode);
543 INFO("proc_entry at %p, data at %p\n", proc_entry, proc_entry->data);
544 return single_open(filp, zone_mem_show, proc_entry->data);
/* File operations for the per-zone /proc entry, using the single_open()
 * seq_file pattern.  NOTE(review): the .read/.llseek members (presumably
 * seq_read/seq_lseek, per the single_open convention) are on lines not
 * shown in this excerpt. */
548 static struct file_operations zone_proc_ops = {
549 .owner = THIS_MODULE,
550 .open = zone_proc_open,
553 .release = single_release,
/*
 * buddy_deinit() - tear down a zone: forcibly remove every pool, hand each
 * pool's user_metadata to 'free_callback' so the caller can release the
 * underlying memory, delete the zone's /proc entry, and free the zone's
 * bookkeeping structures.
 * NOTE(review): the declarations of 'i', 'flags' and 'meta', the actual
 * invocation of free_callback, and the final free of 'zone' itself are on
 * lines not shown in this excerpt.
 */
557 void buddy_deinit(struct buddy_memzone * zone, int (*free_callback)(void *user_metadata)) {
559 struct rb_node *node;
560 struct buddy_mempool **pools;
561 unsigned long long base_addr;
564 unsigned long num_in_tree;
/* Snapshot array of pool pointers, sized by the zone's pool count. */
566 pools = (struct buddy_mempool **) palacios_alloc(sizeof(struct buddy_mempool *)*zone->num_pools);
568 ERROR("Cannot allocate space for doing deinit of memory zone\n");
572 // We will lock only to build up the memory pool list
573 // when we free memory, we need to be able to support free callbacks
574 // that could block. This does leave a race with adds, allocs, and frees, however
575 // In Palacios, we expect a deinit will only really happen on the module unload
576 // so this should not present a problem
577 palacios_spinlock_lock_irqsave(&(zone->lock), flags);
579 // because it does not appear possible to erase while iterating
580 // over the rb tree, we do the following contorted mess
/* Copy every pool pointer out of the tree, bounded by num_pools. */
582 for (num_in_tree=0, node=rb_first(&(zone->mempools));
583 node && num_in_tree<zone->num_pools;
584 node=rb_next(node), num_in_tree++) {
586 pools[num_in_tree]=rb_entry(node,struct buddy_mempool, tree_node);
589 palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
/* Consistency check between the tree contents and the zone's counter. */
591 if (num_in_tree != zone->num_pools) {
592 WARNING("Odd, the number of pools in the tree is %lu, but the zone reports %lu\n",
593 num_in_tree, zone->num_pools);
596 // now we'll free the memory
597 // note that buddy_remove_mempool also removes them
598 // from the rb tree, and frees them
/* Force-remove (3rd arg = 1) each snapshotted pool; removal erases it from
 * the tree and frees its descriptor, returning the metadata in 'meta'. */
599 for (i=0;i<num_in_tree;i++) {
600 base_addr = pools[i]->base_addr;
602 if (buddy_remove_pool(zone, base_addr, 1, &meta)) {
603 WARNING("Cannot remove memory pool at %p during zone deinit...\n",(void*)(base_addr));
607 // pool and node are now gone...
609 // invoke the callback to free the actual memory, if any
616 // get rid of /proc entry
618 char proc_file_name[128];
620 memset(proc_file_name, 0, 128);
621 snprintf(proc_file_name, 128, "v3-mem%d", zone->node_id);
623 remove_proc_entry(proc_file_name, palacios_get_procdir());
/* Release the snapshot array and the zone's per-order free-list array. */
627 palacios_free(pools);
628 palacios_free(zone->avail);
637 * Initializes a buddy system memory allocator object.
640 * [IN] base_addr: Base address of the memory pool.
641 * [IN] pool_order: Size of the memory pool (2^pool_order bytes).
642 * [IN] min_order: Minimum allocatable block size (2^min_order bytes).
645 * Success: Pointer to an initialized buddy system memory allocator.
648 * NOTE: The min_order argument is provided as an optimization. Since one tag
649 * bit is required for each minimum-sized block, large memory pools that
650 * allow order 0 allocations will use large amounts of memory. Specifying
651 * a min_order of 5 (32 bytes), for example, reduces the number of tag
/* NOTE(review): the function-name line of the signature and a 'node_id'
 * parameter line (node_id is used throughout below) are not visible in this
 * excerpt, and the excerpt ends before the function's final return. */
654 struct buddy_memzone *
656 unsigned long max_order,
657 unsigned long min_order,
661 struct buddy_memzone * zone = NULL;
664 DEBUG("Initializing Memory zone with up to %lu bit blocks on Node %d\n", max_order, node_id);
667 /* Smallest block size must be big enough to hold a block structure */
/* Bump min_order so that a free-list header (struct block) fits inside the
 * smallest block: free blocks store their metadata in-place. */
668 if ((1UL << min_order) < sizeof(struct block))
669 min_order = ilog2( roundup_pow_of_two(sizeof(struct block)) );
671 /* The minimum block order must be smaller than the pool order */
672 if (min_order > max_order)
/* Allocate and zero the zone descriptor on the requested NUMA node. */
675 zone = palacios_alloc_extended(sizeof(struct buddy_memzone), GFP_KERNEL, node_id);
677 INFO("Allocated zone at %p\n", zone);
680 ERROR("Could not allocate memzone\n");
684 memset(zone, 0, sizeof(struct buddy_memzone));
686 zone->max_order = max_order;
687 zone->min_order = min_order;
688 zone->node_id = node_id;
690 /* Allocate a list for every order up to the maximum allowed order */
691 zone->avail = palacios_alloc_extended((max_order + 1) * sizeof(struct list_head), GFP_KERNEL, zone->node_id);
693 if (!(zone->avail)) {
694 ERROR("Unable to allocate space for zone list\n");
699 INFO("Allocated free lists at %p\n", zone->avail);
701 /* Initially all lists are empty */
702 for (i = 0; i <= max_order; i++) {
703 INIT_LIST_HEAD(&zone->avail[i]);
707 palacios_spinlock_init(&(zone->lock));
/* Empty rbtree of mempools; pools are added later via buddy_add_pool(). */
709 zone->mempools.rb_node = NULL;
711 INFO("Allocated zone at %p\n", zone);
/* Create the read-only /proc/v3vee/v3-mem<node> entry wired to
 * zone_proc_ops.  NOTE(review): create_proc_entry() and direct ->proc_fops /
 * ->data assignment are the legacy (pre-3.10) procfs API — confirm the
 * target kernel version. */
714 struct proc_dir_entry * zone_entry = NULL;
715 char proc_file_name[128];
717 memset(proc_file_name, 0, 128);
718 snprintf(proc_file_name, 128, "v3-mem%u", zone->node_id);
720 zone_entry = create_proc_entry(proc_file_name, 0444, palacios_get_procdir());
722 zone_entry->proc_fops = &zone_proc_ops;
723 zone_entry->data = zone;
724 INFO("Successfully created /proc/v3vee/v3-mem%d\n", zone->node_id);
726 ERROR("Cannot create /proc/v3vee/v3-mem%d\n", zone->node_id);