X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Fbuddy.c;h=f21c95dae3ef547fcf7f6d9d19bee5bd72f6a30e;hb=b58fe2254858e3ecc94be5d86f2a93f2cfe0a0d5;hp=e076fd71c3ddda0066d78c55e6fa2da2d1076d2e;hpb=a61cac5cf9cf6beb6dccec02b94bb5a37429e7fb;p=palacios.git diff --git a/linux_module/buddy.c b/linux_module/buddy.c index e076fd7..f21c95d 100644 --- a/linux_module/buddy.c +++ b/linux_module/buddy.c @@ -139,7 +139,8 @@ insert_mempool(struct buddy_memzone * zone, int buddy_add_pool(struct buddy_memzone * zone, unsigned long base_addr, - unsigned long pool_order) { + unsigned long pool_order, + void *user_metadata) { struct buddy_mempool * mp = NULL; unsigned long flags = 0; int ret = 0; @@ -152,9 +153,9 @@ int buddy_add_pool(struct buddy_memzone * zone, return -1; } - mp = kmalloc_node(sizeof(struct buddy_mempool), GFP_KERNEL, zone->node_id); + mp = palacios_alloc_extended(sizeof(struct buddy_mempool), GFP_KERNEL, zone->node_id); - if (IS_ERR(mp)) { + if (!mp) { ERROR("Could not allocate mempool\n"); return -1; } @@ -164,31 +165,40 @@ int buddy_add_pool(struct buddy_memzone * zone, mp->zone = zone; mp->num_free_blocks = 0; + mp->user_metadata = user_metadata; + /* Allocate a bitmap with 1 bit per minimum-sized block */ mp->num_blocks = (1UL << pool_order) / (1UL << zone->min_order); - mp->tag_bits = kmalloc_node( + mp->tag_bits = palacios_alloc_extended( BITS_TO_LONGS(mp->num_blocks) * sizeof(long), GFP_KERNEL, zone->node_id ); + if (!(mp->tag_bits)) { + ERROR("Could not allocate tag_bits\n"); + palacios_free(mp); + return -1; + } + + /* Initially mark all minimum-sized blocks as allocated */ bitmap_zero(mp->tag_bits, mp->num_blocks); - spin_lock_irqsave(&(zone->lock), flags); + palacios_spinlock_lock_irqsave(&(zone->lock), flags); ret = insert_mempool(zone, mp); - spin_unlock_irqrestore(&(zone->lock), flags); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); if (ret == -1) { ERROR("Error: Could not insert mempool into zone\n"); 
- kfree(mp->tag_bits); - kfree(mp); + palacios_free(mp->tag_bits); + palacios_free(mp); return -1; } buddy_free(zone, base_addr, pool_order); - printk("Added memory pool (addr=%p), order=%lu\n", (void *)base_addr, pool_order); + INFO("Added memory pool (addr=%p), order=%lu\n", (void *)base_addr, pool_order); return 0; } @@ -200,11 +210,14 @@ int buddy_add_pool(struct buddy_memzone * zone, */ static int __buddy_remove_mempool(struct buddy_memzone * zone, unsigned long base_addr, - unsigned char force) { + unsigned char force, + void **user_metadata) +{ struct buddy_mempool * pool = NULL; struct block * block = NULL; + pool = find_mempool(zone, base_addr); if (pool == NULL) { @@ -212,18 +225,34 @@ static int __buddy_remove_mempool(struct buddy_memzone * zone, return -1; } - if (!bitmap_empty(pool->tag_bits, pool->num_blocks)) { - ERROR("Trying to remove an in use memory pool\n"); - return -1; - } - block = (struct block *)__va(pool->base_addr); - list_del(&(block->link)); + INFO("Removing Mempool %p, base=%p\n",pool,block); + + // The largest order block in the memory pool must be free + if (!is_available(pool, block)) { + if (!force) { + ERROR("Trying to remove an in use memory pool\n"); + *user_metadata=0; + return -1; + } else { + WARNING("Forcefully removing in use memory pool\n"); + } + } + + *user_metadata = pool->user_metadata; + + if (is_available(pool,block)) { + list_del(&(block->link)); + } else { + // we may not be on the free list if we are being + // forcibly removed before all allocations are freed + } + rb_erase(&(pool->tree_node), &(zone->mempools)); - kfree(pool->tag_bits); - kfree(pool); + palacios_free(pool->tag_bits); + palacios_free(pool); zone->num_pools--; @@ -231,14 +260,17 @@ static int __buddy_remove_mempool(struct buddy_memzone * zone, } int buddy_remove_pool(struct buddy_memzone * zone, - unsigned long base_addr, - unsigned char force) { + unsigned long base_addr, + unsigned char force, + void **user_metadata) +{ unsigned long flags = 0; 
int ret = 0; - spin_lock_irqsave(&(zone->lock), flags); - ret = __buddy_remove_mempool(zone, base_addr, force); - spin_unlock_irqrestore(&(zone->lock), flags); + + palacios_spinlock_lock_irqsave(&(zone->lock), flags); + ret = __buddy_remove_mempool(zone, base_addr, force, user_metadata); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); return ret; } @@ -246,23 +278,26 @@ int buddy_remove_pool(struct buddy_memzone * zone, + /** * Allocates a block of memory of the requested size (2^order bytes). * * Arguments: * [IN] mp: Buddy system memory allocator object. * [IN] order: Block size to allocate (2^order bytes). - * + * [IN] filter_func: returns nonzero if given paddr is OK to use + * [IN] filter_state: opaque argument to filter_func * Returns: * Success: Pointer to the start of the allocated memory block. * Failure: NULL */ uintptr_t -buddy_alloc(struct buddy_memzone *zone, unsigned long order) +buddy_alloc(struct buddy_memzone *zone, unsigned long order, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) { unsigned long j; struct buddy_mempool * mp = NULL; struct list_head * list = NULL; + struct list_head * cur = NULL; struct block * block = NULL; struct block * buddy_block = NULL; unsigned long flags = 0; @@ -275,33 +310,58 @@ buddy_alloc(struct buddy_memzone *zone, unsigned long order) order = zone->min_order; } - printk("zone=%p, order=%lu\n", zone, order); + INFO("zone=%p, order=%lu\n", zone, order); - spin_lock_irqsave(&(zone->lock), flags); + palacios_spinlock_lock_irqsave(&(zone->lock), flags); for (j = order; j <= zone->max_order; j++) { - printk("Order iter=%lu\n", j); + INFO("Order iter=%lu\n", j); - /* Try to allocate the first block in the order j list */ - list = &zone->avail[j]; + block=NULL; - if (list_empty(list)) + list = &(zone->avail[j]); + + if (list_empty(list)) { continue; + } + + list_for_each(cur, list) { + block = list_entry(cur, struct block, link); + + if (!filter_func) { + // without a filter, we just 
want the first one + break; + } else { + + void *block_pa = (void*)__pa(block); + + if (filter_func(block_pa,filter_state)) { + // this block will work + break; + } else { + // this block won't work + block=NULL; + continue; + } + + } + } + + if (!block) { + // uh oh, no block, look to next order + continue; + } + + // have appropriate block, will allocate - block = list_entry(list->next, struct block, link); list_del(&(block->link)); mp = block->mp; mark_allocated(mp, block); - printk("pool=%p, block=%p, order=%lu, j=%lu\n", mp, block, order, j); - - /* - spin_unlock_irqrestore(&(zone->lock), flags); - return 0; - */ + INFO("pool=%p, block=%p, order=%lu, j=%lu\n", mp, block, order, j); /* Trim if a higher order block than necessary was allocated */ while (j > order) { @@ -315,12 +375,12 @@ buddy_alloc(struct buddy_memzone *zone, unsigned long order) mp->num_free_blocks -= (1UL << (order - zone->min_order)); - spin_unlock_irqrestore(&(zone->lock), flags); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); return __pa(block); } - spin_unlock_irqrestore(&(zone->lock), flags); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); return (uintptr_t)NULL; } @@ -329,7 +389,7 @@ buddy_alloc(struct buddy_memzone *zone, unsigned long order) /** * Returns a block of memory to the buddy system memory allocator. */ -void +int buddy_free( //! Buddy system memory allocator object. 
struct buddy_memzone * zone, @@ -359,14 +419,14 @@ buddy_free( } - spin_lock_irqsave(&(zone->lock), flags); + palacios_spinlock_lock_irqsave(&(zone->lock), flags); pool = find_mempool(zone, addr); if ((pool == NULL) || (order > pool->pool_order)) { - WARNING("Attempted to free an invalid page address (%p)\n", (void *)addr); - spin_unlock_irqrestore(&(zone->lock), flags); - return; + WARNING("Attempted to free an invalid page address (%p) - pool=%p order=%lu\n", (void *)addr,pool,order); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); + return -1; } @@ -374,9 +434,9 @@ buddy_free( block = (struct block *) __va(addr); if (is_available(pool, block)) { - printk(KERN_ERR "Error: Freeing an available block\n"); - spin_unlock_irqrestore(&(zone->lock), flags); - return; + ERROR("Error: Freeing an available block\n"); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); + return -1; } pool->num_free_blocks += (1UL << (order - zone->min_order)); @@ -406,7 +466,9 @@ buddy_free( mark_available(pool, block); list_add(&(block->link), &(zone->avail[order])); - spin_unlock_irqrestore(&(zone->lock), flags); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); + + return 0; } @@ -433,7 +495,7 @@ zone_mem_show(struct seq_file * s, void * v) { seq_printf(s, " Zone Max Order=%lu, Min Order=%lu\n", zone->max_order, zone->min_order); - spin_lock_irqsave(&(zone->lock), flags); + palacios_spinlock_lock_irqsave(&(zone->lock), flags); for (i = zone->min_order; i <= zone->max_order; i++) { @@ -465,7 +527,7 @@ zone_mem_show(struct seq_file * s, void * v) { } } - spin_unlock_irqrestore(&(zone->lock), flags); + palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); return 0; } @@ -473,7 +535,7 @@ zone_mem_show(struct seq_file * s, void * v) { static int zone_proc_open(struct inode * inode, struct file * filp) { struct proc_dir_entry * proc_entry = PDE(inode); - printk("proc_entry at %p, data at %p\n", proc_entry, proc_entry->data); + INFO("proc_entry at %p, data 
at %p\n", proc_entry, proc_entry->data);
 
 	return single_open(filp, zone_mem_show, proc_entry->data);
 }
@@ -487,28 +549,79 @@ static struct file_operations zone_proc_ops = {
 };
 
 
-
-void buddy_deinit(struct buddy_memzone * zone) {
+void buddy_deinit(struct buddy_memzone * zone, int (*free_callback)(void *user_metadata)) {
     unsigned long flags;
+    struct rb_node *node;
+    struct buddy_mempool **pools;
+    unsigned long long base_addr;
+    void *meta;
+    int i;
+    unsigned long num_in_tree;
+
+    pools = (struct buddy_mempool **) palacios_alloc(sizeof(struct buddy_mempool *)*zone->num_pools);
+    if (!pools) {
+	ERROR("Cannot allocate space for doing deinit of memory zone\n");
+	return ;
+    }
+
+    // We will lock only to build up the memory pool list
+    // when we free memory, we need to be able to support free callbacks
+    // that could block. This does leave a race with adds, allocs, and frees, however
+    // In Palacios, we expect a deinit will only really happen on the module unload
+    // so this should not present a problem
+    palacios_spinlock_lock_irqsave(&(zone->lock), flags);
+
+    // because it does not appear possible to erase while iterating
+    // over the rb tree, we do the following contorted mess
+    // get the pools
+    for (num_in_tree=0, node=rb_first(&(zone->mempools));
+	 node && num_in_tree<zone->num_pools;
+	 node=rb_next(node), num_in_tree++) {
+
+	pools[num_in_tree]=rb_entry(node,struct buddy_mempool, tree_node);
+    }
 
-    spin_lock_irqsave(&(zone->lock), flags);
+    palacios_spinlock_unlock_irqrestore(&(zone->lock), flags);
 
-    // for each pool, free it
+    if (num_in_tree != zone->num_pools) {
+	WARNING("Odd, the number of pools in the tree is %lu, but the zone reports %lu\n",
+		num_in_tree, zone->num_pools);
+    }
 
-    spin_unlock_irqrestore(&(zone->lock), flags);
-    
+    // now we'll free the memory
+    // note that buddy_remove_mempool also removes them
+    // from the rb tree, and frees them
+    for (i=0;i<num_in_tree;i++) {
+	base_addr = pools[i]->base_addr;
+
+	if (buddy_remove_pool(zone, base_addr, 1, &meta)) {
+	    WARNING("Cannot remove memory 
pool at %p during zone deinit...\n",(void*)(base_addr)); + continue; + } + + // pool and node are now gone... + + // invoke the callback to free the actual memory, if any + if (free_callback) { + free_callback(meta); + } + } + + + // get rid of /proc entry { char proc_file_name[128]; memset(proc_file_name, 0, 128); snprintf(proc_file_name, 128, "v3-mem%d", zone->node_id); - remove_proc_entry(proc_file_name, palacios_proc_dir); + remove_proc_entry(proc_file_name, palacios_get_procdir()); } - kfree(zone->avail); - kfree(zone); + palacios_free(pools); + palacios_free(zone->avail); + palacios_free(zone); return; } @@ -554,11 +667,11 @@ buddy_init( if (min_order > max_order) return NULL; - zone = kmalloc_node(sizeof(struct buddy_memzone), GFP_KERNEL, node_id); + zone = palacios_alloc_extended(sizeof(struct buddy_memzone), GFP_KERNEL, node_id); - printk("Allocated zone at %p\n", zone); + INFO("Allocated zone at %p\n", zone); - if (IS_ERR(zone)) { + if (!zone) { ERROR("Could not allocate memzone\n"); return NULL; } @@ -570,9 +683,15 @@ buddy_init( zone->node_id = node_id; /* Allocate a list for every order up to the maximum allowed order */ - zone->avail = kmalloc_node((max_order + 1) * sizeof(struct list_head), GFP_KERNEL, zone->node_id); + zone->avail = palacios_alloc_extended((max_order + 1) * sizeof(struct list_head), GFP_KERNEL, zone->node_id); - printk("Allocated free lists at %p\n", zone->avail); + if (!(zone->avail)) { + ERROR("Unable to allocate space for zone list\n"); + palacios_free(zone); + return NULL; + } + + INFO("Allocated free lists at %p\n", zone->avail); /* Initially all lists are empty */ for (i = 0; i <= max_order; i++) { @@ -580,25 +699,26 @@ buddy_init( } - spin_lock_init(&(zone->lock)); + palacios_spinlock_init(&(zone->lock)); zone->mempools.rb_node = NULL; - printk("Allocated zone at %p\n", zone); + INFO("Allocated zone at %p\n", zone); { struct proc_dir_entry * zone_entry = NULL; char proc_file_name[128]; memset(proc_file_name, 0, 128); - 
snprintf(proc_file_name, 128, "v3-mem%d", zone->node_id); + snprintf(proc_file_name, 128, "v3-mem%u", zone->node_id); - zone_entry = create_proc_entry(proc_file_name, 0444, palacios_proc_dir); + zone_entry = create_proc_entry(proc_file_name, 0444, palacios_get_procdir()); if (zone_entry) { zone_entry->proc_fops = &zone_proc_ops; zone_entry->data = zone; + INFO("Successfully created /proc/v3vee/v3-mem%d\n", zone->node_id); } else { - printk(KERN_ERR "Error creating memory zone proc file\n"); + ERROR("Cannot create /proc/v3vee/v3-mem%d\n", zone->node_id); } }