From: Peter Dinda Date: Tue, 20 Aug 2013 23:14:30 +0000 (-0500) Subject: Memory management enhancements: dynamic removal, cleanup at module remove time X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=28dcbfda8061d2785301784d27694d1e02f54fff Memory management enhancements: dynamic removal, cleanup at module remove time --- diff --git a/linux_module/buddy.c b/linux_module/buddy.c index f7c91cb..ea5b705 100644 --- a/linux_module/buddy.c +++ b/linux_module/buddy.c @@ -210,6 +210,7 @@ static int __buddy_remove_mempool(struct buddy_memzone * zone, struct buddy_mempool * pool = NULL; struct block * block = NULL; + pool = find_mempool(zone, base_addr); if (pool == NULL) { @@ -217,15 +218,23 @@ static int __buddy_remove_mempool(struct buddy_memzone * zone, return -1; } - if (!bitmap_empty(pool->tag_bits, pool->num_blocks)) { - ERROR("Trying to remove an in use memory pool\n"); - return -1; + block = (struct block *)__va(pool->base_addr); + + INFO("Removing Mempool %p, base=%p\n",pool,block); + + // The largest order block in the memory pool must be free + if (!is_available(pool, block)) { + if (!force) { + ERROR("Trying to remove an in use memory pool\n"); + *user_metadata=0; + return -1; + } else { + WARNING("Forcefully removing in use memory pool\n"); + } } *user_metadata = pool->user_metadata; - block = (struct block *)__va(pool->base_addr); - list_del(&(block->link)); rb_erase(&(pool->tree_node), &(zone->mempools)); @@ -255,6 +264,7 @@ int buddy_remove_pool(struct buddy_memzone * zone, + /** * Allocates a block of memory of the requested size (2^order bytes). 
* @@ -528,16 +538,66 @@ static struct file_operations zone_proc_ops = { }; -void buddy_deinit(struct buddy_memzone * zone) { +void buddy_deinit(struct buddy_memzone * zone, int (*free_callback)(void *user_metadata)) { unsigned long flags; - + struct rb_node *node; + struct buddy_mempool **pools; + unsigned long long base_addr; + void *meta; + int i; + unsigned long num_in_tree; + + pools = (struct buddy_mempool **) palacios_alloc(sizeof(struct buddy_mempool *)*zone->num_pools); + if (!pools) { + ERROR("Cannot allocate space for doing deinit of memory zone\n"); + return ; + } + + // We will lock only to build up the memory pool list + // when we free memory, we need to be able to support free callbacks + // that could block. This does leave a race with adds, allocs, and frees, however + // In Palacios, we expect a deinit will only really happen on the module unload + // so this should not present a problem palacios_spinlock_lock_irqsave(&(zone->lock), flags); - // for each pool, free it -#warning We really need to free the memory pools here + // because it does not appear possible to erase while iterating + // over the rb tree, we do the following contorted mess + // get the pools + for (num_in_tree=0, node=rb_first(&(zone->mempools)); + node && num_in_tree<zone->num_pools; + node=rb_next(node), num_in_tree++) { + + pools[num_in_tree]=rb_entry(node,struct buddy_mempool, tree_node); + } palacios_spinlock_unlock_irqrestore(&(zone->lock), flags); - + + if (num_in_tree != zone->num_pools) { + WARNING("Odd, the number of pools in the tree is %lu, but the zone reports %lu\n", + num_in_tree, zone->num_pools); + } + + // now we'll free the memory + // note that buddy_remove_mempool also removes them + // from the rb tree, and frees them + for (i=0;i<num_in_tree;i++) { + base_addr = pools[i]->base_addr; + + if (buddy_remove_pool(zone, base_addr, 1, &meta)) { + WARNING("Cannot remove memory pool at %p during zone deinit...\n",(void*)(base_addr)); + continue; + } + + // pool and node are now gone...
+ + // invoke the callback to free the actual memory, if any + if (free_callback) { + free_callback(meta); + } + } + + + // get rid of /proc entry { char proc_file_name[128]; @@ -548,6 +608,7 @@ void buddy_deinit(struct buddy_memzone * zone) { } + palacios_free(pools); palacios_free(zone->avail); palacios_free(zone); diff --git a/linux_module/buddy.h b/linux_module/buddy.h index 4781cfc..1017a3a 100644 --- a/linux_module/buddy.h +++ b/linux_module/buddy.h @@ -73,7 +73,8 @@ buddy_init(unsigned long pool_order, unsigned int node_id); extern void -buddy_deinit(struct buddy_memzone * zone); +buddy_deinit(struct buddy_memzone * zone, + int (*free_callback)(void *user_metadata)); /* Add pool at given physical address */ extern int diff --git a/linux_module/main.c b/linux_module/main.c index caaca4f..425e08f 100644 --- a/linux_module/main.c +++ b/linux_module/main.c @@ -197,8 +197,31 @@ out_err: break; } + case V3_REMOVE_MEMORY: { + struct v3_mem_region mem; + + memset(&mem, 0, sizeof(struct v3_mem_region)); + + if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) { + ERROR("copy from user error getting mem_region...\n"); + return -EFAULT; + } + + DEBUG("Removing memory at address %p\n", (void*)(mem.base_addr)); + + if (remove_palacios_memory(&mem) == -1) { + ERROR("Error removing memory from Palacios\n"); + return -EFAULT; + } + + break; + } + + + case V3_RESET_MEMORY: { - if (palacios_init_mm() == -1) { + DEBUG("Resetting memory\n"); + if (palacios_deinit_mm() == -1) { ERROR("Error resetting Palacios memory\n"); return -EFAULT; } diff --git a/linux_module/mm.c b/linux_module/mm.c index 5e98f79..c5ec916 100644 --- a/linux_module/mm.c +++ b/linux_module/mm.c @@ -153,7 +153,7 @@ int add_palacios_memory(struct v3_mem_region *r) { if (buddy_add_pool(memzones[node_id], r->base_addr, pool_order, keep)) { ERROR("ALERT ALERT ALERT Unable to add pool to buddy allocator...\n"); if (r->type==REQUESTED || r->type==REQUESTED32) { - free_pages((uintptr_t)__va(r->base_addr), 
get_order(r->num_pages)); + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); } palacios_free(keep); return -1; @@ -164,27 +164,47 @@ int add_palacios_memory(struct v3_mem_region *r) { -int palacios_remove_memory(uintptr_t base_addr) { - int node_id = numa_addr_to_node(base_addr); +int remove_palacios_memory(struct v3_mem_region *req) { + int node_id = numa_addr_to_node(req->base_addr); struct v3_mem_region *r; - if (buddy_remove_pool(memzones[node_id], base_addr, 0, (void**)(&r))) { //unforced remove - ERROR("Cannot remove memory at base address 0x%p because it is in use\n", (void*)base_addr); + if (buddy_remove_pool(memzones[node_id], req->base_addr, 0, (void**)(&r))) { //unforced remove + ERROR("Cannot remove memory at base address 0x%p\n", (void*)(req->base_addr)); return -1; } - if (r->type==REQUESTED || r->type==REQUESTED32) { - free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages)); - } else { - // user space resposible for onlining + if (r) { + if (r->type==REQUESTED || r->type==REQUESTED32) { + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); + } else { + // user space responsible for onlining + } + palacios_free(r); } - - palacios_free(r); return 0; } +static int handle_free(void *meta) +{ + struct v3_mem_region *r = (struct v3_mem_region *)meta; + + if (r) { + if (r->type==REQUESTED || r->type==REQUESTED32) { + //INFO("Freeing %llu pages at %p\n",r->num_pages,(void*)(r->base_addr)); + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); + } else { + // user space responsible for onlining + } + palacios_free(r); + } + + return 0; +} + + + int palacios_deinit_mm( void ) { @@ -194,7 +214,8 @@ int palacios_deinit_mm( void ) { for (i = 0; i < numa_num_nodes(); i++) { if (memzones[i]) { - buddy_deinit(memzones[i]); + INFO("Deiniting memory zone %d\n",i); + buddy_deinit(memzones[i],handle_free); } // note that the memory is not onlined here - offlining and onlining @@ 
-202,6 +223,7 @@ int palacios_deinit_mm( void ) { if (seed_addrs[i]) { // free the seed regions + INFO("Freeing seed addrs %d\n",i); free_pages((uintptr_t)__va(seed_addrs[i]), MAX_ORDER - 1); } } @@ -254,7 +276,7 @@ int palacios_init_mm( void ) { pgs = alloc_pages_node(node_id, GFP_DMA32, MAX_ORDER - 1); if (!pgs) { - INFO("Could not allocate initial memory block for node %d beloew 4GB\n", node_id); + INFO("Could not allocate initial memory block for node %d below 4GB\n", node_id); pgs = alloc_pages_node(node_id, GFP_KERNEL, MAX_ORDER - 1); diff --git a/linux_module/mm.h b/linux_module/mm.h index 9452ddd..b758b72 100644 --- a/linux_module/mm.h +++ b/linux_module/mm.h @@ -16,7 +16,7 @@ u64 get_palacios_num_pages(void); int add_palacios_memory(struct v3_mem_region *reg); -int remove_palacios_memory(uintptr_t base_addr); +int remove_palacios_memory(struct v3_mem_region *reg); int palacios_init_mm( void ); int palacios_deinit_mm( void ); diff --git a/linux_module/palacios.h b/linux_module/palacios.h index 977d617..7b3a32b 100644 --- a/linux_module/palacios.h +++ b/linux_module/palacios.h @@ -11,8 +11,9 @@ #define V3_CREATE_GUEST 12 #define V3_FREE_GUEST 13 -#define V3_ADD_MEMORY 50 -#define V3_RESET_MEMORY 51 +#define V3_ADD_MEMORY 50 +#define V3_RESET_MEMORY 51 +#define V3_REMOVE_MEMORY 52 #define V3_ADD_PCI_HW_DEV 55 #define V3_ADD_PCI_USER_DEV 56