X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Fmm.c;h=e4c374a9a4030a49d190d37b8ce3d8134944f44b;hb=8cd246c3830733c2850cef049a7ad153daf0dd13;hp=d71cb968e0467216e5db02141b162868320c2950;hpb=07aa8f3c18a33af0961e7546980a63ab5f6fba4f;p=palacios.git diff --git a/linux_module/mm.c b/linux_module/mm.c index d71cb96..e4c374a 100644 --- a/linux_module/mm.c +++ b/linux_module/mm.c @@ -10,246 +10,345 @@ //static struct list_head pools; #include "palacios.h" +#include "mm.h" +#include "buddy.h" +#include "numa.h" +#include "palacios/vmm.h" -#define OFFLINE_POOL_THRESHOLD 12 -struct mempool { - uintptr_t base_addr; - u64 num_pages; +static struct buddy_memzone ** memzones = NULL; +static uintptr_t * seed_addrs = NULL; - u8 * bitmap; -}; +// alignment is in bytes +uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) { + uintptr_t addr = 0; + int any = node_id==-1; // can allocate on any -static struct mempool pool; + if (node_id == -1) { + int cpu_id = get_cpu(); + put_cpu(); + + node_id = numa_cpu_to_node(cpu_id); // try first preferentially for the calling pcore -static inline int get_page_bit(int index) { - int major = index / 8; - int minor = index % 8; + } else if (numa_num_nodes() == 1) { + node_id = 0; + } else if (node_id >= numa_num_nodes()) { + ERROR("Requesting memory from an invalid NUMA node. (Node: %d) (%d nodes on system)\n", + node_id, numa_num_nodes()); + return 0; + } - return (pool.bitmap[major] & (0x1 << minor)); -} + addr = buddy_alloc(memzones[node_id], get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT, filter_func, filter_state); -static inline void set_page_bit(int index) { - int major = index / 8; - int minor = index % 8; + if (!addr && any) { + int i; + // do a scan to see if we can satisfy request on any node + for (i=0; i< numa_num_nodes(); i++) { + if (i!=node_id) { + addr = buddy_alloc(memzones[i], get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT, filter_func, filter_state); + if (addr) { + break; + } + } + } + } + - pool.bitmap[major] |= (0x1 << minor); + //DEBUG("Returning from alloc addr=%p, vaddr=%p\n", (void *)addr, __va(addr)); + return addr; } -static inline void clear_page_bit(int index) { - int major = index / 8; - int minor = index % 8; - pool.bitmap[major] &= ~(0x1 << minor); + +void free_palacios_pgs(uintptr_t pg_addr, u64 num_pages) { + int node_id = numa_addr_to_node(pg_addr); + + //DEBUG("Freeing Memory page %p\n", (void *)pg_addr); + if (buddy_free(memzones[node_id], pg_addr, get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT)) { + // it is possible that the allocation was actually on a different zone, + // so, just to be sure, we'll try to dellocate on each + for (node_id=0;node_idtype,r->node,r->base_addr,r->num_pages); - if (alignment > 0) { - step = alignment / PAGE_SIZE; + // fixup request regardless of its type + if (r->num_pages*4096 < get_palacios_mem_block_size()) { + WARNING("Allocating a memory pool smaller than the Palacios block size - may not be useful\n"); } - // Start the search at the correct alignment - if (pool.base_addr % alignment) { - start = ((alignment - (pool.base_addr % alignment)) >> 12); + if (pow2(get_order(r->num_pages*PAGE_SIZE)) != r->num_pages) { + WARNING("Allocating a memory pool that is not a power of two (is %llu) - it will be rounded down!\n", r->num_pages); + r->num_pages=pow2(get_order(r->num_pages*PAGE_SIZE)); + WARNING("Rounded request is for %llu pages\n", r->num_pages); } - DEBUG("\t Start idx %d (base_addr=%p)\n", start, (void *)(u64)pool.base_addr); - - for (i = start; i < (pool.num_pages - num_pages); i += step) { - if (get_page_bit(i) == 0) { - int j = 0; - int collision = 0; - for (j = i; (j - i) < num_pages; j++) { - if (get_page_bit(j) == 1) { - collision = 1; - break; - } - } + if (!(keep=palacios_alloc(sizeof(struct v3_mem_region)))) { + ERROR("Error allocating space for tracking region\n"); + return -1; + } - if (collision == 1) { - break; - } - for (j = i; (j - i) < num_pages; j++) { - set_page_bit(j); - } + if (r->type==REQUESTED || r->type==REQUESTED32) { + struct page *pgs; - return pool.base_addr + (i * PAGE_SIZE); - } + INFO("Attempting to allocate %llu pages of %s memory\n", r->num_pages, + r->type==REQUESTED ? "64 bit (unrestricted)" : + r->type==REQUESTED32 ? "32 bit (restricted)" : "unknown (assuming 64 bit unrestricted)"); + + pgs = alloc_pages_node(r->node, + r->type==REQUESTED ? GFP_KERNEL : + r->type==REQUESTED32 ? GFP_DMA32 : GFP_KERNEL, + get_order(r->num_pages*PAGE_SIZE)); + if (!pgs) { + ERROR("Unable to satisfy allocation request\n"); + palacios_free(keep); + return -1; + } + r->base_addr = page_to_pfn(pgs) << PAGE_SHIFT; } + - ERROR("ALERT ALERT Allocation of Large Number of Contiguous Pages FAILED\n"); + *keep = *r; - return 0; -} + node_id = numa_addr_to_node(r->base_addr); + if (node_id == -1) { + ERROR("Error locating node for addr %p\n", (void *)(r->base_addr)); + return -1; + } -// alignment is in bytes -uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment) { - uintptr_t addr = 0; + if ((node_id != r->node) && (r->node!=-1)) { + INFO("Memory add request is for node %d, but memory is in node %d\n",r->node,node_id); + } - if (num_pages < OFFLINE_POOL_THRESHOLD) { - struct page * pgs = NULL; - void *temp; - int order = get_order(num_pages * PAGE_SIZE); - - pgs = alloc_pages(GFP_DMA32, order); - - if (!pgs) { - ERROR("Could not allocate small number of contigious pages\n"); - return 0; + pool_order = get_order(r->num_pages * PAGE_SIZE) + PAGE_SHIFT; + + if (buddy_add_pool(memzones[node_id], r->base_addr, pool_order, keep)) { + ERROR("ALERT ALERT ALERT Unable to add pool to buddy allocator...\n"); + if (r->type==REQUESTED || r->type==REQUESTED32) { + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); } - - /* DEBUG("%llu pages (order=%d) aquired from alloc_pages\n", - num_pages, order); */ + palacios_free(keep); + return -1; + } - addr = page_to_pfn(pgs) << PAGE_SHIFT; + return 0; +} - temp = (void*)addr; - if ( (temp>=(void*)(pool.base_addr) && - (temp<((void*)(pool.base_addr)+pool.num_pages*PAGE_SIZE))) - || ((temp+num_pages*PAGE_SIZE)>=(void*)(pool.base_addr) && - ((temp+num_pages*PAGE_SIZE)<((void*)(pool.base_addr)+pool.num_pages*PAGE_SIZE))) ) { - ERROR("ALERT ALERT Allocation of small number of contiguous pages returned block that " - "OVERLAPS with the offline page pool addr=%p, addr+numpages=%p, " - "pool.base_addr=%p, pool.base_addr+pool.numpages=%p\n", - temp, temp+num_pages*PAGE_SIZE, (void*)(pool.base_addr), - (void*)(pool.base_addr)+pool.num_pages*PAGE_SIZE); - } +int remove_palacios_memory(struct v3_mem_region *req) { + int node_id = numa_addr_to_node(req->base_addr); + struct v3_mem_region *r; - - } else { - //DEBUG("Allocating %llu pages from bitmap allocator\n", num_pages); - //addr = pool.base_addr; - addr = alloc_contig_pgs(num_pages, alignment); - if (!addr) { - ERROR("Could not allocate large number of contiguous pages\n"); - } + if (buddy_remove_pool(memzones[node_id], req->base_addr, 0, (void**)(&r))) { //unforced remove + ERROR("Cannot remove memory at base address 0x%p\n", (void*)(req->base_addr)); + return -1; } + if (r) { + if (r->type==REQUESTED || r->type==REQUESTED32) { + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); + } else { + // user space responsible for onlining + } + palacios_free(r); + } - //DEBUG("Returning from alloc addr=%p, vaddr=%p\n", (void *)addr, __va(addr)); - return addr; + return 0; } +static int handle_free(void *meta) +{ + struct v3_mem_region *r = (struct v3_mem_region *)meta; -void free_palacios_pgs(uintptr_t pg_addr, int num_pages) { - //DEBUG("Freeing Memory page %p\n", (void *)pg_addr); + if (r) { + if (r->type==REQUESTED || r->type==REQUESTED32) { + //INFO("Freeing %llu pages at %p\n",r->num_pages,(void*)(r->base_addr)); + free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE)); + } else { + // user space responsible for onlining + } + palacios_free(r); + } + + return 0; +} - if ((pg_addr >= pool.base_addr) && - (pg_addr < pool.base_addr + (PAGE_SIZE * pool.num_pages))) { - int pg_idx = (pg_addr - pool.base_addr) / PAGE_SIZE; - int i = 0; + - if (num_pages pool.num_pages) { - ERROR("Freeing memory bounds exceeded for offline pool\n"); - return; - } + int i = 0; - for (i = 0; i < num_pages; i++) { - if (get_page_bit(pg_idx + i) == 0) { - ERROR("Trying to free unallocated page from offline pool\n"); + if (memzones) { + for (i = 0; i < numa_num_nodes(); i++) { + + if (memzones[i]) { + INFO("Deiniting memory zone %d\n",i); + buddy_deinit(memzones[i],handle_free); + } + + // note that the memory is not onlined here - offlining and onlining + // is the resposibility of the caller + + if (seed_addrs[i]) { + // free the seed regions + INFO("Freeing seed addrs %d\n",i); + free_pages((uintptr_t)__va(seed_addrs[i]), MAX_ORDER - 1); } - clear_page_bit(pg_idx + i); } - } else { - if (num_pages>=OFFLINE_POOL_THRESHOLD) { - ERROR("ALERT ALERT Large page deallocation from linux pool\n"); - } - __free_pages(pfn_to_page(pg_addr >> PAGE_SHIFT), get_order(num_pages * PAGE_SIZE)); + palacios_free(memzones); + palacios_free(seed_addrs); } + + return 0; } +int palacios_init_mm( void ) { + int num_nodes = numa_num_nodes(); + int node_id = 0; -int add_palacios_memory(uintptr_t base_addr, u64 num_pages) { - /* JRL: OK.... so this is horrible, terrible and if anyone else did it I would yell at them. - * But... the fact that you can do this in C is so ridiculous that I can't help myself. - * Note that we're repurposing "true" to be 1 here - */ + INFO("memory manager init: MAX_ORDER=%d (%llu bytes)\n",MAX_ORDER, PAGE_SIZE*pow2(MAX_ORDER)); - int bitmap_size = (num_pages / 8) + ((num_pages % 8) > 0); + memzones = palacios_alloc_extended(sizeof(struct buddy_memzone *) * num_nodes, GFP_KERNEL,-1); - if (pool.num_pages != 0) { - ERROR("ERROR: Memory has already been added\n"); + if (!memzones) { + ERROR("Cannot allocate space for memory zones\n"); + palacios_deinit_mm(); return -1; } - DEBUG("Managing %dMB of memory starting at %llu (%lluMB)\n", - (unsigned int)(num_pages * PAGE_SIZE) / (1024 * 1024), - (unsigned long long)base_addr, - (unsigned long long)(base_addr / (1024 * 1024))); + memset(memzones, 0, sizeof(struct buddy_memzone *) * num_nodes); + seed_addrs = palacios_alloc_extended(sizeof(uintptr_t) * num_nodes, GFP_KERNEL,-1); - pool.bitmap = palacios_alloc(bitmap_size); - - if (IS_ERR(pool.bitmap)) { - ERROR("Error allocating Palacios MM bitmap\n"); + if (!seed_addrs) { + ERROR("Cannot allocate space for seed addrs\n"); + palacios_deinit_mm(); return -1; } - - memset(pool.bitmap, 0, bitmap_size); - - pool.base_addr = base_addr; - pool.num_pages = num_pages; - - return 0; -} - + memset(seed_addrs, 0, sizeof(uintptr_t) * num_nodes); + + for (node_id = 0; node_id < num_nodes; node_id++) { + struct buddy_memzone * zone = NULL; + + // Seed the allocator with a small set of pages to allow initialization to complete. + // For now we will just grab some random pages, but in the future we will need to grab NUMA specific regions + // See: alloc_pages_node() + + { + struct page * pgs; + int actual_node; + + // attempt to first allocate below 4 GB for compatibility with + // 32 bit shadow paging + pgs = alloc_pages_node(node_id, GFP_DMA32, MAX_ORDER - 1); + + + if (!pgs) { + INFO("Could not allocate initial memory block for node %d below 4GB\n", node_id); + + pgs = alloc_pages_node(node_id, GFP_KERNEL, MAX_ORDER - 1); + + if (!pgs) { + INFO("Could not allocate initial memory block for node %d below 4GB\n", node_id); + if (!pgs) { + ERROR("Could not allocate initial memory block for node %d without restrictions\n", node_id); + BUG_ON(!pgs); + palacios_deinit_mm(); + return -1; + } + } else { + actual_node=numa_addr_to_node((uintptr_t)(page_to_pfn(pgs) << PAGE_SHIFT)); + if (actual_node != node_id) { + WARNING("Initial 64 bit allocation attempt for node %d resulted in allocation on node %d\n",node_id,actual_node); + } + } + + } else { + actual_node=numa_addr_to_node((uintptr_t)(page_to_pfn(pgs) << PAGE_SHIFT)); + if (actual_node != node_id) { + WARNING("Initial 32bit-limited allocation attempt for node %d resulted in allocation on node %d\n",node_id,actual_node); + } + } -int palacios_init_mm( void ) { + seed_addrs[node_id] = page_to_pfn(pgs) << PAGE_SHIFT; + } - pool.base_addr = 0; - pool.num_pages = 0; - pool.bitmap = NULL; + // Initialization is done using the compile-time memory block size since + // at this point, we do not yet know what the run-time size is + zone = buddy_init(get_order(V3_CONFIG_MEM_BLOCK_SIZE) + PAGE_SHIFT, PAGE_SHIFT, node_id); - return 0; -} + if (zone == NULL) { + ERROR("Could not initialization memory management for node %d\n", node_id); + palacios_deinit_mm(); + return -1; + } -int palacios_deinit_mm( void ) { + printk("Zone initialized, Adding seed region (order=%d)\n", + (MAX_ORDER - 1) + PAGE_SHIFT); - palacios_free(pool.bitmap); + if (buddy_add_pool(zone, seed_addrs[node_id], (MAX_ORDER - 1) + PAGE_SHIFT,0)) { + ERROR("Could not add pool to buddy allocator\n"); + palacios_deinit_mm(); + return -1; + } - pool.bitmap=0; - pool.base_addr=0; - pool.num_pages=0; + memzones[node_id] = zone; + } - // note that the memory is not onlined here - offlining and onlining - // is the resposibility of the caller - return 0; + } +