#include "mm.h"
#include "buddy.h"
#include "numa.h"
+#include "palacios/vmm.h"
static struct buddy_memzone ** memzones = NULL;
// alignment is in bytes
-uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment, int node_id) {
+uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) {
uintptr_t addr = 0;
+ int any = node_id==-1; // can allocate on any
if (node_id == -1) {
int cpu_id = get_cpu();
put_cpu();
- node_id = numa_cpu_to_node(cpu_id);
+ node_id = numa_cpu_to_node(cpu_id); // try first preferentially for the calling pcore
+
} else if (numa_num_nodes() == 1) {
node_id = 0;
} else if (node_id >= numa_num_nodes()) {
return 0;
}
- addr = buddy_alloc(memzones[node_id], get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT);
+ addr = buddy_alloc(memzones[node_id], get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT, filter_func, filter_state);
+
+ if (!addr && any) {
+ int i;
+ // do a scan to see if we can satisfy request on any node
+ for (i=0; i< numa_num_nodes(); i++) {
+ if (i!=node_id) {
+ addr = buddy_alloc(memzones[i], get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT, filter_func, filter_state);
+ if (addr) {
+ break;
+ }
+ }
+ }
+ }
+
//DEBUG("Returning from alloc addr=%p, vaddr=%p\n", (void *)addr, __va(addr));
return addr;
int node_id = numa_addr_to_node(pg_addr);
//DEBUG("Freeing Memory page %p\n", (void *)pg_addr);
- buddy_free(memzones[node_id], pg_addr, get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT);
+ if (buddy_free(memzones[node_id], pg_addr, get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT)) {
+ // it is possible that the allocation was actually on a different zone,
+ // so, just to be sure, we'll try to deallocate on each
+ for (node_id=0;node_id<numa_num_nodes();node_id++) {
+ if (!buddy_free(memzones[node_id], pg_addr, get_order(num_pages * PAGE_SIZE) + PAGE_SHIFT)) {
+ // successfully freed on different zone, which is also OK
+ break;
+ }
+ }
+ if (node_id==numa_num_nodes()) {
+ ERROR("Unable to free pages -addr=%p, numpages=%llu on any node\n",(void*)pg_addr,num_pages);
+ }
+ }
return;
}
return x;
}
+// Determine the run-time Palacios memory block size (in bytes).
+// The "mem_block_size" module option, when supplied, overrides the
+// compile-time default V3_CONFIG_MEM_BLOCK_SIZE.
+static unsigned long long get_palacios_mem_block_size(void)
+{
+ char *s = v3_lookup_option("mem_block_size");
+
+ if (!s) {
+ // option not supplied - use the compile-time default
+ return V3_CONFIG_MEM_BLOCK_SIZE;
+ } else {
+ unsigned long long temp;
+
+ if (strict_strtoull(s,0,&temp)) {
+ // option present but not parseable - fall back to the default
+ return V3_CONFIG_MEM_BLOCK_SIZE; // odd...
+ } else {
+ return temp;
+ }
+ }
+}
+
int add_palacios_memory(struct v3_mem_region *r) {
int pool_order = 0;
int node_id = 0;
+
struct v3_mem_region *keep;
+ INFO("Palacios Memory Add Request: type=%d, node=%d, base_addr=0x%llx, num_pages=%llu\n",r->type,r->node,r->base_addr,r->num_pages);
+
// fixup request regardless of its type
- if (r->num_pages*4096 < V3_CONFIG_MEM_BLOCK_SIZE) {
+ if (r->num_pages*4096 < get_palacios_mem_block_size()) {
WARNING("Allocating a memory pool smaller than the Palacios block size - may not be useful\n");
}
- if (pow2(get_order(r->num_pages*PAGE_SIZE)) != r->num_pages*PAGE_SIZE) {
- WARNING("Allocating a memory pool that is not a power of two (is %llu) - it will be rounded down!\n", r->num_pages*PAGE_SIZE);
+ if (pow2(get_order(r->num_pages*PAGE_SIZE)) != r->num_pages) {
+ WARNING("Allocating a memory pool that is not a power of two (is %llu) - it will be rounded down!\n", r->num_pages);
r->num_pages=pow2(get_order(r->num_pages*PAGE_SIZE));
- WARNING("Rounded power Allocating a memory pool that is not a power of two (rounded to %llu)\n", r->num_pages*PAGE_SIZE);
+ WARNING("Rounded request is for %llu pages\n", r->num_pages);
}
if (r->type==REQUESTED || r->type==REQUESTED32) {
- struct page * pgs = alloc_pages_node(r->node,
- r->type==REQUESTED ? GFP_KERNEL :
- r->type==REQUESTED32 ? GFP_DMA32 : GFP_KERNEL,
- get_order(r->num_pages));
+ struct page *pgs;
+
+ INFO("Attempting to allocate %llu pages of %s memory\n", r->num_pages,
+ r->type==REQUESTED ? "64 bit (unrestricted)" :
+ r->type==REQUESTED32 ? "32 bit (restricted)" : "unknown (assuming 64 bit unrestricted)");
+
+ pgs = alloc_pages_node(r->node,
+ r->type==REQUESTED ? GFP_KERNEL :
+ r->type==REQUESTED32 ? GFP_DMA32 : GFP_KERNEL,
+ get_order(r->num_pages*PAGE_SIZE));
if (!pgs) {
ERROR("Unable to satisfy allocation request\n");
palacios_free(keep);
return -1;
- }
+ }
r->base_addr = page_to_pfn(pgs) << PAGE_SHIFT;
}
return -1;
}
+ if ((node_id != r->node) && (r->node!=-1)) {
+ INFO("Memory add request is for node %d, but memory is in node %d\n",r->node,node_id);
+ }
+
pool_order = get_order(r->num_pages * PAGE_SIZE) + PAGE_SHIFT;
if (buddy_add_pool(memzones[node_id], r->base_addr, pool_order, keep)) {
ERROR("ALERT ALERT ALERT Unable to add pool to buddy allocator...\n");
if (r->type==REQUESTED || r->type==REQUESTED32) {
- free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages));
+ free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE));
}
palacios_free(keep);
return -1;
+// Remove a previously added memory pool from the buddy allocator of the
+// NUMA node containing req->base_addr (unforced: fails if still in use).
+// Returns 0 on success, -1 if the pool cannot be removed.
-int palacios_remove_memory(uintptr_t base_addr) {
- int node_id = numa_addr_to_node(base_addr);
+int remove_palacios_memory(struct v3_mem_region *req) {
+ int node_id = numa_addr_to_node(req->base_addr);
struct v3_mem_region *r;
- if (buddy_remove_pool(memzones[node_id], base_addr, 0, (void**)(&r))) { //unforced remove
- ERROR("Cannot remove memory at base address 0x%p because it is in use\n", (void*)base_addr);
+ if (buddy_remove_pool(memzones[node_id], req->base_addr, 0, (void**)(&r))) { //unforced remove
+ ERROR("Cannot remove memory at base address 0x%p\n", (void*)(req->base_addr));
return -1;
}
- if (r->type==REQUESTED || r->type==REQUESTED32) {
- free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages))
- } else {
- // user space resposible for onlining
+ // guard: the removed pool may carry no region metadata
+ if (r) {
+ if (r->type==REQUESTED || r->type==REQUESTED32) {
+ // pages were allocated by Palacios itself - return them to the kernel
+ free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE));
+ } else {
+ // user space responsible for onlining
+ }
+ palacios_free(r);
}
-
- palacios_free(r);
return 0;
}
+// Per-pool cleanup callback handed to buddy_deinit() during teardown.
+// 'meta' is the v3_mem_region recorded when the pool was added; frees
+// the backing pages only if Palacios allocated them (REQUESTED types),
+// then releases the metadata itself. Always returns 0.
+static int handle_free(void *meta)
+{
+ struct v3_mem_region *r = (struct v3_mem_region *)meta;
+
+ if (r) {
+ if (r->type==REQUESTED || r->type==REQUESTED32) {
+ //INFO("Freeing %llu pages at %p\n",r->num_pages,(void*)(r->base_addr));
+ free_pages((uintptr_t)__va(r->base_addr), get_order(r->num_pages*PAGE_SIZE));
+ } else {
+ // user space responsible for onlining
+ }
+ palacios_free(r);
+ }
+
+ return 0;
+}
+
+
+
int palacios_deinit_mm( void ) {
for (i = 0; i < numa_num_nodes(); i++) {
if (memzones[i]) {
- buddy_deinit(memzones[i]);
+ INFO("Deiniting memory zone %d\n",i);
+ buddy_deinit(memzones[i],handle_free);
}
// note that the memory is not onlined here - offlining and onlining
if (seed_addrs[i]) {
// free the seed regions
+ INFO("Freeing seed addrs %d\n",i);
free_pages((uintptr_t)__va(seed_addrs[i]), MAX_ORDER - 1);
}
}
int num_nodes = numa_num_nodes();
int node_id = 0;
- memzones = palacios_alloc_extended(sizeof(struct buddy_memzone *) * num_nodes, GFP_KERNEL);
+ INFO("memory manager init: MAX_ORDER=%d (%llu bytes)\n",MAX_ORDER, PAGE_SIZE*pow2(MAX_ORDER));
+
+ memzones = palacios_alloc_extended(sizeof(struct buddy_memzone *) * num_nodes, GFP_KERNEL,-1);
if (!memzones) {
ERROR("Cannot allocate space for memory zones\n");
memset(memzones, 0, sizeof(struct buddy_memzone *) * num_nodes);
- seed_addrs = palacios_alloc_extended(sizeof(uintptr_t) * num_nodes, GFP_KERNEL);
+ seed_addrs = palacios_alloc_extended(sizeof(uintptr_t) * num_nodes, GFP_KERNEL,-1);
if (!seed_addrs) {
ERROR("Cannot allocate space for seed addrs\n");
// See: alloc_pages_node()
{
- struct page * pgs = alloc_pages_node(node_id, GFP_KERNEL, MAX_ORDER - 1);
+ struct page * pgs;
+ int actual_node;
+
+ // attempt to first allocate below 4 GB for compatibility with
+ // 32 bit shadow paging
+ pgs = alloc_pages_node(node_id, GFP_DMA32, MAX_ORDER - 1);
+
if (!pgs) {
- ERROR("Could not allocate initial memory block for node %d\n", node_id);
- BUG_ON(!pgs);
- palacios_deinit_mm();
- return -1;
+ INFO("Could not allocate initial memory block for node %d below 4GB\n", node_id);
+
+ pgs = alloc_pages_node(node_id, GFP_KERNEL, MAX_ORDER - 1);
+
+ if (!pgs) {
+ INFO("Could not allocate initial memory block for node %d below 4GB\n", node_id);
+ if (!pgs) {
+ ERROR("Could not allocate initial memory block for node %d without restrictions\n", node_id);
+ palacios_deinit_mm();
+ return -1;
+ }
+ } else {
+ actual_node=numa_addr_to_node((uintptr_t)(page_to_pfn(pgs) << PAGE_SHIFT));
+ if (actual_node != node_id) {
+ WARNING("Initial 64 bit allocation attempt for node %d resulted in allocation on node %d\n",node_id,actual_node);
+ }
+ }
+
+ } else {
+ actual_node=numa_addr_to_node((uintptr_t)(page_to_pfn(pgs) << PAGE_SHIFT));
+ if (actual_node != node_id) {
+ WARNING("Initial 32bit-limited allocation attempt for node %d resulted in allocation on node %d\n",node_id,actual_node);
+ }
}
seed_addrs[node_id] = page_to_pfn(pgs) << PAGE_SHIFT;
}
+ // Initialization is done using the compile-time memory block size since
+ // at this point, we do not yet know what the run-time size is
zone = buddy_init(get_order(V3_CONFIG_MEM_BLOCK_SIZE) + PAGE_SHIFT, PAGE_SHIFT, node_id);
if (zone == NULL) {