#include <palacios/vmm_direct_paging.h>
-static inline
-struct v3_shadow_region * insert_shadow_region(struct v3_vm_info * vm,
- struct v3_shadow_region * region);
static int mem_offset_hypercall(struct guest_info * info, uint_t hcall_id, void * private_data) {
return 0;
}
+static int unhandled_err(struct guest_info * core, addr_t guest_va, addr_t guest_pa,
+                         struct v3_mem_region * reg, pf_error_t access_info) {
+
+    PrintError("Unhandled memory access error\n");
+
+    v3_print_mem_map(core->vm_info);
+
+    v3_print_guest_state(core);
+
+    return -1;
+}
+
int v3_init_mem_map(struct v3_vm_info * vm) {
    struct v3_mem_map * map = &(vm->mem_map);
    addr_t mem_pages = vm->mem_size >> 12;

-    memset(&(map->base_region), 0, sizeof(struct v3_shadow_region));
+    memset(&(map->base_region), 0, sizeof(struct v3_mem_region));

-    map->shdw_regions.rb_node = NULL;
-
-    map->hook_hvas = V3_VAddr(V3_AllocPages(vm->num_cores));
+    map->mem_regions.rb_node = NULL;
// There is an underlying region that contains all of the guest memory
// PrintDebug("Mapping %d pages of memory (%u bytes)\n", (int)mem_pages, (uint_t)info->mem_size);
+ // 2MB page alignment needed for 2MB hardware nested paging
map->base_region.guest_start = 0;
map->base_region.guest_end = mem_pages * PAGE_SIZE_4KB;
+
+#ifdef CONFIG_ALIGNED_PG_ALLOC
+ map->base_region.host_addr = (addr_t)V3_AllocAlignedPages(mem_pages, vm->mem_align);
+#else
map->base_region.host_addr = (addr_t)V3_AllocPages(mem_pages);
+#endif
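+// vm->mem_align is presumably the VM's configured allocation alignment
+// (e.g. 2MB, so the base region can be backed by the 2MB nested page
+// table entries mentioned above)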
map->base_region.flags.read = 1;
map->base_region.flags.write = 1;
map->base_region.flags.exec = 1;
map->base_region.flags.base = 1;
map->base_region.flags.alloced = 1;
+
+ map->base_region.unhandled = unhandled_err;
    if ((void *)map->base_region.host_addr == NULL) {
        PrintError("Could not allocate Guest memory\n");
        return -1;
    }

    return 0;
}
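+// A minimal translation sketch, assuming a region reg that contains the
+// guest physical address gpa and is backed by host memory
+// (flags.alloced == 1):
+//
+//     addr_t hpa = (gpa - reg->guest_start) + reg->host_addr;
+//
+// The single base region set up above makes this a linear offset for all
+// of guest physical memory.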
-static inline addr_t get_hook_hva(struct guest_info * info) {
- return (addr_t)(info->vm_info->mem_map.hook_hvas + (PAGE_SIZE_4KB * info->cpu_id));
-}
-
-void v3_delete_shadow_map(struct v3_vm_info * vm) {
- struct rb_node * node = v3_rb_first(&(vm->mem_map.shdw_regions));
- struct v3_shadow_region * reg;
+void v3_delete_mem_map(struct v3_vm_info * vm) {
+ struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
+ struct v3_mem_region * reg;
struct rb_node * tmp_node = NULL;
while (node) {
- reg = rb_entry(node, struct v3_shadow_region, tree_node);
+ reg = rb_entry(node, struct v3_mem_region, tree_node);
tmp_node = node;
node = v3_rb_next(node);
- v3_delete_shadow_region(vm, reg);
+ v3_delete_mem_region(vm, reg);
}
V3_FreePage((void *)(vm->mem_map.base_region.host_addr));
- V3_FreePage(V3_PAddr((void *)(vm->mem_map.hook_hvas)));
}
-
-
-int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
- addr_t guest_addr_start,
- addr_t guest_addr_end,
- addr_t host_addr)
-{
- struct v3_shadow_region * entry = (struct v3_shadow_region *)V3_Malloc(sizeof(struct v3_shadow_region));
- memset(entry, 0, sizeof(struct v3_shadow_region));
+struct v3_mem_region * v3_create_mem_region(struct v3_vm_info * vm, uint16_t core_id,
+ addr_t guest_addr_start, addr_t guest_addr_end) {
+
+ struct v3_mem_region * entry = (struct v3_mem_region *)V3_Malloc(sizeof(struct v3_mem_region));
+ memset(entry, 0, sizeof(struct v3_mem_region));
entry->guest_start = guest_addr_start;
entry->guest_end = guest_addr_end;
- entry->host_addr = host_addr;
- entry->write_hook = NULL;
- entry->read_hook = NULL;
- entry->priv_data = NULL;
entry->core_id = core_id;
+ entry->unhandled = unhandled_err;
- entry->flags.read = 1;
- entry->flags.write = 1;
- entry->flags.exec = 1;
- entry->flags.alloced = 1;
-
- if (insert_shadow_region(vm, entry)) {
- V3_Free(entry);
- return -1;
- }
-
- return 0;
+ return entry;
}
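+// A usage sketch for a handled region, assuming a hypothetical callback
+// my_region_err() with the same signature as unhandled_err() above:
+//
+//     struct v3_mem_region * reg = v3_create_mem_region(vm, V3_MEM_CORE_ANY,
+//                                                       start, end);
+//     reg->unhandled = my_region_err;   // invoked when a fault on the
+//     v3_insert_mem_region(vm, reg);    // region cannot be satisfied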
-int v3_hook_write_mem(struct v3_vm_info * vm, uint16_t core_id,
- addr_t guest_addr_start, addr_t guest_addr_end, addr_t host_addr,
- int (*write)(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data),
- void * priv_data) {
- struct v3_shadow_region * entry = (struct v3_shadow_region *)V3_Malloc(sizeof(struct v3_shadow_region));
- memset(entry, 0, sizeof(struct v3_shadow_region));
+int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
+ addr_t guest_addr_start,
+ addr_t guest_addr_end,
+ addr_t host_addr)
+{
+ struct v3_mem_region * entry = NULL;
+
+ entry = v3_create_mem_region(vm, core_id,
+ guest_addr_start,
+ guest_addr_end);
- entry->guest_start = guest_addr_start;
- entry->guest_end = guest_addr_end;
entry->host_addr = host_addr;
- entry->write_hook = write;
- entry->read_hook = NULL;
- entry->priv_data = priv_data;
- entry->core_id = core_id;
- entry->flags.hook = 1;
+
entry->flags.read = 1;
+ entry->flags.write = 1;
entry->flags.exec = 1;
entry->flags.alloced = 1;
-
- if (insert_shadow_region(vm, entry)) {
- V3_Free(entry);
- return -1;
- }
-
- return 0;
-}
-
-int v3_hook_full_mem(struct v3_vm_info * vm, uint16_t core_id,
- addr_t guest_addr_start, addr_t guest_addr_end,
- int (*read)(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data),
- int (*write)(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data),
- void * priv_data) {
-
- struct v3_shadow_region * entry = (struct v3_shadow_region *)V3_Malloc(sizeof(struct v3_shadow_region));
- memset(entry, 0, sizeof(struct v3_shadow_region));
-
- entry->guest_start = guest_addr_start;
- entry->guest_end = guest_addr_end;
- entry->host_addr = (addr_t)NULL;
- entry->write_hook = write;
- entry->read_hook = read;
- entry->priv_data = priv_data;
- entry->core_id = core_id;
-
- entry->flags.hook = 1;
-
- if (insert_shadow_region(vm, entry)) {
+ if (v3_insert_mem_region(vm, entry) == -1) {
V3_Free(entry);
return -1;
}
+
+    return 0;
}
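+// Typical call, with hypothetical addresses: back the 64KB guest physical
+// window [0xa0000, 0xb0000) with 16 freshly allocated host pages:
+//
+//     if (v3_add_shadow_mem(vm, V3_MEM_CORE_ANY, 0xa0000, 0xb0000,
+//                           (addr_t)V3_AllocPages(16)) == -1) {
+//         PrintError("Could not map guest region\n");
+//     }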
-// This will unhook the memory hook registered at start address
-// We do not support unhooking subregions
-int v3_unhook_mem(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr_start) {
- struct v3_shadow_region * reg = v3_get_shadow_region(vm, core_id, guest_addr_start);
-
- if (!reg->flags.hook) {
- PrintError("Trying to unhook a non hooked memory region (addr=%p)\n", (void *)guest_addr_start);
- return -1;
- }
-
- v3_delete_shadow_region(vm, reg);
-
- return 0;
-}
-
-
static inline
-struct v3_shadow_region * __insert_shadow_region(struct v3_vm_info * vm,
- struct v3_shadow_region * region) {
- struct rb_node ** p = &(vm->mem_map.shdw_regions.rb_node);
+struct v3_mem_region * __insert_mem_region(struct v3_vm_info * vm,
+ struct v3_mem_region * region) {
+ struct rb_node ** p = &(vm->mem_map.mem_regions.rb_node);
struct rb_node * parent = NULL;
- struct v3_shadow_region * tmp_region;
+ struct v3_mem_region * tmp_region;
while (*p) {
parent = *p;
- tmp_region = rb_entry(parent, struct v3_shadow_region, tree_node);
+ tmp_region = rb_entry(parent, struct v3_mem_region, tree_node);
        if (region->guest_end <= tmp_region->guest_start) {
            p = &(*p)->rb_left;
        } else if (region->guest_start >= tmp_region->guest_end) {
            p = &(*p)->rb_right;
        } else {
            // regions overlap: order them by core id, and report a
            // conflict (by returning the existing region) when the
            // core ids also match
            if (region->core_id < tmp_region->core_id) {
                p = &(*p)->rb_left;
            } else if (region->core_id > tmp_region->core_id) {
                p = &(*p)->rb_right;
            } else {
                return tmp_region;
            }
        }
    }

    rb_link_node(&(region->tree_node), parent, p);

    return NULL;
}
-static inline
-struct v3_shadow_region * insert_shadow_region(struct v3_vm_info * vm,
- struct v3_shadow_region * region) {
- struct v3_shadow_region * ret;
+
+int v3_insert_mem_region(struct v3_vm_info * vm, struct v3_mem_region * region) {
+ struct v3_mem_region * ret;
int i = 0;
- if ((ret = __insert_shadow_region(vm, region))) {
- return ret;
+ if ((ret = __insert_mem_region(vm, region))) {
+ return -1;
}
-    v3_rb_insert_color(&(region->tree_node), &(vm->mem_map.shdw_regions));
+    v3_rb_insert_color(&(region->tree_node), &(vm->mem_map.mem_regions));

    // flush each core's virtual page tables so the new region is visible;
    // 3 cases: shadow, shadow passthrough, and nested
    for (i = 0; i < vm->num_cores; i++) {
        struct guest_info * info = &(vm->cores[i]);

        if (info->shdw_pg_mode == SHADOW_PAGING) {
            v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);

            if (mem_mode == PHYSICAL_MEM) {
                addr_t cur_addr;

                for (cur_addr = region->guest_start;
                     cur_addr < region->guest_end;
                     cur_addr += PAGE_SIZE_4KB) {
                    v3_invalidate_passthrough_addr(info, cur_addr);
                }
            } else {
                v3_invalidate_shadow_pts(info);
            }
        } else if (info->shdw_pg_mode == NESTED_PAGING) {
            addr_t cur_addr;

            for (cur_addr = region->guest_start;
                 cur_addr < region->guest_end;
                 cur_addr += PAGE_SIZE_4KB) {
                v3_invalidate_nested_addr(info, cur_addr);
            }
        }
    }

-    return NULL;
-}
+
+    return 0;
+}
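+// Insertion rejects overlaps on the same core; a sketch, with hypothetical
+// regions A = [0x1000, 0x3000) and B = [0x2000, 0x4000), both for core 0,
+// created via v3_create_mem_region():
+//
+//     v3_insert_mem_region(vm, A);   // returns 0
+//     v3_insert_mem_region(vm, B);   // returns -1: B overlaps A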
-
-
-
-
-
-
-int v3_handle_mem_hook(struct guest_info * info, addr_t guest_va, addr_t guest_pa,
- struct v3_shadow_region * reg, pf_error_t access_info) {
-
- addr_t op_addr = 0;
-
- if (reg->flags.alloced == 0) {
- op_addr = get_hook_hva(info);
- } else {
- op_addr = (addr_t)V3_VAddr((void *)v3_get_shadow_addr(reg, info->cpu_id, guest_pa));
- }
-
-
- if (access_info.write == 1) {
- // Write Operation
-
- if (v3_emulate_write_op(info, guest_va, guest_pa, op_addr,
- reg->write_hook, reg->priv_data) == -1) {
- PrintError("Write Full Hook emulation failed\n");
- return -1;
- }
- } else {
- // Read Operation
-
- if (reg->flags.read == 1) {
- PrintError("Tried to emulate read for a guest Readable page\n");
- return -1;
- }
-
- if (v3_emulate_read_op(info, guest_va, guest_pa, op_addr,
- reg->read_hook, reg->write_hook,
- reg->priv_data) == -1) {
- PrintError("Read Full Hook emulation failed\n");
- return -1;
- }
-
- }
-
-
-    return 0;
-}
+
-
-struct v3_shadow_region * v3_get_shadow_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
- struct rb_node * n = vm->mem_map.shdw_regions.rb_node;
- struct v3_shadow_region * reg = NULL;
+struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
+ struct rb_node * n = vm->mem_map.mem_regions.rb_node;
+ struct v3_mem_region * reg = NULL;
while (n) {
- reg = rb_entry(n, struct v3_shadow_region, tree_node);
+
+ reg = rb_entry(n, struct v3_mem_region, tree_node);
if (guest_addr < reg->guest_start) {
n = n->rb_left;
} else if (guest_addr >= reg->guest_end) {
n = n->rb_right;
} else {
- if ((core_id == reg->core_id) ||
- (reg->core_id == V3_MEM_CORE_ANY)) {
- return reg;
- } else {
+ if (reg->core_id == V3_MEM_CORE_ANY) {
+ // found relevant region, it's available on all cores
+ return reg;
+ } else if (core_id == reg->core_id) {
+ // found relevant region, it's available on the indicated core
+ return reg;
+        } else if (core_id < reg->core_id) {
+            // go left: the region is private to a higher core id
+            n = n->rb_left;
+        } else if (core_id > reg->core_id) {
+            // go right: the region is private to a lower core id
            n = n->rb_right;
+ } else {
+ PrintDebug("v3_get_mem_region: Impossible!\n");
+ return NULL;
}
}
}
    // No region is registered here, so check whether it is a valid address in the base region

    if (guest_addr >= vm->mem_map.base_region.guest_end) {
- PrintError("Guest Address Exceeds Base Memory Size (ga=%p), (limit=%p)\n",
- (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end);
+ PrintError("Guest Address Exceeds Base Memory Size (ga=0x%p), (limit=0x%p) (core=0x%x)\n",
+ (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end, core_id);
v3_print_mem_map(vm);
return NULL;
}
-
+
+ return &(vm->mem_map.base_region);
+}
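+// Lookup sketch: resolve a guest physical address for the faulting core,
+// falling back to the base region when no explicit region matches:
+//
+//     struct v3_mem_region * reg = v3_get_mem_region(vm, core->cpu_id, guest_pa);
+//     if (reg == NULL) {
+//         return -1;    // address lies beyond the end of guest memory
+//     }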
+
+
+
+/* Given an address, find the successor region. If the address is within a region, return that
+ * region. Input is an address, because the address may not have a region associated with it.
+ *
+ * Returns a region following or touching the given address. If address is invalid, NULL is
+ * returned, else the base region is returned if no region exists at or after the given address.
+ */
+struct v3_mem_region * v3_get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
+ struct rb_node * current_n = vm->mem_map.mem_regions.rb_node;
+ struct rb_node * successor_n = NULL; /* left-most node greater than guest_addr */
+ struct v3_mem_region * current_r = NULL;
+
+    /* current_n walks the tree looking for a region containing guest_addr, going right when the
+     * current region lies below the address and left when it lies above. Each time current_n
+     * lands above guest_addr it is closer to the address than any previous candidate, so record
+     * it: successor_n <- current_n. When the walk ends, successor_n is the left-most region
+     * above guest_addr.
+     */
+
+ /* | is address, ---- is region, + is intersection */
+ while (current_n) {
+ current_r = rb_entry(current_n, struct v3_mem_region, tree_node);
+ if (current_r->guest_start > guest_addr) { /* | ---- */
+ successor_n = current_n;
+ current_n = current_n->rb_left;
+ } else {
+ if (current_r->guest_end > guest_addr) {
+ return current_r; /* +--- or --+- */
+ }
+ current_n = current_n->rb_right; /* ---- | */
+ }
+ }
+
+ /* Address does not have its own region. Check if it's a valid address in the base region */
+
+ if (guest_addr >= vm->mem_map.base_region.guest_end) {
+ PrintError("%s: Guest Address Exceeds Base Memory Size (ga=%p), (limit=%p)\n",
+ __FUNCTION__, (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end);
+ v3_print_mem_map(vm);
+ return NULL;
+ }
+
return &(vm->mem_map.base_region);
}
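+// Successor semantics, assuming a single hypothetical region H covering
+// [0x3000, 0x5000) within guest memory:
+//
+//     v3_get_next_mem_region(vm, core_id, 0x2000)  ->  H  (next region above)
+//     v3_get_next_mem_region(vm, core_id, 0x4000)  ->  H  (0x4000 lies inside H)
+//     v3_get_next_mem_region(vm, core_id, 0x6000)  ->  base region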
-void v3_delete_shadow_region(struct v3_vm_info * vm, struct v3_shadow_region * reg) {
+void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
int i = 0;
    if (reg == NULL) {
        return;
    }

    // flush each core's virtual page tables, exactly as in
    // v3_insert_mem_region() above, so no stale mappings of the region remain
    for (i = 0; i < vm->num_cores; i++) {
        struct guest_info * info = &(vm->cores[i]);

        if (info->shdw_pg_mode == SHADOW_PAGING) {
            v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);

            if (mem_mode == PHYSICAL_MEM) {
                addr_t cur_addr;

                for (cur_addr = reg->guest_start;
                     cur_addr < reg->guest_end;
                     cur_addr += PAGE_SIZE_4KB) {
                    v3_invalidate_passthrough_addr(info, cur_addr);
                }
            } else {
                v3_invalidate_shadow_pts(info);
            }
        } else if (info->shdw_pg_mode == NESTED_PAGING) {
            addr_t cur_addr;

            for (cur_addr = reg->guest_start;
                 cur_addr < reg->guest_end;
                 cur_addr += PAGE_SIZE_4KB) {
                v3_invalidate_nested_addr(info, cur_addr);
            }
        }
    }
- v3_rb_erase(&(reg->tree_node), &(vm->mem_map.shdw_regions));
+ v3_rb_erase(&(reg->tree_node), &(vm->mem_map.mem_regions));
V3_Free(reg);
}
+// Determine if a given address can be handled by a large page of the requested size
+uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, uint32_t req_size) {
+    addr_t pg_start = 0UL, pg_end = 0UL; // large page containing the faulting address
+ struct v3_mem_region * pg_next_reg = NULL; // next immediate mem reg after page start addr
+ uint32_t page_size = PAGE_SIZE_4KB;
+
+ /* If the guest has been configured for large pages, then we must check for hooked regions of
+ * memory which may overlap with the large page containing the faulting address (due to
+ * potentially differing access policies in place for e.g. i/o devices and APIC). A large page
+ * can be used if a) no region overlaps the page [or b) a region does overlap but fully contains
+ * the page]. The [bracketed] text pertains to the #if 0'd code below, state D. TODO modify this
+ * note if someone decides to enable this optimization. It can be tested with the SeaStar
+ * mapping.
+ *
+ * Examples: (CAPS regions are returned by v3_get_next_mem_region; state A returns the base reg)
+ *
+ * |region| |region| 2MiB mapped (state A)
+ * |reg| |REG| 2MiB mapped (state B)
+ * |region| |reg| |REG| |region| |reg| 4KiB mapped (state C)
+ * |reg| |reg| |--REGION---| [2MiB mapped (state D)]
+ * |--------------------------------------------| RAM
+ * ^ fault addr
+ * |----|----|----|----|----|page|----|----|----| 2MB pages
+ * >>>>>>>>>>>>>>>>>>>> search space
+ */
+
+
+ // guest page maps to a host page + offset (so when we shift, it aligns with a host page)
+ switch (req_size) {
+ case PAGE_SIZE_4KB:
+ return PAGE_SIZE_4KB;
+ case PAGE_SIZE_2MB:
+ pg_start = PAGE_ADDR_2MB(fault_addr);
+ pg_end = (pg_start + PAGE_SIZE_2MB);
+ break;
+ case PAGE_SIZE_4MB:
+ pg_start = PAGE_ADDR_4MB(fault_addr);
+ pg_end = (pg_start + PAGE_SIZE_4MB);
+ break;
+ case PAGE_SIZE_1GB:
+ pg_start = PAGE_ADDR_1GB(fault_addr);
+ pg_end = (pg_start + PAGE_SIZE_1GB);
+ break;
+ default:
+ PrintError("Invalid large page size requested.\n");
+ return -1;
+ }
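+    // e.g. req_size = PAGE_SIZE_2MB with fault_addr = 0x00601234 yields
+    // pg_start = 0x00600000 and pg_end = 0x00800000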
+
+ //PrintDebug("%s: page [%p,%p) contains address\n", __FUNCTION__, (void *)pg_start, (void *)pg_end);
+ pg_next_reg = v3_get_next_mem_region(core->vm_info, core->cpu_id, pg_start);
+ if (pg_next_reg == NULL) {
+ PrintError("%s: Error: address not in base region, %p\n", __FUNCTION__, (void *)fault_addr);
+ return PAGE_SIZE_4KB;
+ }
-addr_t v3_get_shadow_addr(struct v3_shadow_region * reg, uint16_t core_id, addr_t guest_addr) {
-    if (reg && (reg->flags.alloced == 1)) {
-        return (guest_addr - reg->guest_start) + reg->host_addr;
-    } else {
-        // PrintError("MEM Region Invalid\n");
-        return 0;
-    }
-}
+    if (pg_next_reg->flags.base == 1) {
+        page_size = req_size; // State A
+        //PrintDebug("%s: base region [%p,%p) contains page.\n", __FUNCTION__,
+        //           (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end);
+    } else {
+#if 0 // State B/C and D optimization
+        if ((pg_next_reg->guest_end >= pg_end) &&
+            ((pg_next_reg->guest_start >= pg_end) || (pg_next_reg->guest_start <= pg_start))) {
+            page_size = req_size;
+        }
+
+        PrintDebug("%s: region [%p,%p) %s partially overlap with page\n", __FUNCTION__,
+                   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
+                   (page_size == req_size) ? "does not" : "does");
+#else // State B/C
+        if (pg_next_reg->guest_start >= pg_end) {
+            page_size = req_size;
+        }
+
+        PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__,
+                   (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
+                   (page_size == req_size) ? "does not" : "does");
+#endif
+    }
+
+    return page_size;
+}
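+// Worked example, assuming a hooked APIC page at [0xfee00000, 0xfee01000):
+// a 2MB request faulting at 0x00201000 finds no region before its page end
+// (state A/B) and is granted 2MB, while one faulting at 0xfee01234 finds
+// the APIC region inside the same 2MB page (state C) and falls back to 4KB.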
-
+// For a page-aligned address, compute the largest page size to which
+// the underlying physical page is aligned
+uint32_t v3_compute_page_alignment(addr_t page_addr)
+{
+ if (PAGE_OFFSET_1GB(page_addr) == 0) {
+ return PAGE_SIZE_1GB;
+ } else if (PAGE_OFFSET_4MB(page_addr) == 0) {
+ return PAGE_SIZE_4MB;
+ } else if (PAGE_OFFSET_2MB(page_addr) == 0) {
+ return PAGE_SIZE_2MB;
+ } else if (PAGE_OFFSET_4KB(page_addr) == 0) {
+ return PAGE_SIZE_4KB;
+ } else {
+ PrintError("Non-page aligned address passed to %s.\n", __FUNCTION__);
+ return 0;
+ }
+}
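+// Alignment falls out of the low-order bits of the address, e.g.:
+//
+//     v3_compute_page_alignment(0x40000000) -> PAGE_SIZE_1GB
+//     v3_compute_page_alignment(0x00600000) -> PAGE_SIZE_2MB
+//     v3_compute_page_alignment(0x00001000) -> PAGE_SIZE_4KB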
void v3_print_mem_map(struct v3_vm_info * vm) {
- struct rb_node * node = v3_rb_first(&(vm->mem_map.shdw_regions));
- struct v3_shadow_region * reg = &(vm->mem_map.base_region);
+ struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
+ struct v3_mem_region * reg = &(vm->mem_map.base_region);
int i = 0;
- V3_Print("Memory Layout:\n");
+ V3_Print("Memory Layout (all cores):\n");
- V3_Print("Base Region: 0x%p - 0x%p -> 0x%p\n",
+ V3_Print("Base Region (all cores): 0x%p - 0x%p -> 0x%p\n",
(void *)(reg->guest_start),
(void *)(reg->guest_end - 1),
(void *)(reg->host_addr));
    // if the memory map is empty, don't print it
    if (node == NULL) {
        return;
    }

do {
- reg = rb_entry(node, struct v3_shadow_region, tree_node);
+ reg = rb_entry(node, struct v3_mem_region, tree_node);
V3_Print("%d: 0x%p - 0x%p -> 0x%p\n", i,
(void *)(reg->guest_start),
(void *)(reg->guest_end - 1),
(void *)(reg->host_addr));
- V3_Print("\t(flags=%x) (WriteHook = 0x%p) (ReadHook = 0x%p)\n",
- reg->flags.value,
- (void *)(reg->write_hook),
- (void *)(reg->read_hook));
+ V3_Print("\t(flags=0x%x) (core=0x%x) (unhandled = 0x%p)\n",
+ reg->flags.value,
+ reg->core_id,
+ reg->unhandled);
i++;
    } while ((node = v3_rb_next(node)));
}