2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_shadow_paging.h>
21 #include <palacios/vmm_swapbypass.h>
22 #include <palacios/vmm_ctrl_regs.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vm_guest_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/vmm_hashtable.h>
30 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
31 #include <palacios/vmm_telemetry.h>
39 // This is a hack and 32 bit linux specific.... need to fix...
42 uint32_t dev_index : 8;
43 uint32_t pg_index : 23;
47 struct shadow_pointer {
55 struct list_head node;
63 struct shadow_page_data {
67 struct list_head page_list_node;
71 struct swapbypass_local_state {
73 struct list_head page_list;
80 struct v3_swap_ops * ops;
86 struct swapbypass_vm_state {
87 struct v3_swap_dev devs[256];
89 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
91 uint32_t write_faults;
93 uint32_t mapped_pages;
98 struct hashtable * shdw_ptr_ht;
104 static uint_t swap_hash_fn(addr_t key) {
105 return v3_hash_long(key, 32);
109 static int swap_eq_fn(addr_t key1, addr_t key2) {
110 return (key1 == key2);
115 static inline uint32_t get_pg_index(pte32_t * pte) {
116 return ((struct swap_pte *)pte)->pg_index;
120 static inline uint32_t get_dev_index(pte32_t * pte) {
121 return ((struct swap_pte *)pte)->dev_index;
125 // Present = 0 and Dirty = 0
127 static inline int is_swapped_pte32(pte32_t * pte) {
128 return ((pte->present == 0) && (*(uint32_t *)pte != 0));
134 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core);
#ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
// Telemetry dump callback: print the per-VM SwapBypass counters.
// 'hdr' is the per-line prefix supplied by the telemetry framework;
// 'private_data' is unused (registered as NULL in sb_init).
static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);

    V3_Print(vm, VCORE_NONE, "%sSymbiotic Swap:\n", hdr);
    // Counters are uint32_t, so print with the unsigned conversion specifier
    V3_Print(vm, VCORE_NONE, "%s\tRead faults=%u\n", hdr, swap_state->read_faults);
    V3_Print(vm, VCORE_NONE, "%s\tWrite faults=%u\n", hdr, swap_state->write_faults);
    V3_Print(vm, VCORE_NONE, "%s\tMapped Pages=%u\n", hdr, swap_state->mapped_pages);
    V3_Print(vm, VCORE_NONE, "%s\tFlushes=%u\n", hdr, swap_state->flushes);
    V3_Print(vm, VCORE_NONE, "%s\tlist size=%u\n", hdr, swap_state->list_size);
}
#endif
159 static int get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_pte, pf_error_t * page_perms) {
160 uint64_t pte_val = (uint64_t)*(uint32_t *)guest_pte;
162 // symcall to check if page is in cache or on swap disk
163 if (v3_sym_call3(info, SYMCALL_MEM_LOOKUP, (uint64_t *)&vaddr, (uint64_t *)&pte_val, (uint64_t *)page_perms) == -1) {
164 PrintError(info->vm_info, info, "Sym call error?? that's weird... \n");
168 // V3_Print(info->vm_info, info, "page perms = %x\n", *(uint32_t *)page_perms);
178 static addr_t get_swapped_pg_addr(struct v3_vm_info * vm, pte32_t * guest_pte) {
179 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
180 int dev_index = get_dev_index(guest_pte);
181 struct v3_swap_dev * swp_dev = &(swap_state->devs[dev_index]);
184 if (! swp_dev->present ) {
188 return (addr_t)swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data);
193 static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr) {
194 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
195 struct list_head * shdw_ptr_list = NULL;
196 struct shadow_pointer * shdw_ptr = NULL;
200 if (swp_page_ptr == NULL) {
201 // PrintError(vm, VCORE_NONE, "Swapped out page not found on swap device\n");
205 shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);
207 if (shdw_ptr_list == NULL) {
208 shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
210 if (!shdw_ptr_list) {
211 PrintError(vm, VCORE_NONE, "Cannot allocate\n");
215 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
216 swap_state->list_size++;
218 INIT_LIST_HEAD(shdw_ptr_list);
219 v3_htable_insert(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte, (addr_t)shdw_ptr_list);
222 shdw_ptr = (struct shadow_pointer *)V3_Malloc(sizeof(struct shadow_pointer));
224 if (shdw_ptr == NULL) {
225 PrintError(vm, VCORE_NONE, "MEMORY LEAK\n");
226 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
227 telemetry_cb(vm, NULL, "");
232 shdw_ptr->shadow_pte = shadow_pte;
233 shdw_ptr->guest_pte = *(uint32_t *)guest_pte;
234 shdw_ptr->pg_index = get_pg_index(guest_pte);
235 shdw_ptr->dev_index = get_dev_index(guest_pte);
237 // We don't check for conflicts, because it should not happen...
238 list_add(&(shdw_ptr->node), shdw_ptr_list);
240 return PAGE_BASE_ADDR((addr_t)V3_PAddr(swp_page_ptr));
247 #include "vmm_shdw_pg_swapbypass_32.h"
248 #include "vmm_shdw_pg_swapbypass_32pae.h"
249 #include "vmm_shdw_pg_swapbypass_64.h"
252 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core) {
253 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
254 struct swapbypass_local_state * impl_state = (struct swapbypass_local_state *)(state->local_impl_data);
255 v3_reg_t cur_cr3 = core->ctrl_regs.cr3;
256 struct shadow_page_data * page_tail = NULL;
257 addr_t shdw_page = 0;
259 if (!list_empty(&(impl_state->page_list))) {
260 page_tail = list_tail_entry(&(impl_state->page_list), struct shadow_page_data, page_list_node);
263 if (page_tail->cr3 != cur_cr3) {
264 PrintDebug(core->vm_info, core, "Reusing old shadow Page: %p (cur_CR3=%p)(page_cr3=%p) \n",
265 (void *)(addr_t)page_tail->page_pa,
266 (void *)(addr_t)cur_cr3,
267 (void *)(addr_t)(page_tail->cr3));
269 list_move(&(page_tail->page_list_node), &(impl_state->page_list));
271 memset(V3_VAddr((void *)(page_tail->page_pa)), 0, PAGE_SIZE_4KB);
280 page_tail = (struct shadow_page_data *)V3_Malloc(sizeof(struct shadow_page_data));
283 PrintError(core->vm_info, core, "Cannot allocate\n");
287 page_tail->page_pa = (addr_t)V3_AllocPages(1);
289 if (!page_tail->page_pa) {
290 PrintError(core->vm_info, core, "Cannot allocate page\n");
294 PrintDebug(core->vm_info, core, "Allocating new shadow Page: %p (cur_cr3=%p)\n",
295 (void *)(addr_t)page_tail->page_pa,
296 (void *)(addr_t)cur_cr3);
298 page_tail->cr3 = cur_cr3;
299 list_add(&(page_tail->page_list_node), &(impl_state->page_list));
301 shdw_page = (addr_t)V3_VAddr((void *)(page_tail->page_pa));
302 memset((void *)shdw_page, 0, PAGE_SIZE_4KB);
312 int v3_register_swap_disk(struct v3_vm_info * vm, int dev_index,
313 struct v3_swap_ops * ops, void * private_data) {
314 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
316 swap_state->devs[dev_index].present = 1;
317 swap_state->devs[dev_index].private_data = private_data;
318 swap_state->devs[dev_index].ops = ops;
326 int v3_swap_in_notify(struct v3_vm_info * vm, int pg_index, int dev_index) {
327 struct list_head * shdw_ptr_list = NULL;
328 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
329 struct shadow_pointer * tmp_shdw_ptr = NULL;
330 struct shadow_pointer * shdw_ptr = NULL;
331 struct swap_pte guest_pte = {0, dev_index, pg_index};
333 shdw_ptr_list = (struct list_head * )v3_htable_search(swap_state->shdw_ptr_ht, *(addr_t *)&(guest_pte));
335 if (shdw_ptr_list == NULL) {
339 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
340 if ((shdw_ptr->pg_index == pg_index) &&
341 (shdw_ptr->dev_index == dev_index)) {
343 // Trigger faults for next shadow access
344 shdw_ptr->shadow_pte->present = 0;
346 // Delete entry from list
347 list_del(&(shdw_ptr->node));
357 int v3_swap_flush(struct v3_vm_info * vm) {
358 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
359 struct hashtable_iter * ht_iter = v3_create_htable_iter(swap_state->shdw_ptr_ht);
361 // PrintDebug(vm, VCORE_NONE, "Flushing Symbiotic Swap table\n");
363 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
364 swap_state->flushes++;
368 PrintError(vm, VCORE_NONE, "NULL iterator in swap flush!! Probably will crash soon...\n");
371 while (ht_iter->entry) {
372 struct shadow_pointer * tmp_shdw_ptr = NULL;
373 struct shadow_pointer * shdw_ptr = NULL;
374 struct list_head * shdw_ptr_list = (struct list_head *)v3_htable_get_iter_value(ht_iter);
376 // delete all swapped entries
377 // we can leave the list_head structures and reuse them for the next round
379 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
380 if (shdw_ptr == NULL) {
381 PrintError(vm, VCORE_NONE, "Null shadow pointer in swap flush!! Probably crashing soon...\n");
384 // Trigger faults for next shadow access
385 shdw_ptr->shadow_pte->present = 0;
387 // Delete entry from list
388 list_del(&(shdw_ptr->node));
392 v3_htable_iter_advance(ht_iter);
406 static int sb_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
407 struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
408 struct swapbypass_vm_state * sb_state = NULL;
410 memset(sb_state, 0, sizeof(struct swapbypass_vm_state));
411 sb_state->shdw_ptr_ht = v3_create_htable(0, swap_hash_fn, swap_eq_fn);
413 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
414 if (vm->enable_telemetry) {
415 v3_add_telemetry_cb(vm, telemetry_cb, NULL);
419 impl_state->impl_data = sb_state;
421 PrintDebug(vm, VCORE_NONE, "Initialized SwapBypass\n");
427 static int sb_deinit(struct v3_vm_info * vm) {
431 static int sb_local_init(struct guest_info * core) {
432 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
433 struct swapbypass_local_state * swapbypass_state = NULL;
435 V3_Print(core->vm_info, core, "SWAPBYPASS local initialization\n");
437 swapbypass_state = (struct swapbypass_local_state *)V3_Malloc(sizeof(struct swapbypass_local_state));
439 if (!swapbypass_state) {
440 PrintError(core->vm_info, core, "Cannot allocate\n");
445 INIT_LIST_HEAD(&(swapbypass_state->page_list));
447 state->local_impl_data = swapbypass_state;
453 static int sb_activate_shdw_pt(struct guest_info * core) {
454 switch (v3_get_vm_cpu_mode(core)) {
457 return activate_shadow_pt_32(core);
459 return activate_shadow_pt_32pae(core);
463 return activate_shadow_pt_64(core);
465 PrintError(core->vm_info, core, "Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
// Invalidate the shadow page tables by simply re-activating them:
// activation rebuilds/reloads the shadow context for the current CR3.
static int sb_invalidate_shdw_pt(struct guest_info * core) {
    return sb_activate_shdw_pt(core);
}
477 static int sb_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
479 switch (v3_get_vm_cpu_mode(core)) {
481 return handle_shadow_pagefault_32(core, fault_addr, error_code);
484 return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
488 return handle_shadow_pagefault_64(core, fault_addr, error_code);
491 PrintError(core->vm_info, core, "Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
497 static int sb_handle_invlpg(struct guest_info * core, addr_t vaddr) {
499 switch (v3_get_vm_cpu_mode(core)) {
501 return handle_shadow_invlpg_32(core, vaddr);
503 return handle_shadow_invlpg_32pae(core, vaddr);
507 return handle_shadow_invlpg_64(core, vaddr);
509 PrintError(core->vm_info, core, "Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
514 static struct v3_shdw_pg_impl sb_impl = {
515 .name = "SWAPBYPASS",
518 .local_init = sb_local_init,
519 .handle_pagefault = sb_handle_pf,
520 .handle_invlpg = sb_handle_invlpg,
521 .activate_shdw_pt = sb_activate_shdw_pt,
522 .invalidate_shdw_pt = sb_invalidate_shdw_pt
529 register_shdw_pg_impl(&sb_impl);