2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_shadow_paging.h>
21 #include <palacios/vmm_swapbypass.h>
22 #include <palacios/vmm_ctrl_regs.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vm_guest_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/vmm_hashtable.h>
30 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
31 #include <palacios/vmm_telemetry.h>
39 // This is a hack and 32 bit linux specific.... need to fix...
42 uint32_t dev_index : 8;
43 uint32_t pg_index : 23;
47 struct shadow_pointer {
55 struct list_head node;
63 struct shadow_page_data {
67 struct list_head page_list_node;
71 struct swapbypass_local_state {
73 struct list_head page_list;
80 struct v3_swap_ops * ops;
86 struct swapbypass_vm_state {
87 struct v3_swap_dev devs[256];
89 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
91 uint32_t write_faults;
93 uint32_t mapped_pages;
98 struct hashtable * shdw_ptr_ht;
104 static uint_t swap_hash_fn(addr_t key) {
105 return v3_hash_long(key, 32);
109 static int swap_eq_fn(addr_t key1, addr_t key2) {
110 return (key1 == key2);
115 static inline uint32_t get_pg_index(pte32_t * pte) {
116 return ((struct swap_pte *)pte)->pg_index;
120 static inline uint32_t get_dev_index(pte32_t * pte) {
121 return ((struct swap_pte *)pte)->dev_index;
125 // Present = 0 and Dirty = 0
127 static inline int is_swapped_pte32(pte32_t * pte) {
128 return ((pte->present == 0) && (*(uint32_t *)pte != 0));
134 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core);
#ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
// Telemetry callback registered in sb_init(): dumps the per-VM SwapBypass
// counters, prefixing every line with the caller-supplied header string.
static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);

    V3_Print(vm, VCORE_NONE, "%sSymbiotic Swap:\n", hdr);
    V3_Print(vm, VCORE_NONE, "%s\tRead faults=%d\n", hdr, swap_state->read_faults);
    V3_Print(vm, VCORE_NONE, "%s\tWrite faults=%d\n", hdr, swap_state->write_faults);
    V3_Print(vm, VCORE_NONE, "%s\tMapped Pages=%d\n", hdr, swap_state->mapped_pages);
    V3_Print(vm, VCORE_NONE, "%s\tFlushes=%d\n", hdr, swap_state->flushes);
    V3_Print(vm, VCORE_NONE, "%s\tlist size=%d\n", hdr, swap_state->list_size);
}
#endif
159 static int get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_pte, pf_error_t * page_perms) {
160 uint64_t pte_val = (uint64_t)*(uint32_t *)guest_pte;
162 // symcall to check if page is in cache or on swap disk
163 if (v3_sym_call3(info, SYMCALL_MEM_LOOKUP, (uint64_t *)&vaddr, (uint64_t *)&pte_val, (uint64_t *)page_perms) == -1) {
164 PrintError(info->vm_info, info, "Sym call error?? that's weird... \n");
168 // V3_Print(info->vm_info, info, "page perms = %x\n", *(uint32_t *)page_perms);
178 static addr_t get_swapped_pg_addr(struct v3_vm_info * vm, pte32_t * guest_pte) {
179 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
180 int dev_index = get_dev_index(guest_pte);
181 struct v3_swap_dev * swp_dev = &(swap_state->devs[dev_index]);
184 if (! swp_dev->present ) {
188 return (addr_t)swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data);
193 static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr) {
194 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
195 struct list_head * shdw_ptr_list = NULL;
196 struct shadow_pointer * shdw_ptr = NULL;
200 if (swp_page_ptr == NULL) {
201 // PrintError(vm, VCORE_NONE, "Swapped out page not found on swap device\n");
205 shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);
207 if (shdw_ptr_list == NULL) {
208 shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
210 if (!shdw_ptr_list) {
211 PrintError(vm, VCORE_NONE, "Cannot allocate\n");
215 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
216 swap_state->list_size++;
218 INIT_LIST_HEAD(shdw_ptr_list);
219 v3_htable_insert(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte, (addr_t)shdw_ptr_list);
222 shdw_ptr = (struct shadow_pointer *)V3_Malloc(sizeof(struct shadow_pointer));
224 if (shdw_ptr == NULL) {
225 PrintError(vm, VCORE_NONE, "MEMORY LEAK\n");
226 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
227 telemetry_cb(vm, NULL, "");
232 shdw_ptr->shadow_pte = shadow_pte;
233 shdw_ptr->guest_pte = *(uint32_t *)guest_pte;
234 shdw_ptr->pg_index = get_pg_index(guest_pte);
235 shdw_ptr->dev_index = get_dev_index(guest_pte);
237 // We don't check for conflicts, because it should not happen...
238 list_add(&(shdw_ptr->node), shdw_ptr_list);
240 return PAGE_BASE_ADDR((addr_t)V3_PAddr(swp_page_ptr));
247 #include "vmm_shdw_pg_swapbypass_32.h"
248 #include "vmm_shdw_pg_swapbypass_32pae.h"
249 #include "vmm_shdw_pg_swapbypass_64.h"
251 static inline int get_constraints(struct guest_info *core)
253 switch (v3_get_vm_cpu_mode(core)) {
256 return V3_ALLOC_PAGES_CONSTRAINT_4GB;
264 return V3_ALLOC_PAGES_CONSTRAINT_4GB;
267 return V3_ALLOC_PAGES_CONSTRAINT_4GB;
271 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core) {
272 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
273 struct swapbypass_local_state * impl_state = (struct swapbypass_local_state *)(state->local_impl_data);
274 v3_reg_t cur_cr3 = core->ctrl_regs.cr3;
275 struct shadow_page_data * page_tail = NULL;
276 addr_t shdw_page = 0;
278 if (!list_empty(&(impl_state->page_list))) {
279 page_tail = list_tail_entry(&(impl_state->page_list), struct shadow_page_data, page_list_node);
282 if (page_tail->cr3 != cur_cr3) {
283 PrintDebug(core->vm_info, core, "Reusing old shadow Page: %p (cur_CR3=%p)(page_cr3=%p) \n",
284 (void *)(addr_t)page_tail->page_pa,
285 (void *)(addr_t)cur_cr3,
286 (void *)(addr_t)(page_tail->cr3));
288 list_move(&(page_tail->page_list_node), &(impl_state->page_list));
290 memset(V3_VAddr((void *)(page_tail->page_pa)), 0, PAGE_SIZE_4KB);
299 page_tail = (struct shadow_page_data *)V3_Malloc(sizeof(struct shadow_page_data));
302 PrintError(core->vm_info, core, "Cannot allocate\n");
306 page_tail->page_pa = (addr_t)V3_AllocPagesExtended(1,PAGE_SIZE_4KB,-1,get_constraints(core));
308 if (!page_tail->page_pa) {
309 PrintError(core->vm_info, core, "Cannot allocate page\n");
313 PrintDebug(core->vm_info, core, "Allocating new shadow Page: %p (cur_cr3=%p)\n",
314 (void *)(addr_t)page_tail->page_pa,
315 (void *)(addr_t)cur_cr3);
317 page_tail->cr3 = cur_cr3;
318 list_add(&(page_tail->page_list_node), &(impl_state->page_list));
320 shdw_page = (addr_t)V3_VAddr((void *)(page_tail->page_pa));
321 memset((void *)shdw_page, 0, PAGE_SIZE_4KB);
331 int v3_register_swap_disk(struct v3_vm_info * vm, int dev_index,
332 struct v3_swap_ops * ops, void * private_data) {
333 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
335 swap_state->devs[dev_index].present = 1;
336 swap_state->devs[dev_index].private_data = private_data;
337 swap_state->devs[dev_index].ops = ops;
345 int v3_swap_in_notify(struct v3_vm_info * vm, int pg_index, int dev_index) {
346 struct list_head * shdw_ptr_list = NULL;
347 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
348 struct shadow_pointer * tmp_shdw_ptr = NULL;
349 struct shadow_pointer * shdw_ptr = NULL;
350 struct swap_pte guest_pte = {0, dev_index, pg_index};
352 shdw_ptr_list = (struct list_head * )v3_htable_search(swap_state->shdw_ptr_ht, *(addr_t *)&(guest_pte));
354 if (shdw_ptr_list == NULL) {
358 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
359 if ((shdw_ptr->pg_index == pg_index) &&
360 (shdw_ptr->dev_index == dev_index)) {
362 // Trigger faults for next shadow access
363 shdw_ptr->shadow_pte->present = 0;
365 // Delete entry from list
366 list_del(&(shdw_ptr->node));
376 int v3_swap_flush(struct v3_vm_info * vm) {
377 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
378 struct hashtable_iter * ht_iter = v3_create_htable_iter(swap_state->shdw_ptr_ht);
380 // PrintDebug(vm, VCORE_NONE, "Flushing Symbiotic Swap table\n");
382 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
383 swap_state->flushes++;
387 PrintError(vm, VCORE_NONE, "NULL iterator in swap flush!! Probably will crash soon...\n");
390 while (ht_iter->entry) {
391 struct shadow_pointer * tmp_shdw_ptr = NULL;
392 struct shadow_pointer * shdw_ptr = NULL;
393 struct list_head * shdw_ptr_list = (struct list_head *)v3_htable_get_iter_value(ht_iter);
395 // delete all swapped entries
396 // we can leave the list_head structures and reuse them for the next round
398 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
399 if (shdw_ptr == NULL) {
400 PrintError(vm, VCORE_NONE, "Null shadow pointer in swap flush!! Probably crashing soon...\n");
403 // Trigger faults for next shadow access
404 shdw_ptr->shadow_pte->present = 0;
406 // Delete entry from list
407 list_del(&(shdw_ptr->node));
411 v3_htable_iter_advance(ht_iter);
425 static int sb_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
426 struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
427 struct swapbypass_vm_state * sb_state = NULL;
429 memset(sb_state, 0, sizeof(struct swapbypass_vm_state));
430 sb_state->shdw_ptr_ht = v3_create_htable(0, swap_hash_fn, swap_eq_fn);
432 #ifdef V3_CONFIG_SWAPBYPASS_TELEMETRY
433 if (vm->enable_telemetry) {
434 v3_add_telemetry_cb(vm, telemetry_cb, NULL);
438 impl_state->impl_data = sb_state;
440 PrintDebug(vm, VCORE_NONE, "Initialized SwapBypass\n");
// Per-VM teardown hook.
// NOTE(review): the body is not visible in this chunk; teardown appears
// unimplemented (the per-VM state and hashtable are never freed) — confirm
// against the full source and add proper cleanup separately.
static int sb_deinit(struct v3_vm_info * vm) {
    return -1;
}
450 static int sb_local_init(struct guest_info * core) {
451 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
452 struct swapbypass_local_state * swapbypass_state = NULL;
454 V3_Print(core->vm_info, core, "SWAPBYPASS local initialization\n");
456 swapbypass_state = (struct swapbypass_local_state *)V3_Malloc(sizeof(struct swapbypass_local_state));
458 if (!swapbypass_state) {
459 PrintError(core->vm_info, core, "Cannot allocate\n");
464 INIT_LIST_HEAD(&(swapbypass_state->page_list));
466 state->local_impl_data = swapbypass_state;
472 static int sb_activate_shdw_pt(struct guest_info * core) {
473 switch (v3_get_vm_cpu_mode(core)) {
476 return activate_shadow_pt_32(core);
478 return activate_shadow_pt_32pae(core);
482 return activate_shadow_pt_64(core);
484 PrintError(core->vm_info, core, "Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
// Invalidating the shadow tables is implemented as a full re-activation,
// which rebuilds the shadow context for the current guest CR3.
static int sb_invalidate_shdw_pt(struct guest_info * core) {
    return sb_activate_shdw_pt(core);
}
496 static int sb_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
498 switch (v3_get_vm_cpu_mode(core)) {
500 return handle_shadow_pagefault_32(core, fault_addr, error_code);
503 return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
507 return handle_shadow_pagefault_64(core, fault_addr, error_code);
510 PrintError(core->vm_info, core, "Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
516 static int sb_handle_invlpg(struct guest_info * core, addr_t vaddr) {
518 switch (v3_get_vm_cpu_mode(core)) {
520 return handle_shadow_invlpg_32(core, vaddr);
522 return handle_shadow_invlpg_32pae(core, vaddr);
526 return handle_shadow_invlpg_64(core, vaddr);
528 PrintError(core->vm_info, core, "Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
533 static struct v3_shdw_pg_impl sb_impl = {
534 .name = "SWAPBYPASS",
537 .local_init = sb_local_init,
538 .handle_pagefault = sb_handle_pf,
539 .handle_invlpg = sb_handle_invlpg,
540 .activate_shdw_pt = sb_activate_shdw_pt,
541 .invalidate_shdw_pt = sb_invalidate_shdw_pt
// NOTE(review): presumably a file-scope registration macro from
// vmm_shadow_paging.h that adds sb_impl to the global table of shadow
// paging implementations at link time — confirm against the header.
register_shdw_pg_impl(&sb_impl);