2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_shadow_paging.h>
21 #include <palacios/vmm_swapbypass.h>
22 #include <palacios/vmm_ctrl_regs.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vm_guest_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/vmm_hashtable.h>
30 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
31 #include <palacios/vmm_telemetry.h>
39 // This is a hack and 32 bit linux specific.... need to fix...
// Bit-field layout used to decode a non-present 32-bit guest PTE as a swap
// entry; get_dev_index() and get_pg_index() cast a pte32_t to this type.
42 uint32_t dev_index : 8;   // selects an entry of swapbypass_vm_state.devs[]
43 uint32_t pg_index : 23;   // page slot handed to the device's get_swap_entry()
// One tracked shadow mapping of a swapped-out guest page. map_swp_page()
// allocates an entry per shadow PTE it maps and records the shadow PTE, the
// raw guest PTE value and the swap device/page indices in it.
47 struct shadow_pointer {
55 struct list_head node;   // membership in the per-guest-PTE list kept in shdw_ptr_ht
// Bookkeeping for one shadow page-table page. create_new_shadow_pt() records
// the CR3 value a page was allocated under (cr3) and the page's physical
// address (page_pa) in it.
63 struct shadow_page_data {
67 struct list_head page_list_node;   // links the page into swapbypass_local_state.page_list
// Per-core SwapBypass state; sb_local_init() allocates it and stores it in
// core->shdw_pg_state.local_impl_data.
71 struct swapbypass_local_state {
73 struct list_head page_list;   // shadow_page_data entries; create_new_shadow_pt() inserts with list_add()/list_move() and recycles from the tail
// NOTE(review): this member is read through struct v3_swap_dev pointers
// (swp_dev->ops in get_swapped_pg_addr()) and assigned by
// v3_register_swap_disk(); confirm which structure it is declared in.
80 struct v3_swap_ops * ops;   // swap device callbacks; get_swap_entry() is the one invoked in this file
// Per-VM SwapBypass state, reached through vm->shdw_impl.impl_data.
86 struct swapbypass_vm_state {
87 struct v3_swap_dev devs[256];   // swap devices, indexed by the dev_index of a swapped PTE
// Telemetry counters; telemetry_cb() prints them.
89 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
91 uint32_t write_faults;
93 uint32_t mapped_pages;
// Maps a raw 32-bit guest PTE value to the list_head of shadow_pointer
// entries created for it (see map_swp_page()).
98 struct hashtable * shdw_ptr_ht;
104 static uint_t swap_hash_fn(addr_t key) {
105 return v3_hash_long(key, 32);
109 static int swap_eq_fn(addr_t key1, addr_t key2) {
110 return (key1 == key2);
115 static inline uint32_t get_pg_index(pte32_t * pte) {
116 return ((struct swap_pte *)pte)->pg_index;
120 static inline uint32_t get_dev_index(pte32_t * pte) {
121 return ((struct swap_pte *)pte)->dev_index;
125 // Present = 0 and Dirty = 0
127 static inline int is_swapped_pte32(pte32_t * pte) {
128 return ((pte->present == 0) && (*(uint32_t *)pte != 0));
// Forward declaration; the definition comes after the mode-specific
// vmm_shdw_pg_swapbypass_*.h includes further down in this file.
134 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core);
138 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
// Telemetry callback: prints the VM's SwapBypass counters, every line
// prefixed with hdr. private_data is not used. sb_init() registers it and
// map_swp_page() also calls it directly when an allocation fails.
// NOTE(review): the counters are printed with %d; if they are all uint32_t
// like write_faults/mapped_pages, %u would be the matching conversion.
139 static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
140 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
142 V3_Print("%sSymbiotic Swap:\n", hdr);
143 V3_Print("%s\tRead faults=%d\n", hdr, swap_state->read_faults);
144 V3_Print("%s\tWrite faults=%d\n", hdr, swap_state->write_faults);
145 V3_Print("%s\tMapped Pages=%d\n", hdr, swap_state->mapped_pages);
146 V3_Print("%s\tFlushes=%d\n", hdr, swap_state->flushes);
147 V3_Print("%s\tlist size=%d\n", hdr, swap_state->list_size);
// Queries the guest, through the SYMCALL_MEM_LOOKUP symbiotic call, about the
// page behind vaddr. The guest PTE is passed as a zero-extended 64-bit value
// and page_perms is handed to the call as its third argument.
// NOTE(review): vaddr and page_perms are passed as uint64_t pointers; confirm
// that addr_t and pf_error_t are at least 64 bits wide on every supported host.
159 static int get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_pte, pf_error_t * page_perms) {
160 uint64_t pte_val = (uint64_t)*(uint32_t *)guest_pte;
162 // symcall to check if page is in cache or on swap disk
// A return value of -1 from the symbiotic call is reported as an error.
163 if (v3_sym_call3(info, SYMCALL_MEM_LOOKUP, (uint64_t *)&vaddr, (uint64_t *)&pte_val, (uint64_t *)page_perms) == -1) {
164 PrintError("Sym call error?? that's weird... \n");
168 // V3_Print("page perms = %x\n", *(uint32_t *)page_perms);
// Resolves the swapped-out page a guest PTE refers to: selects the swap
// device named by the PTE's dev_index and returns whatever that device's
// get_swap_entry() callback yields for the PTE's pg_index, cast to addr_t.
178 static addr_t get_swapped_pg_addr(struct v3_vm_info * vm, pte32_t * guest_pte) {
179 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
180 int dev_index = get_dev_index(guest_pte);
181 struct v3_swap_dev * swp_dev = &(swap_state->devs[dev_index]);
// Slots not marked present (see v3_register_swap_disk()) are handled separately.
184 if (! swp_dev->present ) {
188 return (addr_t)swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data);
193 static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr) {
194 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
195 struct list_head * shdw_ptr_list = NULL;
196 struct shadow_pointer * shdw_ptr = NULL;
200 if (swp_page_ptr == NULL) {
201 // PrintError("Swapped out page not found on swap device\n");
205 shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);
207 if (shdw_ptr_list == NULL) {
208 shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head *));
209 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
210 swap_state->list_size++;
212 INIT_LIST_HEAD(shdw_ptr_list);
213 v3_htable_insert(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte, (addr_t)shdw_ptr_list);
216 shdw_ptr = (struct shadow_pointer *)V3_Malloc(sizeof(struct shadow_pointer));
218 if (shdw_ptr == NULL) {
219 PrintError("MEMORY LEAK\n");
220 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
221 telemetry_cb(vm, NULL, "");
226 shdw_ptr->shadow_pte = shadow_pte;
227 shdw_ptr->guest_pte = *(uint32_t *)guest_pte;
228 shdw_ptr->pg_index = get_pg_index(guest_pte);
229 shdw_ptr->dev_index = get_dev_index(guest_pte);
231 // We don't check for conflicts, because it should not happen...
232 list_add(&(shdw_ptr->node), shdw_ptr_list);
234 return PAGE_BASE_ADDR((addr_t)V3_PAddr(swp_page_ptr));
241 #include "vmm_shdw_pg_swapbypass_32.h"
242 #include "vmm_shdw_pg_swapbypass_32pae.h"
243 #include "vmm_shdw_pg_swapbypass_64.h"
246 static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core) {
247 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
248 struct swapbypass_local_state * impl_state = (struct swapbypass_local_state *)(state->local_impl_data);
249 v3_reg_t cur_cr3 = core->ctrl_regs.cr3;
250 struct shadow_page_data * page_tail = NULL;
251 addr_t shdw_page = 0;
253 if (!list_empty(&(impl_state->page_list))) {
254 page_tail = list_tail_entry(&(impl_state->page_list), struct shadow_page_data, page_list_node);
257 if (page_tail->cr3 != cur_cr3) {
258 PrintDebug("Reusing old shadow Page: %p (cur_CR3=%p)(page_cr3=%p) \n",
259 (void *)(addr_t)page_tail->page_pa,
260 (void *)(addr_t)cur_cr3,
261 (void *)(addr_t)(page_tail->cr3));
263 list_move(&(page_tail->page_list_node), &(impl_state->page_list));
265 memset(V3_VAddr((void *)(page_tail->page_pa)), 0, PAGE_SIZE_4KB);
274 page_tail = (struct shadow_page_data *)V3_Malloc(sizeof(struct shadow_page_data));
275 page_tail->page_pa = (addr_t)V3_AllocPages(1);
277 PrintDebug("Allocating new shadow Page: %p (cur_cr3=%p)\n",
278 (void *)(addr_t)page_tail->page_pa,
279 (void *)(addr_t)cur_cr3);
281 page_tail->cr3 = cur_cr3;
282 list_add(&(page_tail->page_list_node), &(impl_state->page_list));
284 shdw_page = (addr_t)V3_VAddr((void *)(page_tail->page_pa));
285 memset((void *)shdw_page, 0, PAGE_SIZE_4KB);
295 int v3_register_swap_disk(struct v3_vm_info * vm, int dev_index,
296 struct v3_swap_ops * ops, void * private_data) {
297 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
299 swap_state->devs[dev_index].present = 1;
300 swap_state->devs[dev_index].private_data = private_data;
301 swap_state->devs[dev_index].ops = ops;
309 int v3_swap_in_notify(struct v3_vm_info * vm, int pg_index, int dev_index) {
310 struct list_head * shdw_ptr_list = NULL;
311 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
312 struct shadow_pointer * tmp_shdw_ptr = NULL;
313 struct shadow_pointer * shdw_ptr = NULL;
314 struct swap_pte guest_pte = {0, dev_index, pg_index};
316 shdw_ptr_list = (struct list_head * )v3_htable_search(swap_state->shdw_ptr_ht, *(addr_t *)&(guest_pte));
318 if (shdw_ptr_list == NULL) {
322 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
323 if ((shdw_ptr->pg_index == pg_index) &&
324 (shdw_ptr->dev_index == dev_index)) {
326 // Trigger faults for next shadow access
327 shdw_ptr->shadow_pte->present = 0;
329 // Delete entry from list
330 list_del(&(shdw_ptr->node));
340 int v3_swap_flush(struct v3_vm_info * vm) {
341 struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
342 struct hashtable_iter * ht_iter = v3_create_htable_iter(swap_state->shdw_ptr_ht);
344 // PrintDebug("Flushing Symbiotic Swap table\n");
346 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
347 swap_state->flushes++;
351 PrintError("NULL iterator in swap flush!! Probably will crash soon...\n");
354 while (ht_iter->entry) {
355 struct shadow_pointer * tmp_shdw_ptr = NULL;
356 struct shadow_pointer * shdw_ptr = NULL;
357 struct list_head * shdw_ptr_list = (struct list_head *)v3_htable_get_iter_value(ht_iter);
359 // delete all swapped entries
360 // we can leave the list_head structures and reuse them for the next round
362 list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
363 if (shdw_ptr == NULL) {
364 PrintError("Null shadow pointer in swap flush!! Probably crashing soon...\n");
367 // Trigger faults for next shadow access
368 shdw_ptr->shadow_pte->present = 0;
370 // Delete entry from list
371 list_del(&(shdw_ptr->node));
375 v3_htable_iter_advance(ht_iter);
389 static int sb_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
390 struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
391 struct swapbypass_vm_state * sb_state = NULL;
393 memset(sb_state, 0, sizeof(struct swapbypass_vm_state));
394 sb_state->shdw_ptr_ht = v3_create_htable(0, swap_hash_fn, swap_eq_fn);
396 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
397 if (vm->enable_telemetry) {
398 v3_add_telemetry_cb(vm, telemetry_cb, NULL);
402 impl_state->impl_data = sb_state;
404 PrintDebug("Initialized SwapBypass\n");
// Per-VM teardown hook of the SwapBypass implementation.
// NOTE(review): confirm that the state sb_init() creates (the
// swapbypass_vm_state and its hash table) is released here.
410 static int sb_deinit(struct v3_vm_info * vm) {
414 static int sb_local_init(struct guest_info * core) {
415 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
416 struct swapbypass_local_state * swapbypass_state = NULL;
418 V3_Print("SWAPBYPASS local initialization\n");
420 swapbypass_state = (struct swapbypass_local_state *)V3_Malloc(sizeof(struct swapbypass_local_state));
422 INIT_LIST_HEAD(&(swapbypass_state->page_list));
424 state->local_impl_data = swapbypass_state;
// Activates the core's shadow page tables by dispatching on its current CPU
// mode to the 32-bit, 32-bit PAE or 64-bit implementation and returning that
// implementation's result. Modes with no handler are reported via PrintError.
430 static int sb_activate_shdw_pt(struct guest_info * core) {
431 switch (v3_get_vm_cpu_mode(core)) {
434 return activate_shadow_pt_32(core);
436 return activate_shadow_pt_32pae(core);
440 return activate_shadow_pt_64(core);
442 PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
/* Invalidation is implemented as a re-activation of the core's shadow page
 * tables; the result of sb_activate_shdw_pt() is passed through unchanged. */
static int sb_invalidate_shdw_pt(struct guest_info * core) {
    int status = sb_activate_shdw_pt(core);

    return status;
}
// Shadow page fault entry point: dispatches on the core's current CPU mode
// to the 32-bit, 32-bit PAE or 64-bit fault handler, forwarding the faulting
// address and error code and returning the handler's result. Modes with no
// handler are reported via PrintError.
454 static int sb_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
456 switch (v3_get_vm_cpu_mode(core)) {
458 return handle_shadow_pagefault_32(core, fault_addr, error_code);
461 return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
465 return handle_shadow_pagefault_64(core, fault_addr, error_code);
468 PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
// INVLPG entry point: dispatches on the core's current CPU mode to the
// 32-bit, 32-bit PAE or 64-bit handler for the given virtual address and
// returns the handler's result. Modes with no handler are reported via
// PrintError.
474 static int sb_handle_invlpg(struct guest_info * core, addr_t vaddr) {
476 switch (v3_get_vm_cpu_mode(core)) {
478 return handle_shadow_invlpg_32(core, vaddr);
480 return handle_shadow_invlpg_32pae(core, vaddr);
484 return handle_shadow_invlpg_64(core, vaddr);
486 PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
// Descriptor of the SwapBypass shadow paging implementation: its name and
// the hooks defined in this file for per-core init, page faults, INVLPG,
// and activating/invalidating the shadow page tables.
491 static struct v3_shdw_pg_impl sb_impl = {
492 .name = "SWAPBYPASS",
495 .local_init = sb_local_init,
496 .handle_pagefault = sb_handle_pf,
497 .handle_invlpg = sb_handle_invlpg,
498 .activate_shdw_pt = sb_activate_shdw_pt,
499 .invalidate_shdw_pt = sb_invalidate_shdw_pt
// Hands sb_impl to register_shdw_pg_impl(); presumably this is what makes the
// implementation selectable by the shadow paging core — confirm against the
// macro/function definition.
506 register_shdw_pg_impl(&sb_impl);