2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <palacios/vmm_shadow_paging.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vm_guest_mem.h>
26 #include <palacios/vmm_decoder.h>
27 #include <palacios/vmm_ctrl_regs.h>
29 #include <palacios/vmm_hashtable.h>
31 #include <palacios/vmm_direct_paging.h>
36 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
37 #include <palacios/vmm_telemetry.h>
40 #ifdef V3_CONFIG_SYMBIOTIC_SWAP
41 #include <palacios/vmm_sym_swap.h>
44 #ifndef V3_CONFIG_DEBUG_SHADOW_PAGING
46 #define PrintDebug(fmt, args...)
// Strategy name used by v3_init_shdw_impl() when the configuration gives no "strategy" value.
50 static const char default_strategy[] = "VTLB";
// Table of registered shadow paging implementations; V3_init_shdw_paging() inserts each
// implementation keyed by its name string.
53 static struct hashtable * master_shdw_pg_table = NULL;
// One registered shadow paging event callback. Entries are linked into
// shdw_impl.event_callback_list through 'node'.
56 struct event_callback {
// Handler called by dispatch_event() with the core, the event and the entry's priv_data.
57 int (*callback)(struct guest_info *core, struct v3_shdw_pg_event *event, void *priv_data);
// Linkage into the per-VM callback list.
60 struct list_head node;
// Hash callback passed to v3_create_htable(): interprets the key as a pointer to a
// NUL-terminated string and hashes its strlen() bytes with v3_hash_buffer().
63 static uint_t shdw_pg_hash_fn(addr_t key) {
64 char * name = (char *)key;
65 return v3_hash_buffer((uint8_t *)name, strlen(name));
// Key-equality callback passed to v3_create_htable(): both keys are interpreted as
// string pointers; evaluates to 1 when strcmp() reports them equal, 0 otherwise.
68 static int shdw_pg_eq_fn(addr_t key1, addr_t key2) {
69 char * name1 = (char *)key1;
70 char * name2 = (char *)key2;
72 return (strcmp(name1, name2) == 0);
// Reports whether the VM that owns 'core' has at least one entry in its shadow paging
// event callback list (non-zero when the list is not empty).
75 static int have_callbacks(struct guest_info *core)
77 // lock acquisition unnecessary
78 // caller will acquire the lock before *iterating* through the list
79 // so any race will be resolved then
80 return !list_empty(&(core->vm_info->shdw_impl.event_callback_list));
// Delivers 'event' to the callbacks registered on the VM that owns 'core'.
// The list is walked while holding event_callback_lock as a reader; each callback is
// called with (core, event, cb->priv_data) and its return value is not examined.
83 static void dispatch_event(struct guest_info *core, struct v3_shdw_pg_event *event)
85 struct event_callback *cb,*temp;
87 v3_read_lock(&(core->vm_info->shdw_impl.event_callback_lock));
89 list_for_each_entry_safe(cb,
91 &(core->vm_info->shdw_impl.event_callback_list),
93 cb->callback(core,event,cb->priv_data);
96 v3_read_unlock(&(core->vm_info->shdw_impl.event_callback_lock));
// Global shadow paging initialization: creates master_shdw_pg_table and registers every
// implementation found in the array delimited by __start__v3_shdw_pg_impls and
// __stop__v3_shdw_pg_impls, keyed by the implementation's name.
// NOTE(review): the two extern symbols look like linker-generated section bounds - confirm
// against the build's linker script.
100 int V3_init_shdw_paging() {
101 extern struct v3_shdw_pg_impl * __start__v3_shdw_pg_impls[];
102 extern struct v3_shdw_pg_impl * __stop__v3_shdw_pg_impls[];
103 struct v3_shdw_pg_impl ** tmp_impl = __start__v3_shdw_pg_impls;
// Table uses the string hash/equality callbacks defined in this file.
106 master_shdw_pg_table = v3_create_htable(0, shdw_pg_hash_fn, shdw_pg_eq_fn);
109 while (tmp_impl != __stop__v3_shdw_pg_impls) {
110 V3_Print(VM_NONE, VCORE_NONE, "Registering Shadow Paging Impl (%s)\n", (*tmp_impl)->name);
// A name that is already present in the table is reported as a duplicate.
112 if (v3_htable_search(master_shdw_pg_table, (addr_t)((*tmp_impl)->name))) {
113 PrintError(VM_NONE, VCORE_NONE, "Multiple instances of shadow paging impl (%s)\n", (*tmp_impl)->name);
// An insert result of 0 is treated as a registration failure.
117 if (v3_htable_insert(master_shdw_pg_table,
118 (addr_t)((*tmp_impl)->name),
119 (addr_t)(*tmp_impl)) == 0) {
120 PrintError(VM_NONE, VCORE_NONE, "Could not register shadow paging impl (%s)\n", (*tmp_impl)->name);
// Advance to the next array slot by index.
124 tmp_impl = &(__start__v3_shdw_pg_impls[++i]);
// Global shadow paging teardown: releases master_shdw_pg_table.
// NOTE(review): the two 0 arguments presumably mean "do not free values/keys" (the entries
// are the implementation structs and their name strings) - confirm against v3_free_htable().
130 int V3_deinit_shdw_paging() {
131 v3_free_htable(master_shdw_pg_table, 0, 0);
142 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
// Telemetry callback (compiled only with V3_CONFIG_SHADOW_PAGING_TELEMETRY): for every core
// of 'vm', prints the core's shdw_pg_state.guest_faults counter prefixed with 'hdr'.
// 'private_data' is not referenced in the lines shown here.
143 static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
145 for (i = 0; i < vm->num_cores; i++) {
146 struct guest_info * core = &(vm->cores[i]);
148 V3_Print(vm, core, "%s Guest Page faults: %d\n", hdr, core->shdw_pg_state.guest_faults);
// Per-core shadow paging initialization: clears the cached guest CR3, CR0 and EFER values,
// then runs the current implementation's local_init() hook for this core.
155 int v3_init_shdw_pg_state(struct guest_info * core) {
156 struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
157 struct v3_shdw_pg_impl * impl = core->vm_info->shdw_impl.current_impl;
160 state->guest_cr3 = 0;
161 state->guest_cr0 = 0;
162 state->guest_efer.value = 0x0LL;
// A result of -1 from the hook is reported as an initialization error.
164 if (impl->local_init(core) == -1) {
165 PrintError(core->vm_info, core, "Error in Shadow paging local initialization (%s)\n", impl->name);
// With telemetry configured, register telemetry_cb on the owning VM (no private data).
170 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
171 v3_add_telemetry_cb(core->vm_info, telemetry_cb, NULL);
// Per-core shadow paging teardown: runs the current implementation's local_deinit() hook
// when an implementation is set.
179 int v3_deinit_shdw_pg_state(struct guest_info * core) {
180 struct v3_shdw_pg_impl * impl = NULL;
// Guard against a missing core or a core that is not attached to a VM.
182 if (!core || !core->vm_info) {
186 impl = core->vm_info->shdw_impl.current_impl;
// The hook is skipped when no implementation is set; -1 from the hook is reported as an error.
188 if (impl && impl->local_deinit(core) == -1) {
189 PrintError(core->vm_info, core, "Error deinitializing shadow paging state\n");
// Per-VM shadow paging setup. Reads the "mode" and "strategy" values from the "paging"
// subtree of the VM configuration, looks the strategy up in master_shdw_pg_table,
// initializes the event callback list and its lock, records the chosen implementation
// in vm->shdw_impl.current_impl and calls the implementation's init() hook.
199 int v3_init_shdw_impl(struct v3_vm_info * vm) {
200 struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
201 v3_cfg_tree_t * pg_cfg = v3_cfg_subtree(vm->cfg_data->cfg, "paging");
202 char * pg_mode = v3_cfg_val(pg_cfg, "mode");
203 char * pg_strat = v3_cfg_val(pg_cfg, "strategy");
204 struct v3_shdw_pg_impl * impl = NULL;
206 PrintDebug(vm, VCORE_NONE, "Checking if shadow paging requested.\n");
// A missing mode is reported as shadow paging with defaults.
207 if (pg_mode == NULL) {
208 V3_Print(vm, VCORE_NONE, "No paging mode specified, assuming shadow with defaults\n");
// NOTE(review): the statements connecting the NULL check above to the strcasecmp() calls
// below are not visible here - confirm pg_mode cannot be NULL when they run.
211 if (strcasecmp(pg_mode, "nested") == 0) {
212 // this check is repeated here (compare to vmm_config's determine paging mode) since
213 // shadow paging initialization *precedes* per-core pre-config.
214 extern v3_cpu_arch_t v3_mach_type;
215 if ((v3_mach_type == V3_SVM_REV3_CPU) ||
216 (v3_mach_type == V3_VMX_EPT_CPU) ||
217 (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
218 PrintDebug(vm, VCORE_NONE, "Nested paging specified on machine that supports it - not initializing shadow paging\n");
221 V3_Print(vm, VCORE_NONE, "Nested paging specified but machine does not support it - falling back to shadow paging with defaults\n");
// Any mode other than "nested" or "shadow" (compared case-insensitively) is reported as unknown.
224 } else if (strcasecmp(pg_mode, "shadow") != 0) {
225 V3_Print(vm, VCORE_NONE, "Unknown paging mode '%s' specified - falling back to shadow paging with defaults\n",pg_mode);
// No strategy configured: fall back to default_strategy.
230 if (pg_strat == NULL) {
231 pg_strat = (char *)default_strategy;
234 V3_Print(vm, VCORE_NONE,"Initialization of Shadow Paging implementation\n");
// Look up the implementation registered under the strategy name.
236 impl = (struct v3_shdw_pg_impl *)v3_htable_search(master_shdw_pg_table, (addr_t)pg_strat);
239 PrintError(vm, VCORE_NONE, "Could not find shadow paging impl (%s)\n", pg_strat);
// Event callback bookkeeping used by the register/unregister/dispatch functions in this file.
243 INIT_LIST_HEAD(&(impl_state->event_callback_list));
244 v3_rw_lock_init(&(impl_state->event_callback_lock));
246 impl_state->current_impl = impl;
// Implementation-specific VM-wide initialization; -1 is reported as a failure.
248 if (impl->init(vm, pg_cfg) == -1) {
249 PrintError(vm, VCORE_NONE, "Could not initialize Shadow paging implemenation (%s)\n", impl->name);
// Per-VM shadow paging teardown: calls the current implementation's deinit() hook, then
// unlinks every entry from the event callback list while holding the write lock, and
// finally deinitializes the lock.
258 int v3_deinit_shdw_impl(struct v3_vm_info * vm) {
259 struct v3_shdw_pg_impl * impl = vm->shdw_impl.current_impl;
260 struct event_callback *cb,*temp;
264 // Shadow paging not implemented
// -1 from the hook is reported as an error.
268 if (impl->deinit(vm) == -1) {
269 PrintError(vm, VCORE_NONE,"Error deinitializing shadow paging implementation\n");
// Drain the callback list using the irqsave/irqrestore variants of the write lock.
273 flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
275 list_for_each_entry_safe(cb,
277 &(vm->shdw_impl.event_callback_list),
279 list_del(&(cb->node));
283 v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
285 v3_rw_lock_deinit(&(vm->shdw_impl.event_callback_lock));
291 // Reads the guest CR3 register
292 // creates new shadow page tables
293 // updates the shadow CR3 register to point to the new pts
// The work itself is delegated to the current implementation's activate_shdw_pt() hook.
// When event callbacks are registered, a SHADOW_ACTIVATE event is dispatched before the
// hook (SHADOW_PREIMPL) and again after it (SHADOW_POSTIMPL).
294 int v3_activate_shadow_pt(struct guest_info * core) {
295 struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
296 struct v3_shdw_pg_impl * impl = state->current_impl;
// Fast path: no listeners, so just return the hook's result.
298 if (!have_callbacks(core)) {
299 return impl->activate_shdw_pt(core);
302 struct v3_shdw_pg_event event_pre={SHADOW_ACTIVATE,SHADOW_PREIMPL,0,{0,0,0,0,0,0}};
303 struct v3_shdw_pg_event event_post={SHADOW_ACTIVATE,SHADOW_POSTIMPL,0,{0,0,0,0,0,0}};
305 dispatch_event(core,&event_pre);
// The hook's result is kept in rc while the post event is delivered.
307 rc =impl->activate_shdw_pt(core);
309 dispatch_event(core,&event_post);
317 // This must flush any caches
318 // and reset the cr3 value to the correct value
// The work itself is delegated to the current implementation's invalidate_shdw_pt() hook.
// When event callbacks are registered, a SHADOW_INVALIDATE event is dispatched before the
// hook (SHADOW_PREIMPL) and again after it (SHADOW_POSTIMPL).
319 int v3_invalidate_shadow_pts(struct guest_info * core) {
320 struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
321 struct v3_shdw_pg_impl * impl = state->current_impl;
// Fast path: no listeners, so just return the hook's result.
323 if (!have_callbacks(core)) {
324 return impl->invalidate_shdw_pt(core);
327 struct v3_shdw_pg_event event_pre={SHADOW_INVALIDATE,SHADOW_PREIMPL,0,{0,0,0,0,0,0}};
328 struct v3_shdw_pg_event event_post={SHADOW_INVALIDATE,SHADOW_POSTIMPL,0,{0,0,0,0,0,0}};
330 dispatch_event(core,&event_pre);
// The hook's result is kept in rc while the post event is delivered.
332 rc = impl->invalidate_shdw_pt(core);
334 dispatch_event(core,&event_post);
// Page-fault entry point for shadow paging. Depending on the core's memory mode the fault
// goes to the passthrough handler (PHYSICAL_MEM) or to the current implementation's
// handle_pagefault() hook (VIRTUAL_MEM); any other mode is reported as invalid.
// Registered listeners receive a SHADOW_PAGEFAULT event carrying fault_addr and error_code
// before (SHADOW_PREIMPL) and after (SHADOW_POSTIMPL) the handling step.
341 int v3_handle_shadow_pagefault(struct guest_info * core, addr_t fault_addr, pf_error_t error_code)
// Notify listeners before the fault is handled.
346 if (have_callbacks(core)) {
347 struct v3_shdw_pg_event event={SHADOW_PAGEFAULT,SHADOW_PREIMPL,fault_addr,error_code};
348 dispatch_event(core,&event);
351 if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) {
352 // If paging is not turned on we need to handle the special cases
353 rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code,NULL,NULL);
354 } else if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
355 struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
356 struct v3_shdw_pg_impl * impl = state->current_impl;
358 rc = impl->handle_pagefault(core, fault_addr, error_code);
360 PrintError(core->vm_info, core, "Invalid Memory mode\n");
// Notify listeners after the fault has been handled.
364 if (have_callbacks(core)) {
365 struct v3_shdw_pg_event event={SHADOW_PAGEFAULT,SHADOW_POSTIMPL,fault_addr,error_code};
366 dispatch_event(core,&event);
// Handles an INVLPG executed by the guest: fetches up to 15 instruction bytes at the
// linear address of CS:RIP, decodes them, checks that the result is an INVLPG with a single
// memory operand, advances RIP past the instruction and passes the operand address to the
// current implementation's handle_invlpg() hook. Registered listeners receive a
// SHADOW_INVLPG event before (SHADOW_PREIMPL) and after (SHADOW_POSTIMPL) the hook.
373 int v3_handle_shadow_invlpg(struct guest_info * core) {
375 struct x86_instr dec_instr;
379 if (v3_get_vm_mem_mode(core) != VIRTUAL_MEM) {
380 // Paging must be turned on...
381 // should handle with some sort of fault I think
382 PrintError(core->vm_info, core, "ERROR: INVLPG called in non paged mode\n");
// NOTE(review): if the non-paged check above returns (the return is not visible here), the
// PHYSICAL_MEM branch below cannot be taken and only the guest-virtual read is reachable.
386 if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) {
387 ret = v3_read_gpa_memory(core, get_addr_linear(core, core->rip, &(core->segments.cs)), 15, instr);
389 ret = v3_read_gva_memory(core, get_addr_linear(core, core->rip, &(core->segments.cs)), 15, instr);
393 PrintError(core->vm_info, core, "Could not read instruction into buffer\n");
// -1 from the decoder is reported as a decoding error.
397 if (v3_decode(core, (addr_t)instr, &dec_instr) == -1) {
398 PrintError(core->vm_info, core, "Decoding Error\n");
// Reject anything that is not INVLPG with exactly one memory destination operand.
402 if ((dec_instr.op_type != V3_OP_INVLPG) ||
403 (dec_instr.num_operands != 1) ||
404 (dec_instr.dst_operand.type != MEM_OPERAND)) {
405 PrintError(core->vm_info, core, "Decoder Error: Not a valid INVLPG instruction...\n");
// The decoded operand is the address to invalidate.
409 vaddr = dec_instr.dst_operand.operand;
// Step the guest past the instruction before the hook runs.
411 core->rip += dec_instr.instr_length;
414 struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
415 struct v3_shdw_pg_impl * impl = state->current_impl;
418 if (have_callbacks(core)) {
419 struct v3_shdw_pg_event event={SHADOW_INVLPG,SHADOW_PREIMPL,vaddr,{0,0,0,0,0,0}};
420 dispatch_event(core,&event);
423 rc=impl->handle_invlpg(core, vaddr);
425 if (have_callbacks(core)) {
426 struct v3_shdw_pg_event event={SHADOW_INVLPG,SHADOW_POSTIMPL,vaddr,{0,0,0,0,0,0}};
427 dispatch_event(core,&event);
// Injects a page fault into the guest: stores the faulting address in the core's CR2 and
// raises PF_EXCEPTION with the given error code. Returns the result of
// v3_raise_exception_with_error().
439 int v3_inject_guest_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
440 core->ctrl_regs.cr2 = fault_addr;
// With telemetry configured, count the fault for the telemetry report.
442 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
443 core->shdw_pg_state.guest_faults++;
// NOTE(review): error_code is reinterpreted as a uint_t through a pointer cast; this assumes
// pf_error_t is at least as large as uint_t and that the build tolerates the aliasing - confirm.
446 return v3_raise_exception_with_error(core, PF_EXCEPTION, *(uint_t *)&error_code);
// Classifies a fault from the access status of the guest page tables (guest_access) and of
// the shadow page tables (shadow_access). All visible tests sit under
// "guest_access != PT_ACCESS_OK", i.e. they only apply when the guest's own tables would
// fault. NOTE(review): the value returned by each branch is not visible in this view -
// confirm against the full function before relying on it.
450 int v3_is_guest_pf(pt_access_status_t guest_access, pt_access_status_t shadow_access) {
451 /* basically the reasoning is that there can be multiple reasons for a page fault:
452 If there is a permissions failure for a page present in the guest _BUT_
453 the reason for the fault was that the page is not present in the shadow,
454 _THEN_ we have to map the shadow page in and reexecute, this will generate
455 a permissions fault which is _THEN_ valid to send to the guest
456 _UNLESS_ both the guest and shadow have marked the page as not present
460 if (guest_access != PT_ACCESS_OK) {
461 // Guest Access Error
463 if ((shadow_access != PT_ACCESS_NOT_PRESENT) &&
464 (guest_access != PT_ACCESS_NOT_PRESENT)) {
465 // aka (guest permission error)
470 if ((shadow_access == PT_ACCESS_NOT_PRESENT) &&
471 (guest_access == PT_ACCESS_NOT_PRESENT)) {
472 // Page tables completely blank, handle guest first
477 if (guest_access == PT_ACCESS_NOT_PRESENT) {
478 // Page tables completely blank, handle guest first
482 // Otherwise we'll handle the guest fault later...?
// Registers a shadow paging event callback on 'vm': allocates an event_callback entry with
// V3_Malloc(), stores the callback and its priv_data in it, and links it into
// vm->shdw_impl.event_callback_list with list_add() while holding the write lock.
// An allocation failure is reported with PrintError.
489 int v3_register_shadow_paging_event_callback(struct v3_vm_info *vm,
490 int (*callback)(struct guest_info *core,
491 struct v3_shdw_pg_event *event,
495 struct event_callback *ec = V3_Malloc(sizeof(struct event_callback));
499 PrintError(vm, VCORE_NONE, "Unable to allocate for a shadow paging event callback\n");
503 ec->callback = callback;
504 ec->priv_data = priv_data;
// List mutation uses the irqsave/irqrestore variants of the write lock.
506 flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
507 list_add(&(ec->node),&(vm->shdw_impl.event_callback_list));
508 v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
// Removes a previously registered shadow paging event callback from 'vm'. The list is
// searched under the write lock for an entry whose callback AND priv_data both match; the
// matching entry is unlinked with list_del() and the lock is released. When no entry
// matches, the lock is released and "No callback found!" is reported.
514 int v3_unregister_shadow_paging_event_callback(struct v3_vm_info *vm,
515 int (*callback)(struct guest_info *core,
516 struct v3_shdw_pg_event *event,
520 struct event_callback *cb,*temp;
523 flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
525 list_for_each_entry_safe(cb,
527 &(vm->shdw_impl.event_callback_list),
// Both the function pointer and the private data must match the registration.
529 if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
530 list_del(&(cb->node));
531 v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
// Not-found path: drop the lock before reporting.
537 v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
539 PrintError(vm, VCORE_NONE, "No callback found!\n");