/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (refactor + events)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
#include <palacios/vmm_ctrl_regs.h>
#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
39 "Direct Paging" combines these three functionalities:
41 1. Passthrough paging for SVM and VMX
43 Passthrough paging is used for shadow paging when
44 the guest does not have paging turn on, for example
45 when it is running in real mode or protected mode
46 early in a typical boot process. Passthrough page
47 tables are shadow page tables that are built assuming
48 the guest virtual to guest physical mapping is the identity.
49 Thus, what they implement are the GPA->HPA mapping.
51 Passthrough page tables are built using 32PAE paging.
54 2. Nested paging on SVM
56 The SVM nested page tables have the same format as
57 regular page tables. For this reason, we can reuse
58 much of the passthrough implementation. A nested page
59 table mapping is a GPA->HPA mapping, creating a very
60 simlar model as with passthrough paging, just that it's
61 always active, whether the guest has paging on or not.
64 3. Nested paging on VMX
66 The VMX nested page tables have a different format
67 than regular page tables. For this reason, we have
68 implemented them in the vmx_npt.h file. The code
69 here then is a wrapper, allowing us to make nested
70 paging functionality appear uniform across VMX and SVM
71 elsewhere in the codebase.
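/*
  For example, an exit handler elsewhere in the codebase can service a
  nested page fault without knowing whether EPT or SVM NPT is active.
  This is an illustrative sketch only: the variable names and the
  exit-info plumbing shown here are assumed, not taken from the actual
  exit handlers in svm.c and vmx.c.

      addr_t start, end;

      // fault_gpa and fault_info come from the hardware exit information
      if (v3_handle_nested_pagefault(core, fault_gpa, fault_info, &start, &end) < 0) {
          PrintError(core->vm_info, core, "Could not handle nested page fault\n");
          return -1;
      }
*/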
static inline int is_vmx_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
}

static inline int is_svm_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_SVM_REV3_CPU);
}
struct passthrough_event_callback {
    int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
    void *priv_data;
    struct list_head node;
};
static int have_passthrough_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
}
static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
{
    struct passthrough_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(core->vm_info->passthrough_impl.event_callback_list),
                             node) {
        cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->passthrough_impl.event_callback_lock));
}
struct nested_event_callback {
    int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
    void *priv_data;
    struct list_head node;
};
static int have_nested_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
}
static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
{
    struct nested_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(core->vm_info->nested_impl.event_callback_list),
                             node) {
        cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->nested_impl.event_callback_lock));
}
static addr_t create_generic_pt_page(struct guest_info *core) {
    void * page = 0;
    void * temp;

    temp = V3_AllocPagesExtended(1, PAGE_SIZE_4KB, -1, 0); // no constraints

    if (!temp) {
        PrintError(VM_NONE, VCORE_NONE,"Cannot allocate page\n");
        return 0;
    }

    page = V3_VAddr(temp);
    memset(page, 0, PAGE_SIZE);

    return (addr_t)page;
}
// Inline handler functions for each cpu mode
#include "vmm_direct_paging_32.h"
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
int v3_init_passthrough_pts(struct guest_info * info) {
    info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));
    return 0;
}
int v3_free_passthrough_pts(struct guest_info * core) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    // Delete the old direct map page tables
    switch(mode) {
        case REAL:
        case PROTECTED:
            // Intentional fallthrough here
            // There are *only* PAE tables
        case PROTECTED_PAE:
        case LONG:
        case LONG_32_COMPAT:
            // Long mode will only use 32PAE page tables...
            delete_page_tables_32pae((pdpe32pae_t *)V3_VAddr((void *)(core->direct_map_pt)));
            break;
        default:
            PrintError(core->vm_info, core, "Unknown CPU Mode\n");
            return -1;
    }

    return 0;
}
int v3_reset_passthrough_pts(struct guest_info * core) {

    v3_free_passthrough_pts(core);

    // create new direct map page table
    v3_init_passthrough_pts(core);

    return 0;
}
int v3_activate_passthrough_pt(struct guest_info * info) {
    // For now... But we need to change this....
    // As soon as shadow paging becomes active the passthrough tables are hosed
    // So this will cause chaos if it is called at that time

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
        dispatch_passthrough_event(info,&event);
    }

    struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
    struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
    addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);

    // Passthrough PTs will only be PAE page tables.
    // A PAE CR3 holds the 32-byte-aligned PDPT base address in bits 31:5,
    // hence the shift by 5 below.
    shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
    shadow_cr4->pae = 1;

    PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
        dispatch_passthrough_event(info,&event);
    }

    return 0;
}
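/*
  Worked example of the CR3 encoding above (illustrative numbers, not
  taken from a real run): if the PDPT sits at host physical address
  0x00100000, then

      pdpt_base_addr = 0x00100000 >> 5 = 0x8000

  and the hardware reconstructs the base as 0x8000 << 5 = 0x00100000
  when it walks the PAE page tables.
*/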
int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
                                    addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
        dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
        case REAL:
        case PROTECTED:
            // Note intentional fallthrough here
            // There are only PAE page tables now
        case PROTECTED_PAE:
        case LONG:
        case LONG_32_COMPAT:
            // Long mode will only use 32PAE page tables...
            rc = handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
            break;
        default:
            PrintError(info->vm_info, info, "Unknown CPU Mode\n");
            break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
                                   addr_t *actual_start, addr_t *actual_end) {

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
        dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
        case REAL:
        case PROTECTED:
            // Intentional fallthrough - there
            // are only PAE page tables now
        case PROTECTED_PAE:
        case LONG:
        case LONG_32_COMPAT:
            // Long mode will only use 32PAE page tables...
            rc = invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
            break;
        default:
            PrintError(info->vm_info, info, "Unknown CPU Mode\n");
            break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
                                         addr_t inv_addr_start, addr_t inv_addr_end,
                                         addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
        dispatch_passthrough_event(info,&event);
    }

    switch(mode) {
        case REAL:
        case PROTECTED:
            // Intentional fallthrough
            // There are only PAE PTs now
        case PROTECTED_PAE:
        case LONG:
        case LONG_32_COMPAT:
            // Long mode will only use 32PAE page tables...
            rc = invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
            break;
        default:
            PrintError(info->vm_info, info, "Unknown CPU Mode\n");
            break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_init_passthrough_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
    v3_rw_lock_init(&(vm->passthrough_impl.event_callback_lock));
    return 0;
}
int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
{
    struct passthrough_event_callback *cb,*temp;
    unsigned long flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->passthrough_impl.event_callback_list),
                             node) {
        list_del(&(cb->node));
        V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->passthrough_impl.event_callback_lock));

    return 0;
}
int v3_init_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to init
    return 0;
}

int v3_deinit_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to deinit
    return 0;
}
int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
                                                  int (*callback)(struct guest_info *core,
                                                                  struct v3_passthrough_pg_event *,
                                                                  void *priv_data),
                                                  void *priv_data)
{
    struct passthrough_event_callback *ec = V3_Malloc(sizeof(struct passthrough_event_callback));
    unsigned long flags;

    if (!ec) {
        PrintError(vm, VCORE_NONE, "Unable to allocate for a passthrough paging event callback\n");
        return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->passthrough_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    return 0;
}
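/*
  Hedged usage sketch: a client that wants to observe passthrough paging
  activity can register a callback of the matching signature.  The
  callback name and the counter below are hypothetical, invented for
  this example; only the registration API itself comes from this file.

      static int my_pt_monitor(struct guest_info *core,
                               struct v3_passthrough_pg_event *event,
                               void *priv_data) {
          unsigned long *num_events = (unsigned long *)priv_data;
          (*num_events)++;   // counts every pre- and post-impl event dispatched
          return 0;
      }

      static unsigned long num_events = 0;
      v3_register_passthrough_paging_event_callback(vm, my_pt_monitor, &num_events);
*/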
int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
                                                    int (*callback)(struct guest_info *core,
                                                                    struct v3_passthrough_pg_event *,
                                                                    void *priv_data),
                                                    void *priv_data)
{
    struct passthrough_event_callback *cb,*temp;
    unsigned long flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->passthrough_impl.event_callback_list),
                             node) {
        if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
            list_del(&(cb->node));
            V3_Free(cb);
            v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);
            return 0;
        }
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}
// inline nested paging support for Intel and AMD
#include "svm_npt.h"
#include "vmx_npt.h"

static inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
{
    if (is_vmx_nested()) {
        // on VMX, pfinfo is the EPT exit qualification
        ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
    } else {
        // on SVM, pfinfo is already a pf_error_t
        *out = *(pf_error_t *)pfinfo;
    }
}
int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    pf_error_t err;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    convert_to_pf_error(pfinfo,&err);

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    } else {
        rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
                              addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    } else {
        rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr_range(struct guest_info * info,
                                    addr_t inv_addr_start, addr_t inv_addr_end,
                                    addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    } else {
        rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_init_nested_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
    v3_rw_lock_init(&(vm->nested_impl.event_callback_lock));
    return 0;
}
int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
{
    if (is_vmx_nested()) {
        return init_ept(core, (struct vmx_hw_info *) hwinfo);
    } else {
        // no initialization for SVM
        return 0;
    }
}
int v3_deinit_nested_paging(struct v3_vm_info *vm)
{
    struct nested_event_callback *cb,*temp;
    unsigned long flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->nested_impl.event_callback_list),
                             node) {
        list_del(&(cb->node));
        V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->nested_impl.event_callback_lock));

    return 0;
}
int v3_deinit_nested_paging_core(struct guest_info *core)
{
    // nothing to do.. probably dealloc? FIXME PAD
    return 0;
}
int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
                                             int (*callback)(struct guest_info *core,
                                                             struct v3_nested_pg_event *,
                                                             void *priv_data),
                                             void *priv_data)
{
    struct nested_event_callback *ec = V3_Malloc(sizeof(struct nested_event_callback));
    unsigned long flags;

    if (!ec) {
        PrintError(vm, VCORE_NONE, "Unable to allocate for a nested paging event callback\n");
        return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->nested_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    return 0;
}
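/*
  Usage mirrors the passthrough case.  One point worth showing (names
  below are invented for illustration): unregistration must pass the
  same (callback, priv_data) pair that was registered, since
  v3_unregister_nested_paging_event_callback matches on both pointers.

      v3_register_nested_paging_event_callback(vm, my_npt_monitor, &num_events);
      ...
      v3_unregister_nested_paging_event_callback(vm, my_npt_monitor, &num_events);
*/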
int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
                                               int (*callback)(struct guest_info *core,
                                                               struct v3_nested_pg_event *,
                                                               void *priv_data),
                                               void *priv_data)
{
    struct nested_event_callback *cb,*temp;
    unsigned long flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->nested_impl.event_callback_list),
                             node) {
        if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
            list_del(&(cb->node));
            V3_Free(cb);
            v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);
            return 0;
        }
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}