/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (refactor + events)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
#include <palacios/vmm_ctrl_regs.h>
#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
39 "Direct Paging" combines these three functionalities:
41 1. Passthrough paging for SVM and VMX
43 Passthrough paging is used for shadow paging when
44 the guest does not have paging turn on, for example
45 when it is running in real mode or protected mode
46 early in a typical boot process. Passthrough page
47 tables are shadow page tables that are built assuming
48 the guest virtual to guest physical mapping is the identity.
49 Thus, what they implement are the GPA->HPA mapping.
51 Passthrough page tables are built using 32PAE paging.
54 2. Nested paging on SVM
56 The SVM nested page tables have the same format as
57 regular page tables. For this reason, we can reuse
58 much of the passthrough implementation. A nested page
59 table mapping is a GPA->HPA mapping, creating a very
60 simlar model as with passthrough paging, just that it's
61 always active, whether the guest has paging on or not.
64 3. Nested paging on VMX
66 The VMX nested page tables have a different format
67 than regular page tables. For this reason, we have
68 implemented them in the vmx_npt.h file. The code
69 here then is a wrapper, allowing us to make nested
70 paging functionality appear uniform across VMX and SVM
71 elsewhere in the codebase.
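/* Illustrative sketch, guarded out of the build: all three paths above
   materialize the GPA->HPA mapping that the VM's memory map defines, one
   page at a time, on demand.  The underlying lookup is v3_gpa_to_hpa()
   from vm_guest_mem.h; the example function name is an assumption for
   illustration only, not part of this file's interface. */
#if 0
static int example_gpa_to_hpa_lookup(struct guest_info *core, addr_t gpa)
{
    addr_t hpa = 0;

    // Translate a guest physical address into a host physical address
    // using the VM's memory map; this is the mapping that passthrough
    // and nested page tables install when a fault arrives.
    if (v3_gpa_to_hpa(core, gpa, &hpa) == -1) {
	PrintError(core->vm_info, core, "No mapping for GPA %p\n", (void *)gpa);
	return -1;
    }

    PrintDebug(core->vm_info, core, "GPA %p -> HPA %p\n", (void *)gpa, (void *)hpa);
    return 0;
}
#endif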
static inline int is_vmx_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
}

static inline int is_svm_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_SVM_REV3_CPU);
}
struct passthrough_event_callback {
    int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};
static int have_passthrough_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
}
static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
{
    struct passthrough_event_callback *cb, *temp;

    v3_read_lock(&(core->vm_info->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(core->vm_info->passthrough_impl.event_callback_list),
			     node) {
	cb->callback(core, event, cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->passthrough_impl.event_callback_lock));
}
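/* Illustrative sketch, guarded out of the build: the minimal shape of a
   client callback that dispatch_passthrough_event() invokes.  It makes no
   assumption about the event's field names and simply counts dispatches
   through its priv_data pointer.  The same pattern applies to the nested
   paging event callbacks below. */
#if 0
static int example_passthrough_event_counter(struct guest_info *core,
					     struct v3_passthrough_pg_event *event,
					     void *priv_data)
{
    // priv_data is whatever the client supplied at registration time;
    // here it is assumed to point to a counter owned by the client.
    uint64_t *count = (uint64_t *)priv_data;

    (*count)++;

    return 0;
}
#endif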
struct nested_event_callback {
    int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};
static int have_nested_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
}
static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
{
    struct nested_event_callback *cb, *temp;

    v3_read_lock(&(core->vm_info->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(core->vm_info->nested_impl.event_callback_list),
			     node) {
	cb->callback(core, event, cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->nested_impl.event_callback_lock));
}
static addr_t create_generic_pt_page(struct guest_info *core) {
    void * page = 0;
    void * temp;

    temp = V3_AllocPagesExtended(1, PAGE_SIZE_4KB, -1, 0); // no constraints

    if (!temp) {
	PrintError(VM_NONE, VCORE_NONE, "Cannot allocate page\n");
	return 0;
    }

    page = V3_VAddr(temp);
    memset(page, 0, PAGE_SIZE);

    return (addr_t)page;
}
// Inline handler functions for each cpu mode
#include "vmm_direct_paging_32.h"
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
int v3_init_passthrough_pts(struct guest_info * info) {
    info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));
    return 0;
}
int v3_free_passthrough_pts(struct guest_info * core) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    // Delete the old direct map page tables
    switch (mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough here
	    // There are *only* PAE tables
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    if (core->direct_map_pt) {
		delete_page_tables_32pae((pdpe32pae_t *)V3_VAddr((void *)(core->direct_map_pt)));
	    }
	    break;
	default:
	    PrintError(core->vm_info, core, "Unknown CPU Mode\n");
	    return -1;
    }

    return 0;
}
int v3_reset_passthrough_pts(struct guest_info * core) {

    v3_free_passthrough_pts(core);

    // create new direct map page table
    v3_init_passthrough_pts(core);

    return 0;
}
int v3_activate_passthrough_pt(struct guest_info * info) {
    // For now... But we need to change this....
    // As soon as shadow paging becomes active the passthrough tables are hosed
    // So this will cause chaos if it is called at that time

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
	dispatch_passthrough_event(info,&event);
    }

    struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
    struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
    addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);

    // Passthrough PTs will only be PAE page tables.
    shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
    shadow_cr4->pae = 1;

    PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
	dispatch_passthrough_event(info,&event);
    }

    return 0;
}
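/* Note on the ">> 5" in v3_activate_passthrough_pt() (illustrative, with
   an assumed example address): the 32PAE PDPT must be 32-byte aligned, and
   the pdpt_base_addr bitfield of cr3_32_PAE occupies bits 31:5 of CR3, so
   shifting the table's physical address right by 5 stores its address bits
   in the right position.  For a table at 0x12345680:

       0x12345680 >> 5 == 0x0091A2B4   // value stored in pdpt_base_addr
       0x0091A2B4 << 5 == 0x12345680   // base address the hardware uses
*/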
int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
				    addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
	dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch (mode) {
	case REAL:
	case PROTECTED:
	    // Note intentional fallthrough here
	    // There are only PAE page tables now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
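/* Illustrative sketch, guarded out of the build: a caller that only needs
   success or failure may pass NULL for actual_start/actual_end, and the
   handler substitutes locals, as above.  The wrapper function name is an
   assumption for illustration. */
#if 0
static int example_passthrough_fault_usage(struct guest_info *core,
					   addr_t fault_addr, pf_error_t error_code)
{
    // NULL for both range-out parameters: the caller does not care which
    // span of guest physical addresses actually got mapped.
    return v3_handle_passthrough_pagefault(core, fault_addr, error_code, NULL, NULL);
}
#endif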
int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
				   addr_t *actual_start, addr_t *actual_end) {

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
	dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch (mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough - there
	    // are only PAE page tables now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
					 addr_t inv_addr_start, addr_t inv_addr_end,
					 addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
	dispatch_passthrough_event(info,&event);
    }

    switch (mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough
	    // There are only PAE PTs now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_init_passthrough_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
    v3_rw_lock_init(&(vm->passthrough_impl.event_callback_lock));
    return 0;
}
int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
{
    struct passthrough_event_callback *cb, *temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->passthrough_impl.event_callback_list),
			     node) {
	list_del(&(cb->node));
	V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->passthrough_impl.event_callback_lock));

    return 0;
}
int v3_init_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to init
    return 0;
}

int v3_deinit_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to deinit
    return 0;
}
int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
						  int (*callback)(struct guest_info *core,
								  struct v3_passthrough_pg_event *,
								  void *priv_data),
						  void *priv_data)
{
    struct passthrough_event_callback *ec = V3_Malloc(sizeof(struct passthrough_event_callback));
    addr_t flags;

    if (!ec) {
	PrintError(vm, VCORE_NONE, "Unable to allocate for a passthrough paging event callback\n");
	return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->passthrough_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    return 0;
}
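/* Illustrative sketch, guarded out of the build: pairing registration with
   unregistration.  example_passthrough_event_counter is the hypothetical
   callback sketched earlier; the counter it updates must outlive the
   registration, since the VMM holds only the pointer. */
#if 0
static uint64_t example_event_count = 0;

static int example_register_events(struct v3_vm_info *vm)
{
    if (v3_register_passthrough_paging_event_callback(vm,
						      example_passthrough_event_counter,
						      &example_event_count) == -1) {
	return -1;
    }

    // ... later, teardown must pass the same (callback, priv_data) pair,
    // since that pair is how the registration is located and freed.
    return v3_unregister_passthrough_paging_event_callback(vm,
							   example_passthrough_event_counter,
							   &example_event_count);
}
#endif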
int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
						    int (*callback)(struct guest_info *core,
								    struct v3_passthrough_pg_event *,
								    void *priv_data),
						    void *priv_data)
{
    struct passthrough_event_callback *cb, *temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->passthrough_impl.event_callback_list),
			     node) {
	if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
	    list_del(&(cb->node));
	    V3_Free(cb);
	    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);
	    return 0;
	}
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}
// inline nested paging support for Intel and AMD
#ifdef V3_CONFIG_SVM
#include "svm_npt.h"
#endif
#ifdef V3_CONFIG_VMX
#include "vmx_npt.h"
#endif

inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
{
    if (is_vmx_nested()) {
#ifdef V3_CONFIG_VMX
	ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
#endif
    } else {
	*out = *(pf_error_t *)pfinfo;
    }
}
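/* Illustrative sketch, guarded out of the build: what "pfinfo" means on
   each path.  On SVM, the exit handler already has an x86 pf_error_t and
   passes a pointer to it; on VMX, it passes the raw EPT exit qualification,
   which convert_to_pf_error() translates above.  The function and variable
   names here are assumptions for illustration. */
#if 0
static void example_nested_fault_entry(struct guest_info *core, addr_t fault_addr)
{
    if (is_svm_nested()) {
	pf_error_t err = {0};	// would be filled in from the SVM exit info
	v3_handle_nested_pagefault(core, fault_addr, &err, NULL, NULL);
    }
    // On VMX, the caller instead passes a (struct ept_exit_qual *)
    // derived from the exit qualification, as convert_to_pf_error() shows.
}
#endif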
int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    pf_error_t err;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    convert_to_pf_error(pfinfo,&err);

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    } else {
	rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
			      addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    } else {
	rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr_range(struct guest_info * info,
				    addr_t inv_addr_start, addr_t inv_addr_end,
				    addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    } else {
	rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_init_nested_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
    v3_rw_lock_init(&(vm->nested_impl.event_callback_lock));
    return 0;
}
int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
{
    if (is_vmx_nested()) {
	return init_ept(core, (struct vmx_hw_info *) hwinfo);
    } else {
	// no initialization for SVM
	return 0;
    }
}
int v3_deinit_nested_paging(struct v3_vm_info *vm)
{
    struct nested_event_callback *cb, *temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->nested_impl.event_callback_list),
			     node) {
	list_del(&(cb->node));
	V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->nested_impl.event_callback_lock));

    return 0;
}
int v3_deinit_nested_paging_core(struct guest_info *core)
{
    // nothing to do... probably dealloc? FIXME PAD
    return 0;
}
int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
					     int (*callback)(struct guest_info *core,
							     struct v3_nested_pg_event *,
							     void *priv_data),
					     void *priv_data)
{
    struct nested_event_callback *ec = V3_Malloc(sizeof(struct nested_event_callback));
    addr_t flags;

    if (!ec) {
	PrintError(vm, VCORE_NONE, "Unable to allocate for a nested paging event callback\n");
	return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->nested_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    return 0;
}
int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
					       int (*callback)(struct guest_info *core,
							       struct v3_nested_pg_event *,
							       void *priv_data),
					       void *priv_data)
{
    struct nested_event_callback *cb, *temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->nested_impl.event_callback_list),
			     node) {
	if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
	    list_del(&(cb->node));
	    V3_Free(cb);
	    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);
	    return 0;
	}
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}