/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (refactor + events)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
#include <palacios/vmm_ctrl_regs.h>
#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/*

  "Direct Paging" combines these three functionalities:

   1. Passthrough paging for SVM and VMX

      Passthrough paging is used for shadow paging when
      the guest does not have paging turned on, for example
      when it is running in real mode or protected mode
      early in a typical boot process.  Passthrough page
      tables are shadow page tables that are built assuming
      the guest virtual to guest physical mapping is the identity.
      Thus, what they implement is the GPA->HPA mapping.

      Passthrough page tables are built using 32PAE paging.


   2. Nested paging on SVM

      The SVM nested page tables have the same format as
      regular page tables.  For this reason, we can reuse
      much of the passthrough implementation.  A nested page
      table mapping is a GPA->HPA mapping, creating a very
      similar model as with passthrough paging, just that it's
      always active, whether the guest has paging on or not.


   3. Nested paging on VMX

      The VMX nested page tables have a different format
      than regular page tables.  For this reason, we have
      implemented them in the vmx_npt.h file.  The code
      here then is a wrapper, allowing us to make nested
      paging functionality appear uniform across VMX and SVM
      elsewhere in the codebase.

*/
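/* A rough sketch (illustrative only, not part of the build) of what one
 * passthrough mapping amounts to: the faulting address is taken to be both
 * the GVA and the GPA (identity assumption), resolved to an HPA through the
 * guest memory map, and then installed in the 32PAE tables.  The real work
 * is done by handle_passthrough_pagefault_32pae() below; v3_gpa_to_hpa() is
 * assumed here only as a stand-in for the GPA->HPA lookup.
 *
 *     addr_t gpa = fault_addr;    // GVA == GPA under the identity assumption
 *     addr_t hpa = 0;
 *
 *     if (v3_gpa_to_hpa(core, gpa, &hpa) == 0) {
 *         // install a 4KB mapping PAGE_ADDR(gpa) -> PAGE_ADDR(hpa)
 *         // in the 32PAE passthrough page tables
 *     } else {
 *         // no backing host memory: the access must be handled another way
 *         // (e.g., as a hooked/MMIO region) or treated as an error
 *     }
 */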
static inline int is_vmx_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
}

static inline int is_svm_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_SVM_REV3_CPU);
}
struct passthrough_event_callback {
    int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};
static int have_passthrough_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
}
static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
{
    struct passthrough_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(core->vm_info->passthrough_impl.event_callback_list),
                             node) {
        cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->passthrough_impl.event_callback_lock));
}
struct nested_event_callback {
    int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};
static int have_nested_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
}
static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
{
    struct nested_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(core->vm_info->nested_impl.event_callback_list),
                             node) {
        cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->nested_impl.event_callback_lock));
}
static addr_t create_generic_pt_page(struct guest_info *core) {
    void * page = 0;
    void * temp;

    temp = V3_AllocPagesExtended(1, PAGE_SIZE_4KB, -1, 0, 0); // no constraints

    if (!temp) {
        PrintError(VM_NONE, VCORE_NONE,"Cannot allocate page\n");
        return 0;
    }

    page = V3_VAddr(temp);
    memset(page, 0, PAGE_SIZE);

    return (addr_t)page;
}
// Inline handler functions for each cpu mode
#include "vmm_direct_paging_32.h"
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
int v3_init_passthrough_pts(struct guest_info * info) {

    if (info->shdw_pg_mode == NESTED_PAGING && is_vmx_nested()) {
        // skip - ept_init will do this allocation
        return 0;
    }

    // create a new direct map page table for this core
    info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));

    return 0;
}
int v3_free_passthrough_pts(struct guest_info * core) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (core->shdw_pg_mode == NESTED_PAGING && is_vmx_nested()) {
        // there are no passthrough page tables, but
        // the EPT implementation is using direct_map_pt to store
        // the EPT root table pointer... and the EPT tables
        // are not compatible with regular x86 tables, so we
        // must not attempt to free them here...
        return 0;
    }

    // we are either in shadow or in SVM nested
    // in either case, we can nuke the PTs

    // Delete the old direct map page tables
    switch(mode) {
    case REAL:
    case PROTECTED:
        // Intentional fallthrough here
        // There are *only* PAE tables
    case PROTECTED_PAE:
    case LONG:
    case LONG_32_COMPAT:
        // Long mode will only use 32PAE page tables...
        if (core->direct_map_pt) {
            delete_page_tables_32pae((pdpe32pae_t *)V3_VAddr((void *)(core->direct_map_pt)));
        }
        break;
    default:
        PrintError(core->vm_info, core, "Unknown CPU Mode\n");
        return -1;
    }

    return 0;
}
int v3_reset_passthrough_pts(struct guest_info * core) {

    // free the old direct map page tables
    v3_free_passthrough_pts(core);

    // create new direct map page table
    v3_init_passthrough_pts(core);

    return 0;
}
int v3_activate_passthrough_pt(struct guest_info * info) {
    // For now... But we need to change this....
    // As soon as shadow paging becomes active the passthrough tables are hosed
    // So this will cause chaos if it is called at that time

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
        dispatch_passthrough_event(info,&event);
    }

    struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
    struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
    addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);

    // Passthrough PTs will only be PAE page tables.
    // In PAE mode, CR3 holds the physical address of the PDPT starting at
    // bit 5, hence the shift when filling the pdpt_base_addr bitfield.
    shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
    shadow_cr4->pae = 1;

    PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
        dispatch_passthrough_event(info,&event);
    }

    return 0;
}
int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
                                    addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
        dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
    case REAL:
    case PROTECTED:
        // Note intentional fallthrough here
        // There are only PAE page tables now
    case PROTECTED_PAE:
    case LONG:
    case LONG_32_COMPAT:
        // Long mode will only use 32PAE page tables...
        rc=handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
        break;
    default:
        PrintError(info->vm_info, info, "Unknown CPU Mode\n");
        break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
                                   addr_t *actual_start, addr_t *actual_end) {

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
        dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
    case REAL:
    case PROTECTED:
        // Intentional fallthrough - there
        // are only PAE page tables now
    case PROTECTED_PAE:
    case LONG:
    case LONG_32_COMPAT:
        // Long mode will only use 32PAE page tables...
        rc=invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
        break;
    default:
        PrintError(info->vm_info, info, "Unknown CPU Mode\n");
        break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
                                         addr_t inv_addr_start, addr_t inv_addr_end,
                                         addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
        dispatch_passthrough_event(info,&event);
    }

    switch(mode) {
    case REAL:
    case PROTECTED:
        // Intentional fallthrough
        // There are only PAE PTs now
    case PROTECTED_PAE:
    case LONG:
    case LONG_32_COMPAT:
        // Long mode will only use 32PAE page tables...
        rc=invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
        break;
    default:
        PrintError(info->vm_info, info, "Unknown CPU Mode\n");
        break;
    }

    if (have_passthrough_callbacks(info)) {
        struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
        dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_init_passthrough_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
    v3_rw_lock_init(&(vm->passthrough_impl.event_callback_lock));
    return 0;
}
int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
{
    struct passthrough_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->passthrough_impl.event_callback_list),
                             node) {
        list_del(&(cb->node));
        V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->passthrough_impl.event_callback_lock));

    return 0;
}
int v3_init_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to init
    return 0;
}

int v3_deinit_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to deinit
    return 0;
}
int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
                                                  int (*callback)(struct guest_info *core,
                                                                  struct v3_passthrough_pg_event *,
                                                                  void *priv_data),
                                                  void *priv_data)
{
    struct passthrough_event_callback *ec = V3_Malloc(sizeof(struct passthrough_event_callback));
    addr_t flags;

    if (!ec) {
        PrintError(vm, VCORE_NONE, "Unable to allocate for a passthrough paging event callback\n");
        return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->passthrough_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    return 0;
}
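/* Example usage (hypothetical, not part of this file): a client that wants
 * to observe passthrough paging activity can register a callback with the
 * function above.  The observer name and its counter are made up for
 * illustration; only the callback signature defined in this file is assumed.
 *
 *     static unsigned long pt_event_count = 0;
 *
 *     static int my_pt_observer(struct guest_info *core,
 *                               struct v3_passthrough_pg_event *event,
 *                               void *priv_data) {
 *         unsigned long *count = (unsigned long *)priv_data;
 *         (*count)++;    // count every pre/post passthrough paging event
 *         return 0;
 *     }
 *
 *     // during VM setup:
 *     v3_register_passthrough_paging_event_callback(vm, my_pt_observer,
 *                                                   &pt_event_count);
 */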
int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
                                                    int (*callback)(struct guest_info *core,
                                                                    struct v3_passthrough_pg_event *,
                                                                    void *priv_data),
                                                    void *priv_data)
{
    struct passthrough_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->passthrough_impl.event_callback_list),
                             node) {
        if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
            list_del(&(cb->node));
            V3_Free(cb);
            v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);
            return 0;
        }
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}
// inline nested paging support for Intel and AMD
inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
{
    if (is_vmx_nested()) {
        ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
    } else {
        *out = *(pf_error_t *)pfinfo;
    }
}
int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    pf_error_t err;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    convert_to_pf_error(pfinfo,&err);

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    } else {
        rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
                              addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    } else {
        rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr_range(struct guest_info * info,
                                    addr_t inv_addr_start, addr_t inv_addr_end,
                                    addr_t *actual_start, addr_t *actual_end)
{
    int rc = 0;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
        dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
        rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    } else {
        rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
        struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
        dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_init_nested_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
    v3_rw_lock_init(&(vm->nested_impl.event_callback_lock));
    return 0;
}
int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
{
    if (is_vmx_nested()) {
        return init_ept(core, (struct vmx_hw_info *) hwinfo);
    } else {
        // no initialization for SVM
        // the direct map page tables are used since the
        // nested pt format is identical to the main pt format
        return 0;
    }
}
int v3_deinit_nested_paging(struct v3_vm_info *vm)
{
    struct nested_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->nested_impl.event_callback_list),
                             node) {
        list_del(&(cb->node));
        V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->nested_impl.event_callback_lock));

    return 0;
}
int v3_deinit_nested_paging_core(struct guest_info *core)
{
    if (core->shdw_pg_mode == NESTED_PAGING) {
        if (is_vmx_nested()) {
            return deinit_ept(core);
        } else {
            // SVM nested deinit is handled by the passthrough paging teardown
            return 0;
        }
    }

    return 0;
}
int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
                                             int (*callback)(struct guest_info *core,
                                                             struct v3_nested_pg_event *,
                                                             void *priv_data),
                                             void *priv_data)
{
    struct nested_event_callback *ec = V3_Malloc(sizeof(struct nested_event_callback));
    addr_t flags;

    if (!ec) {
        PrintError(vm, VCORE_NONE, "Unable to allocate for a nested paging event callback\n");
        return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->nested_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    return 0;
}
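/* Example pairing (hypothetical, not part of this file): unregistration
 * below matches on both the callback pointer and priv_data, so a client must
 * pass the same pair it registered with.  my_npt_observer and my_data are
 * illustrative names only.
 *
 *     v3_register_nested_paging_event_callback(vm, my_npt_observer, my_data);
 *     ...
 *     v3_unregister_nested_paging_event_callback(vm, my_npt_observer, my_data);
 */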
int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
                                               int (*callback)(struct guest_info *core,
                                                               struct v3_nested_pg_event *,
                                                               void *priv_data),
                                               void *priv_data)
{
    struct nested_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
                             temp,
                             &(vm->nested_impl.event_callback_list),
                             node) {
        if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
            list_del(&(cb->node));
            V3_Free(cb);
            v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);
            return 0;
        }
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}