/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Steven Jaconette <stevenjaconette2007@u.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (refactor + events)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vm_guest.h>
#include <palacios/vmm_ctrl_regs.h>
#if !defined(V3_CONFIG_DEBUG_NESTED_PAGING) && !defined(V3_CONFIG_DEBUG_SHADOW_PAGING)
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/*

  "Direct Paging" combines these three functionalities:

   1. Passthrough paging for SVM and VMX

      Passthrough paging is used for shadow paging when
      the guest does not have paging turned on, for example
      when it is running in real mode or protected mode
      early in a typical boot process.  Passthrough page
      tables are shadow page tables that are built assuming
      the guest virtual to guest physical mapping is the identity.
      Thus, what they implement is the GPA->HPA mapping.

      Passthrough page tables are built using 32PAE paging.

   2. Nested paging on SVM

      The SVM nested page tables have the same format as
      regular page tables.  For this reason, we can reuse
      much of the passthrough implementation.  A nested page
      table mapping is a GPA->HPA mapping, creating a very
      similar model as with passthrough paging, except that it is
      always active, whether the guest has paging on or not.

   3. Nested paging on VMX

      The VMX nested page tables have a different format
      than regular page tables.  For this reason, we have
      implemented them in the vmx_npt.h file.  The code
      here then is a wrapper, allowing us to make nested
      paging functionality appear uniform across VMX and SVM
      elsewhere in the codebase.

*/
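/*
  A minimal illustration (not part of the build) of what the identity
  assumption means concretely under 32PAE: a guest physical address is
  decomposed by the architectural PAE fields, and the passthrough tables
  install the GPA->HPA translation at the resulting slot.  The helper
  name below is hypothetical:

      static void pae_decompose(addr_t gpa) {
          addr_t pdpt_idx = (gpa >> 30) & 0x3;    // 2 bits  -> 4 PDPT entries
          addr_t pd_idx   = (gpa >> 21) & 0x1ff;  // 9 bits  -> 512 PDEs
          addr_t pt_idx   = (gpa >> 12) & 0x1ff;  // 9 bits  -> 512 PTEs
          addr_t offset   = gpa & 0xfff;          // 12 bits -> 4KB page offset
          // the PTE at (pdpt_idx, pd_idx, pt_idx) receives the HPA that
          // the VM's memory map reports for this GPA
      }
*/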
static inline int is_vmx_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_VMX_EPT_CPU || v3_mach_type==V3_VMX_EPT_UG_CPU);
}

static inline int is_svm_nested()
{
    extern v3_cpu_arch_t v3_mach_type;

    return (v3_mach_type==V3_SVM_REV3_CPU);
}
struct passthrough_event_callback {
    int (*callback)(struct guest_info *core, struct v3_passthrough_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};
static int have_passthrough_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->passthrough_impl.event_callback_list));
}

static void dispatch_passthrough_event(struct guest_info *core, struct v3_passthrough_pg_event *event)
{
    struct passthrough_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(core->vm_info->passthrough_impl.event_callback_list),
			     node) {
	cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->passthrough_impl.event_callback_lock));
}
struct nested_event_callback {
    int (*callback)(struct guest_info *core, struct v3_nested_pg_event *event, void *priv_data);
    void *priv_data;

    struct list_head node;
};

static int have_nested_callbacks(struct guest_info *core)
{
    // lock acquisition unnecessary
    // caller will acquire the lock before *iterating* through the list
    // so any race will be resolved then
    return !list_empty(&(core->vm_info->nested_impl.event_callback_list));
}

static void dispatch_nested_event(struct guest_info *core, struct v3_nested_pg_event *event)
{
    struct nested_event_callback *cb,*temp;

    v3_read_lock(&(core->vm_info->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(core->vm_info->nested_impl.event_callback_list),
			     node) {
	cb->callback(core,event,cb->priv_data);
    }

    v3_read_unlock(&(core->vm_info->nested_impl.event_callback_lock));
}
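/*
  A hedged usage sketch of the event mechanism (not part of the build).
  The callback name and body are hypothetical; the signature and the
  registration call match the definitions later in this file:

      static int trace_pt_event(struct guest_info *core,
                                struct v3_passthrough_pg_event *event,
                                void *priv_data) {
          // runs under the read lock, once with PASSTHROUGH_PREIMPL before
          // each operation and once with PASSTHROUGH_POSTIMPL after it
          return 0;
      }

      // at VM setup time:
      //   v3_register_passthrough_paging_event_callback(vm, trace_pt_event, NULL);
*/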
static addr_t create_generic_pt_page(struct guest_info *core) {
    void * page = 0;
    void * temp;

    temp = V3_AllocPagesExtended(1, PAGE_SIZE_4KB,
				 core->resource_control.pg_node_id,
				 core->resource_control.pg_filter_func,
				 core->resource_control.pg_filter_state);

    if (!temp) {
	PrintError(VM_NONE, VCORE_NONE,"Cannot allocate page\n");
	return 0;
    }

    page = V3_VAddr(temp);
    memset(page, 0, PAGE_SIZE);

    return (addr_t)page;
}
// Inline handler functions for each cpu mode
#include "vmm_direct_paging_32.h"
#include "vmm_direct_paging_32pae.h"
#include "vmm_direct_paging_64.h"
int v3_init_passthrough_pts(struct guest_info * info) {
    if (info->shdw_pg_mode == NESTED_PAGING && is_vmx_nested()) {
	// skip - ept_init will do this allocation
	return 0;
    }

    // create the top-level page table for the GPA->HPA map
    info->direct_map_pt = (addr_t)V3_PAddr((void *)create_generic_pt_page(info));

    return 0;
}
int v3_free_passthrough_pts(struct guest_info * core) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (core->shdw_pg_mode == NESTED_PAGING && is_vmx_nested()) {
	// there are no passthrough page tables, but
	// the EPT implementation is using direct_map_pt to store
	// the EPT root table pointer... and the EPT tables
	// are not compatible with regular x86 tables, so we
	// must not attempt to free them here...
	return 0;
    }

    // we are either in shadow or in SVM nested
    // in either case, we can nuke the PTs

    // Delete the old direct map page tables
    switch(mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough here
	    // There are *only* PAE tables
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    if (core->direct_map_pt) {
		delete_page_tables_32pae((pdpe32pae_t *)V3_VAddr((void *)(core->direct_map_pt)));
	    }
	    break;
	default:
	    PrintError(core->vm_info, core, "Unknown CPU Mode\n");
	    return -1;
	    break;
    }

    return 0;
}
int v3_reset_passthrough_pts(struct guest_info * core) {

    v3_free_passthrough_pts(core);

    // create new direct map page table
    v3_init_passthrough_pts(core);

    return 0;
}
int v3_activate_passthrough_pt(struct guest_info * info) {
    // For now... But we need to change this....
    // As soon as shadow paging becomes active the passthrough tables are hosed
    // So this will cause chaos if it is called at that time

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},0,0};
	dispatch_passthrough_event(info,&event);
    }

    struct cr3_32_PAE * shadow_cr3 = (struct cr3_32_PAE *) &(info->ctrl_regs.cr3);
    struct cr4_32 * shadow_cr4 = (struct cr4_32 *) &(info->ctrl_regs.cr4);
    addr_t shadow_pt_addr = *(addr_t*)&(info->direct_map_pt);

    // Passthrough PTs will only be PAE page tables.
    shadow_cr3->pdpt_base_addr = shadow_pt_addr >> 5;
    shadow_cr4->pae = 1;

    PrintDebug(info->vm_info, info, "Activated Passthrough Page tables\n");

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_ACTIVATE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},0,0};
	dispatch_passthrough_event(info,&event);
    }

    return 0;
}
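/*
  Why the ">> 5" above: in PAE mode, CR3 holds the 32-byte-aligned PDPT
  base in bits 31:5, and cr3_32_PAE.pdpt_base_addr is exactly that field.
  A worked example with an illustrative address:

      0x01234560 >> 5  ==  0x0091A2B   // value stored in pdpt_base_addr
      0x0091A2B  << 5  ==  0x01234560  // address the hardware reconstructs
*/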
int v3_handle_passthrough_pagefault(struct guest_info * info, addr_t fault_addr, pf_error_t error_code,
				    addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_PREIMPL,fault_addr,error_code,fault_addr,fault_addr};
	dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
	case REAL:
	case PROTECTED:
	    // Note intentional fallthrough here
	    // There are only PAE page tables now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=handle_passthrough_pagefault_32pae(info, fault_addr, error_code, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_PAGEFAULT,PASSTHROUGH_POSTIMPL,fault_addr,error_code,*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
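/*
  Callers that do not care which GPA range actually got mapped may pass
  NULL for actual_start/actual_end; the handler then redirects them to
  local scratch variables.  Both call styles, as a hedged sketch:

      addr_t s, e;
      v3_handle_passthrough_pagefault(info, fault_addr, error_code, &s, &e);
      // s..e now bounds the region the implementation actually mapped

      v3_handle_passthrough_pagefault(info, fault_addr, error_code, NULL, NULL);
*/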
int v3_invalidate_passthrough_addr(struct guest_info * info, addr_t inv_addr,
				   addr_t *actual_start, addr_t *actual_end) {

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
	dispatch_passthrough_event(info,&event);
    }

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    switch(mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough - there
	    // are only PAE page tables now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=invalidate_addr_32pae(info, inv_addr, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_invalidate_passthrough_addr_range(struct guest_info * info,
					 addr_t inv_addr_start, addr_t inv_addr_end,
					 addr_t *actual_start, addr_t *actual_end) {
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(info);
    addr_t start, end;
    int rc = -1;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
	dispatch_passthrough_event(info,&event);
    }

    switch(mode) {
	case REAL:
	case PROTECTED:
	    // Intentional fallthrough
	    // There are only PAE PTs now
	case PROTECTED_PAE:
	case LONG:
	case LONG_32_COMPAT:
	    // Long mode will only use 32PAE page tables...
	    rc=invalidate_addr_32pae_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
	    break;
	default:
	    PrintError(info->vm_info, info, "Unknown CPU Mode\n");
	    break;
    }

    if (have_passthrough_callbacks(info)) {
	struct v3_passthrough_pg_event event={PASSTHROUGH_INVALIDATE_RANGE,PASSTHROUGH_POSTIMPL,0,{0,0,0,0,0,0},*actual_start,*actual_end};
	dispatch_passthrough_event(info,&event);
    }

    return rc;
}
int v3_init_passthrough_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->passthrough_impl.event_callback_list));
    v3_rw_lock_init(&(vm->passthrough_impl.event_callback_lock));
    vm->passthrough_impl.inited=1;

    return 0;
}
int v3_deinit_passthrough_paging(struct v3_vm_info *vm)
{
    struct passthrough_event_callback *cb,*temp;
    addr_t flags;

    if (!vm->passthrough_impl.inited) {
	return 0;
    }

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->passthrough_impl.event_callback_list),
			     node) {
	list_del(&(cb->node));
	V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->passthrough_impl.event_callback_lock));

    return 0;
}
int v3_init_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to init
    return 0;
}

int v3_deinit_passthrough_paging_core(struct guest_info *core)
{
    // currently nothing to deinit
    return 0;
}
int v3_register_passthrough_paging_event_callback(struct v3_vm_info *vm,
						  int (*callback)(struct guest_info *core,
								  struct v3_passthrough_pg_event *,
								  void *priv_data),
						  void *priv_data)
{
    struct passthrough_event_callback *ec = V3_Malloc(sizeof(struct passthrough_event_callback));
    addr_t flags;

    if (!ec) {
	PrintError(vm, VCORE_NONE, "Unable to allocate for a passthrough paging event callback\n");
	return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->passthrough_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    return 0;
}
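/*
  Registration and unregistration are matched on the (callback, priv_data)
  pair, so one function can be registered several times with distinct
  private data.  A hedged sketch, reusing the hypothetical trace_pt_event
  from the earlier example:

      v3_register_passthrough_paging_event_callback(vm, trace_pt_event, my_state);
      ...
      v3_unregister_passthrough_paging_event_callback(vm, trace_pt_event, my_state);
*/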
int v3_unregister_passthrough_paging_event_callback(struct v3_vm_info *vm,
						    int (*callback)(struct guest_info *core,
								    struct v3_passthrough_pg_event *,
								    void *priv_data),
						    void *priv_data)
{
    struct passthrough_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->passthrough_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->passthrough_impl.event_callback_list),
			     node) {
	if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
	    list_del(&(cb->node));
	    V3_Free(cb);
	    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);
	    return 0;
	}
    }

    v3_write_unlock_irqrestore(&(vm->passthrough_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}
// inline nested paging support for Intel and AMD
#include "svm_npt.h"
#include "vmx_npt.h"

inline void convert_to_pf_error(void *pfinfo, pf_error_t *out)
{
    if (is_vmx_nested()) {
#ifdef V3_CONFIG_VMX
	ept_exit_qual_to_pf_error((struct ept_exit_qual *)pfinfo, out);
#endif
    } else {
	*out = *(pf_error_t *)pfinfo;
    }
}
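/*
  The opaque pfinfo argument has a different shape per vendor: an SVM
  nested fault already delivers a pf_error_t, while a VMX fault delivers
  an EPT exit qualification that must be translated field by field.  A
  hedged sketch of the two cases (variable names are illustrative):

      pf_error_t err;
      convert_to_pf_error(&svm_exitinfo1_as_pf_error, &err);  // SVM: direct copy
      convert_to_pf_error(&ept_exit_qualification, &err);     // VMX: translated
*/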
int v3_handle_nested_pagefault(struct guest_info * info, addr_t fault_addr, void *pfinfo, addr_t *actual_start, addr_t *actual_end)
{
    int rc = -1;
    pf_error_t err;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    convert_to_pf_error(pfinfo,&err);

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_PREIMPL,fault_addr,err,fault_addr,fault_addr};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    } else {
	rc = handle_svm_nested_pagefault(info,fault_addr,pfinfo,actual_start,actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_PAGEFAULT,NESTED_POSTIMPL,fault_addr,err,*actual_start,*actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr(struct guest_info * info, addr_t inv_addr,
			      addr_t *actual_start, addr_t *actual_end)
{
    int rc = -1;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr),PAGE_ADDR(inv_addr)+PAGE_SIZE-1};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    } else {
	rc = handle_svm_invalidate_nested_addr(info, inv_addr, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_invalidate_nested_addr_range(struct guest_info * info,
				    addr_t inv_addr_start, addr_t inv_addr_end,
				    addr_t *actual_start, addr_t *actual_end)
{
    int rc = -1;
    addr_t start, end;

    if (!actual_start) { actual_start=&start; }
    if (!actual_end) { actual_end=&end; }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_PREIMPL,0,{0,0,0,0,0,0},PAGE_ADDR(inv_addr_start),PAGE_ADDR(inv_addr_end-1)+PAGE_SIZE-1};
	dispatch_nested_event(info,&event);
    }

    if (is_vmx_nested()) {
	rc = handle_vmx_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    } else {
	rc = handle_svm_invalidate_nested_addr_range(info, inv_addr_start, inv_addr_end, actual_start, actual_end);
    }

    if (have_nested_callbacks(info)) {
	struct v3_nested_pg_event event={NESTED_INVALIDATE_RANGE,NESTED_POSTIMPL,0,{0,0,0,0,0,0},*actual_start, *actual_end};
	dispatch_nested_event(info,&event);
    }

    return rc;
}
int v3_init_nested_paging(struct v3_vm_info *vm)
{
    INIT_LIST_HEAD(&(vm->nested_impl.event_callback_list));
    v3_rw_lock_init(&(vm->nested_impl.event_callback_lock));
    vm->nested_impl.inited=1;

    return 0;
}
int v3_init_nested_paging_core(struct guest_info *core, void *hwinfo)
{
    if (is_vmx_nested()) {
	return init_ept(core, (struct vmx_hw_info *) hwinfo);
    } else {
	// no initialization for SVM
	// the direct map page tables are used since the
	// nested pt format is identical to the main pt format
	return 0;
    }
}
int v3_deinit_nested_paging(struct v3_vm_info *vm)
{
    struct nested_event_callback *cb,*temp;
    addr_t flags;

    if (!vm->nested_impl.inited) {
	return 0;
    }

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->nested_impl.event_callback_list),
			     node) {
	list_del(&(cb->node));
	V3_Free(cb);
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    v3_rw_lock_deinit(&(vm->nested_impl.event_callback_lock));

    return 0;
}
int v3_deinit_nested_paging_core(struct guest_info *core)
{
    if (core->shdw_pg_mode == NESTED_PAGING) {
	if (is_vmx_nested()) {
	    return deinit_ept(core);
	} else {
	    // SVM nested deinit is handled by the passthrough paging teardown
	    return 0;
	}
    } else {
	// not relevant unless we are in nested paging mode
	return 0;
    }
}
int v3_register_nested_paging_event_callback(struct v3_vm_info *vm,
					     int (*callback)(struct guest_info *core,
							     struct v3_nested_pg_event *,
							     void *priv_data),
					     void *priv_data)
{
    struct nested_event_callback *ec = V3_Malloc(sizeof(struct nested_event_callback));
    addr_t flags;

    if (!ec) {
	PrintError(vm, VCORE_NONE, "Unable to allocate for a nested paging event callback\n");
	return -1;
    }

    ec->callback = callback;
    ec->priv_data = priv_data;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));
    list_add(&(ec->node),&(vm->nested_impl.event_callback_list));
    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    return 0;
}
int v3_unregister_nested_paging_event_callback(struct v3_vm_info *vm,
					       int (*callback)(struct guest_info *core,
							       struct v3_nested_pg_event *,
							       void *priv_data),
					       void *priv_data)
{
    struct nested_event_callback *cb,*temp;
    addr_t flags;

    flags=v3_write_lock_irqsave(&(vm->nested_impl.event_callback_lock));

    list_for_each_entry_safe(cb,
			     temp,
			     &(vm->nested_impl.event_callback_list),
			     node) {
	if ((callback == cb->callback) && (priv_data == cb->priv_data)) {
	    list_del(&(cb->node));
	    V3_Free(cb);
	    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);
	    return 0;
	}
    }

    v3_write_unlock_irqrestore(&(vm->nested_impl.event_callback_lock),flags);

    PrintError(vm, VCORE_NONE, "No callback found!\n");

    return -1;
}