2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #ifndef __VMM_PAGING_H__
22 #define __VMM_PAGING_H__
27 #include <palacios/vmm_types.h>
28 #include <palacios/vmm_util.h>
33 In the following, when we say "page table", we mean the whole 2 or 4 layer
34 page table (PDEs, PTEs), etc.
37 guest-visible paging state
38 This is the state that the guest thinks the machine is using
40 - guest physical memory
41 The physical memory addresses the guest is allowed to use
42 (see shadow page maps, below)
44 (we care about when the current one changes)
45 - guest paging registers (these are never written to hardware)
51 This is the state that the machine will actually use when the guest
52 is running. It consists of:
53 - current shadow page table
54 This is the page table actually used when the guest is running.
55 It is changed/regenerated when the guest page table changes
56 It mostly reflects the guest page table, except that it restricts
57 physical addresses to those the VMM allocates to the guest.
59 This is a mapping from guest physical memory addresses to
60 the current location of the guest physical memory content.
61 It maps from regions of physical memory addresses to regions
62 located in physical memory or elsewhere.
63 (8192,16384) -> MEM(8912,...)
64 (0,8191) -> DISK(65536,..)
65 - guest paging registers (these are written to guest state)
70 This is the state we expect to be operative when the VMM is running.
71 Typically, this is set up by the host os into which we have embedded
72 the VMM, but we include the description here for clarity.
74 This is the page table we use when we are executing in
75 the VMM (or the host os)
81 The reason why the shadow paging state and the host paging state are
82 distinct is to permit the guest to use any virtual address it wants,
83 irrespective of the addresses the VMM or the host os use. These guest
84 virtual addresses are reflected in the shadow paging state. When we
85 exit from the guest, we switch to the host paging state so that any
86 virtual addresses that overlap between the guest and VMM/host now map
87 to the physical addresses expected by the VMM/host. On AMD SVM, this
88 switch is done by the hardware. On Intel VT, the switch is done
89 by the hardware as well, but we are responsible for manually updating
90 the host state in the vmcs before entering the guest.
/*
 * Number of entries per table at each level of the page-table hierarchy,
 * grouped by paging mode: 32-bit, 32-bit PAE, and 64-bit.
 */
#define MAX_PDE32_ENTRIES          1024
#define MAX_PTE32_ENTRIES          1024

#define MAX_PDPE32PAE_ENTRIES      4
#define MAX_PDE32PAE_ENTRIES       512
#define MAX_PTE32PAE_ENTRIES       512

#define MAX_PML4E64_ENTRIES        512
#define MAX_PDPE64_ENTRIES         512
#define MAX_PDE64_ENTRIES          512
#define MAX_PTE64_ENTRIES          512
/*
 * page_type_t: tags naming the page sizes (4KB/2MB/4MB/1GB) and the
 * page-table levels of the 32-bit, 32-bit PAE and 64-bit hierarchies.
 * It is the `type` argument of the find/drill/walk declarations in this file.
 *
 * NOTE(review): the embedded source numbering jumps from 109 to 111, so one
 * line (possibly another enumerator) is not visible here.  Confirm against
 * the complete header before relying on the numeric enumerator values.
 */
109 typedef enum {PAGE_4KB, PAGE_2MB, PAGE_4MB, PAGE_1GB,
111 PAGE_PT32, PAGE_PD32,
112 PAGE_PDP32PAE, PAGE_PD32PAE, PAGE_PT32PAE,
113 PAGE_PML464, PAGE_PDP64, PAGE_PD64, PAGE_PT64} page_type_t;
/*
 * Converts an address into a page table index.
 *
 * Each macro shifts the address down to the bit position of the requested
 * level and masks it to that level's index width (10 bits for 32-bit paging,
 * 2 bits for the PAE PDPT, 9 bits elsewhere).
 *
 * The argument is parenthesized before the cast so that a compound
 * expression (e.g. `base + off`) is converted as a whole rather than having
 * the cast bind only to its first operand.
 */
#define PDE32_INDEX(x)       ((((uint_t)(x)) >> 22) & 0x3ff)
#define PTE32_INDEX(x)       ((((uint_t)(x)) >> 12) & 0x3ff)

#define PDPE32PAE_INDEX(x)   ((((uint_t)(x)) >> 30) & 0x3)
#define PDE32PAE_INDEX(x)    ((((uint_t)(x)) >> 21) & 0x1ff)
#define PTE32PAE_INDEX(x)    ((((uint_t)(x)) >> 12) & 0x1ff)

#define PML4E64_INDEX(x)     ((((ullong_t)(x)) >> 39) & 0x1ff)
#define PDPE64_INDEX(x)      ((((ullong_t)(x)) >> 30) & 0x1ff)
#define PDE64_INDEX(x)       ((((ullong_t)(x)) >> 21) & 0x1ff)
#define PTE64_INDEX(x)       ((((ullong_t)(x)) >> 12) & 0x1ff)
/* Gets the base address needed for a Page Table entry */
/* Deprecate these :*/
/*
 * Each macro drops the low bits of an address (12 for 4KB-granular entries,
 * 22 for a 4MB page) to produce the frame number stored in an entry.
 * The argument is parenthesized before the cast so the whole expression is
 * converted to the target width, not just its first operand.
 */
#define PD32_BASE_ADDR(x)      (((uint_t)(x)) >> 12)
#define PT32_BASE_ADDR(x)      (((uint_t)(x)) >> 12)
#define PD32_4MB_BASE_ADDR(x)  (((uint_t)(x)) >> 22)

#define PML4E64_BASE_ADDR(x)   (((ullong_t)(x)) >> 12)
#define PDPE64_BASE_ADDR(x)    (((ullong_t)(x)) >> 12)
#define PDE64_BASE_ADDR(x)     (((ullong_t)(x)) >> 12)
#define PTE64_BASE_ADDR(x)     (((ullong_t)(x)) >> 12)
// Accessor functions for the page table structures
/*
 * Rebuild an address from the frame-number bit-field of an entry.
 *
 * The bit-field is widened to addr_t before shifting.  Without the cast a
 * 20-bit (or 10-bit) unsigned bit-field is promoted to plain `int`, and
 * shifting a value such as 0x80000 left by 12 overflows a signed 32-bit int,
 * which is undefined behaviour and yields a negative value when widened.
 */
#define PDE32_T_ADDR(x)       (((addr_t)((x).pt_base_addr)) << 12)
#define PTE32_T_ADDR(x)       (((addr_t)((x).page_base_addr)) << 12)
#define PDE32_4MB_T_ADDR(x)   (((addr_t)((x).page_base_addr)) << 22)
/* Replace The above with these... */
/*
 * PAGE_BASE_ADDR*: address -> frame number for the given page size.
 * These keep the argument's own type, so they work for any address width.
 */
#define PAGE_BASE_ADDR(x)        ((x) >> 12)
#define PAGE_BASE_ADDR_4KB(x)    ((x) >> 12)
#define PAGE_BASE_ADDR_2MB(x)    ((x) >> 21)
#define PAGE_BASE_ADDR_4MB(x)    ((x) >> 22)
#define PAGE_BASE_ADDR_1GB(x)    ((x) >> 30)

/*
 * BASE_TO_PAGE_ADDR*: frame number -> address for the given page size.
 * The (parenthesized) argument is widened to addr_t before the shift.
 */
#define BASE_TO_PAGE_ADDR(x)       (((addr_t)(x)) << 12)
#define BASE_TO_PAGE_ADDR_4KB(x)   (((addr_t)(x)) << 12)
#define BASE_TO_PAGE_ADDR_2MB(x)   (((addr_t)(x)) << 21)
#define BASE_TO_PAGE_ADDR_4MB(x)   (((addr_t)(x)) << 22)
#define BASE_TO_PAGE_ADDR_1GB(x)   (((addr_t)(x)) << 30)
/*
 * 32-bit-only helpers: split an address into its in-page offset and its
 * page-aligned part, for 4KB pages (PT32_*) and 4MB pages (PD32_4MB_*).
 * The argument is parenthesized before the cast so compound expressions are
 * converted as a whole.
 */
#define PT32_PAGE_OFFSET(x)       (((uint_t)(x)) & 0xfff)
#define PD32_4MB_PAGE_OFFSET(x)   (((uint_t)(x)) & 0x003fffff)

#define PT32_PAGE_ADDR(x)         (((uint_t)(x)) & 0xfffff000)
#define PD32_4MB_PAGE_ADDR(x)     (((uint_t)(x)) & 0xffc00000)

#define PT32_PAGE_POWER 12
#define PAGE_ALIGNED_ADDR(x)   (((uint_t) (x)) >> 12)
//#define PAGE_ADDR(x)   (PAGE_ALIGNED_ADDR(x) << 12)
/* PAGE_POWER and PAGE_SIZE are defined again, identically, further down. */
#define PAGE_POWER 12
#define PAGE_SIZE 4096
/* use these instead */
/*
 * In-page offset of an address for each supported page size.  The mask is
 * applied in the argument's own type, so any address width is accepted.
 */
#define PAGE_OFFSET(x)       ((x) & 0xfff)
#define PAGE_OFFSET_4KB(x)   ((x) & 0xfff)
#define PAGE_OFFSET_2MB(x)   ((x) & 0x1fffff)
#define PAGE_OFFSET_4MB(x)   ((x) & 0x3fffff)
#define PAGE_OFFSET_1GB(x)   ((x) & 0x3fffffff)
/* log2 of each supported page size. */
#define PAGE_POWER 12
#define PAGE_POWER_4KB 12
#define PAGE_POWER_2MB 21
#define PAGE_POWER_4MB 22
#define PAGE_POWER_1GB 30

// We shift instead of mask because we don't know the address size
/* Round an address down to the start of its page, for each page size. */
#define PAGE_ADDR(x)       (((x) >> PAGE_POWER) << PAGE_POWER)
#define PAGE_ADDR_4KB(x)   (((x) >> PAGE_POWER_4KB) << PAGE_POWER_4KB)
#define PAGE_ADDR_2MB(x)   (((x) >> PAGE_POWER_2MB) << PAGE_POWER_2MB)
#define PAGE_ADDR_4MB(x)   (((x) >> PAGE_POWER_4MB) << PAGE_POWER_4MB)
#define PAGE_ADDR_1GB(x)   (((x) >> PAGE_POWER_1GB) << PAGE_POWER_1GB)
/* Size in bytes of each supported page size. */
#define PAGE_SIZE 4096
#define PAGE_SIZE_4KB 4096
#define PAGE_SIZE_2MB (4096 * 512)
#define PAGE_SIZE_4MB (4096 * 1024)
#define PAGE_SIZE_1GB 0x40000000
/*
 * Extract the page-table root from a CR3 value.
 *
 * The *_PA forms mask off the low control bits (low 12 bits for 32-bit and
 * 64-bit paging, low 5 bits for PAE; the 64-bit form also clears bits 52-63)
 * and return the result as an addr_t.  The *_VA forms apply the same mask and
 * pass the result through V3_VAddr(), yielding a typed pointer to the
 * top-level table.
 *
 * The cr3 argument is parenthesized before the cast so that a compound
 * expression is converted as a whole.
 */
#define CR3_TO_PDE32_PA(cr3)     ((addr_t)(((uint_t)(cr3)) & 0xfffff000))
#define CR3_TO_PDPE32PAE_PA(cr3) ((addr_t)(((uint_t)(cr3)) & 0xffffffe0))
#define CR3_TO_PML4E64_PA(cr3)   ((addr_t)(((ullong_t)(cr3)) & 0x000ffffffffff000LL))

#define CR3_TO_PDE32_VA(cr3)     ((pde32_t *)V3_VAddr((void *)(addr_t)(((uint_t)(cr3)) & 0xfffff000)))
#define CR3_TO_PDPE32PAE_VA(cr3) ((pdpe32pae_t *)V3_VAddr((void *)(addr_t)(((uint_t)(cr3)) & 0xffffffe0)))
#define CR3_TO_PML4E64_VA(cr3)   ((pml4e64_t *)V3_VAddr((void *)(addr_t)(((ullong_t)(cr3)) & 0x000ffffffffff000LL)))
/* Page Table Flag Values */
/* Distinct single-bit values, so they can be OR-ed together. */
#define PT32_HOOK 0x1
#define PT32_GUEST_PT 0x2
227 /* We'll use the general form for now....
228 typedef enum {PDE32_ENTRY_NOT_PRESENT, PDE32_ENTRY_PTE32, PDE32_ENTRY_LARGE_PAGE} pde32_entry_type_t;
229 typedef enum {PTE32_ENTRY_NOT_PRESENT, PTE32_ENTRY_PAGE} pte32_entry_type_t;
231 typedef enum {PDPE32PAE_ENTRY_NOT_PRESENT, PDPE32PAE_ENTRY_PAGE} pdpe32pae_entry_type_t;
232 typedef enum {PDE32PAE_ENTRY_NOT_PRESENT, PDE32PAE_ENTRY_PTE32, PDE32PAE_ENTRY_LARGE_PAGE} pde32pae_entry_type_t;
233 typedef enum {PTE32PAE_ENTRY_NOT_PRESENT, PTE32PAE_ENTRY_PAGE} pte32pae_entry_type_t;
235 typedef enum {PML4E64_ENTRY_NOT_PRESENT, PML4E64_ENTRY_PAGE} pml4e64_entry_type_t;
236 typedef enum {PDPE64_ENTRY_NOT_PRESENT, PDPE64_ENTRY_PTE32, PDPE64_ENTRY_LARGE_PAGE} pdpe64_entry_type_t;
237 typedef enum {PDE64_ENTRY_NOT_PRESENT, PDE64_ENTRY_PTE32, PDE64_ENTRY_LARGE_PAGE} pde64_entry_type_t;
238 typedef enum {PTE64_ENTRY_NOT_PRESENT, PTE64_ENTRY_PAGE} pte64_entry_type_t;
*/
/*
 * General (mode-independent) classification of a page-table entry; this is
 * the "general form" used in place of the per-level enums commented out above.
 */
typedef enum {
    PT_ENTRY_NOT_PRESENT,
    PT_ENTRY_LARGE_PAGE,
    PT_ENTRY_PAGE
} pt_entry_type_t;
/*
 * Result of an access check against a page-table entry.  It is the return
 * type of the v3_can_access_*() declarations and the out-parameter type of
 * the v3_check_*_pt_*() declarations in this header.
 */
typedef enum {
    PT_ACCESS_OK,
    PT_ACCESS_NOT_PRESENT,
    PT_ACCESS_WRITE_ERROR,
    PT_ACCESS_USER_ERROR
} pt_access_status_t;
/*
 * gen_pt_t: packed bit-field view of a page-table entry.
 * NOTE(review): the embedded numbering jumps 246 -> 249, so the field lines
 * that precede `user_page` are not visible here; only 1 bit is declared in
 * this view.  Confirm the full layout against the complete header.
 */
246 typedef struct gen_pt {
249 uint_t user_page : 1;
250 } __attribute__((packed)) gen_pt_t;
/*
 * pde32_t: packed bit-field layout of a 32-bit page-directory entry whose
 * `pt_base_addr` (20 bits) holds the frame number of a page table.
 * NOTE(review): the embedded numbering has gaps (252 -> 255, 257 -> 260,
 * 261 -> 263); the visible fields total only 25 bits, so some bit-field lines
 * are missing from this view.  Do not rely on this layout as shown.
 */
252 typedef struct pde32 {
255 uint_t user_page : 1;
256 uint_t write_through : 1;
257 uint_t cache_disable : 1;
260 uint_t large_page : 1;
261 uint_t global_page : 1;
263 uint_t pt_base_addr : 20;
264 } __attribute__((packed)) pde32_t;
/*
 * pde32_4MB_t: packed bit-field layout of a 32-bit page-directory entry that
 * maps a 4MB page directly; `page_base_addr` is 10 bits wide.
 * NOTE(review): the embedded numbering has gaps (266 -> 269, 271 -> 274,
 * 275 -> 279, 279 -> 281); the visible fields total only 15 bits, so some
 * bit-field lines are missing from this view.
 */
266 typedef struct pde32_4MB {
269 uint_t user_page : 1;
270 uint_t write_through : 1;
271 uint_t cache_disable : 1;
274 uint_t large_page : 1;
275 uint_t global_page : 1;
279 uint_t page_base_addr : 10;
281 } __attribute__((packed)) pde32_4MB_t;
/*
 * pte32_t: packed bit-field layout of a 32-bit page-table entry;
 * `page_base_addr` (20 bits) holds the frame number of the mapped page.
 * NOTE(review): the embedded numbering has gaps (283 -> 286, 288 -> 292,
 * 292 -> 294); the visible fields total only 24 bits, so some bit-field lines
 * are missing from this view.
 */
283 typedef struct pte32 {
286 uint_t user_page : 1;
287 uint_t write_through : 1;
288 uint_t cache_disable : 1;
292 uint_t global_page : 1;
294 uint_t page_base_addr : 20;
295 } __attribute__((packed)) pte32_t;
298 /* 32 bit PAE PAGE STRUCTURES */
/*
 * pdpe32pae_t: packed bit-field layout of a PAE page-directory-pointer entry;
 * `pd_base_addr` is 24 bits and the `rsvd*` fields are marked must-be-zero.
 * NOTE(review): the embedded numbering has gaps (299 -> 301, 303 -> 306,
 * 306 -> 308); the visible fields total 58 bits, so some bit-field lines are
 * missing from this view.
 */
299 typedef struct pdpe32pae {
301 uint_t rsvd : 2; // MBZ
302 uint_t write_through : 1;
303 uint_t cache_disable : 1;
306 uint_t rsvd2 : 2; // MBZ
308 uint_t pd_base_addr : 24;
309 uint_t rsvd3 : 28; // MBZ
310 } __attribute__((packed)) pdpe32pae_t;
/*
 * pde32pae_t: packed bit-field layout of a PAE page-directory entry;
 * `pt_base_addr` is 24 bits wide.
 * NOTE(review): the embedded numbering has gaps (314 -> 317, 319 -> 322,
 * 323 -> 325, 325 -> 327); the visible fields total only 29 bits, so some
 * bit-field lines are missing from this view.
 */
314 typedef struct pde32pae {
317 uint_t user_page : 1;
318 uint_t write_through : 1;
319 uint_t cache_disable : 1;
322 uint_t large_page : 1;
323 uint_t global_page : 1;
325 uint_t pt_base_addr : 24;
327 } __attribute__((packed)) pde32pae_t;
/*
 * pde32pae_2MB_t: packed bit-field layout of a PAE page-directory entry that
 * maps a 2MB page directly; `page_base_addr` is 15 bits wide.
 * NOTE(review): the embedded numbering has gaps (329 -> 332, 334 -> 338,
 * 338 -> 342, 342 -> 345); the visible fields total only 19 bits, so some
 * bit-field lines are missing from this view.
 */
329 typedef struct pde32pae_2MB {
332 uint_t user_page : 1;
333 uint_t write_through : 1;
334 uint_t cache_disable : 1;
338 uint_t global_page : 1;
342 uint_t page_base_addr : 15;
345 } __attribute__((packed)) pde32pae_2MB_t;
/*
 * pte32pae_t: packed bit-field layout of a PAE page-table entry;
 * `page_base_addr` is 24 bits wide.
 * NOTE(review): the embedded numbering has gaps (347 -> 350, 352 -> 356,
 * 356 -> 358, 358 -> 360); the visible fields total only 28 bits, so some
 * bit-field lines are missing from this view.
 */
347 typedef struct pte32pae {
350 uint_t user_page : 1;
351 uint_t write_through : 1;
352 uint_t cache_disable : 1;
356 uint_t global_page : 1;
358 uint_t page_base_addr : 24;
360 } __attribute__((packed)) pte32pae_t;
369 /* LONG MODE 64 bit PAGE STRUCTURES */
/*
 * pml4e64_t: packed bit-field layout of a long-mode PML4 entry;
 * `pdp_base_addr` (40 bits) is followed by 11 `available` bits and a
 * `no_execute` bit.
 * NOTE(review): the embedded numbering has gaps (370 -> 373, 375 -> 380);
 * the visible fields total 55 bits, so some bit-field lines are missing from
 * this view.
 */
370 typedef struct pml4e64 {
373 uint_t user_page : 1;
374 uint_t write_through : 1;
375 uint_t cache_disable : 1;
380 ullong_t pdp_base_addr : 40;
381 uint_t available : 11;
382 uint_t no_execute : 1;
383 } __attribute__((packed)) pml4e64_t;
/*
 * pdpe64_t: packed bit-field layout of a long-mode page-directory-pointer
 * entry; `pd_base_addr` (40 bits) is followed by 11 `available` bits and a
 * `no_execute` bit.
 * NOTE(review): the embedded numbering has gaps (386 -> 389, 391 -> 394,
 * 394 -> 397); the visible fields total 56 bits, so some bit-field lines are
 * missing from this view.
 */
386 typedef struct pdpe64 {
389 uint_t user_page : 1;
390 uint_t write_through : 1;
391 uint_t cache_disable : 1;
394 uint_t large_page : 1;
397 ullong_t pd_base_addr : 40;
398 uint_t available : 11;
399 uint_t no_execute : 1;
400 } __attribute__((packed)) pdpe64_t;
/*
 * pdpe64_1GB_t: packed bit-field layout of a long-mode page-directory-pointer
 * entry that maps a 1GB page directly; `page_base_addr` is 22 bits wide.
 * The original author's remark below states that this form is unsupported.
 * NOTE(review): the embedded numbering has gaps (404 -> 407, 409 -> 412,
 * 413 -> 417); the visible fields total only 39 bits, so some bit-field lines
 * are missing from this view.
 */
403 // We Don't support this
404 typedef struct pdpe64_1GB {
407 uint_t user_page : 1;
408 uint_t write_through : 1;
409 uint_t cache_disable : 1;
412 uint_t large_page : 1;
413 uint_t global_page : 1;
417 ullong_t page_base_addr : 22;
418 uint_t available : 11;
419 uint_t no_execute : 1;
420 } __attribute__((packed)) pdpe64_1GB_t;
/*
 * pde64_t: packed bit-field layout of a long-mode page-directory entry;
 * `pt_base_addr` (40 bits) is followed by 11 `available` bits and a
 * `no_execute` bit.
 * NOTE(review): the embedded numbering has gaps (424 -> 427, 429 -> 432,
 * 433 -> 435); the visible fields total 57 bits, so some bit-field lines are
 * missing from this view.
 */
424 typedef struct pde64 {
427 uint_t user_page : 1;
428 uint_t write_through : 1;
429 uint_t cache_disable : 1;
432 uint_t large_page : 1;
433 uint_t global_page : 1;
435 ullong_t pt_base_addr : 40;
436 uint_t available : 11;
437 uint_t no_execute : 1;
438 } __attribute__((packed)) pde64_t;
/*
 * pde64_2MB_t: packed bit-field layout of a long-mode page-directory entry
 * that maps a 2MB page directly; `page_base_addr` is 31 bits wide.
 * NOTE(review): the embedded numbering has gaps (440 -> 443, 445 -> 448,
 * 449 -> 453); the visible fields total only 48 bits, so some bit-field lines
 * are missing from this view.
 */
440 typedef struct pde64_2MB {
443 uint_t user_page : 1;
444 uint_t write_through : 1;
445 uint_t cache_disable : 1;
448 uint_t large_page : 1;
449 uint_t global_page : 1;
453 ullong_t page_base_addr : 31;
454 uint_t available : 11;
455 uint_t no_execute : 1;
456 } __attribute__((packed)) pde64_2MB_t;
/*
 * pte64_t: packed bit-field layout of a long-mode page-table entry;
 * `page_base_addr` (40 bits) is followed by 11 `available` bits and a
 * `no_execute` bit.
 * NOTE(review): the embedded numbering has gaps (459 -> 462, 464 -> 468,
 * 468 -> 470); the visible fields total 56 bits, so some bit-field lines are
 * missing from this view.
 */
459 typedef struct pte64 {
462 uint_t user_page : 1;
463 uint_t write_through : 1;
464 uint_t cache_disable : 1;
468 uint_t global_page : 1;
470 ullong_t page_base_addr : 40;
471 uint_t available : 11;
472 uint_t no_execute : 1;
473 } __attribute__((packed)) pte64_t;
475 /* *************** */
/*
 * pf_error_t: packed bit-field decoding of a page-fault error code.  It is
 * also used as the `access_type` argument of the v3_can_access_*() and
 * v3_check_*_pt_*() declarations in this header.
 * NOTE(review): the embedded numbering jumps 482 -> 484, so one line after
 * `ifetch` (presumably a padding/reserved field -- confirm) is not visible;
 * only 5 bits are declared in this view.
 */
477 typedef struct pf_error_code {
478 uint_t present : 1; // if 0, fault due to page not present
479 uint_t write : 1; // if 1, faulting access was a write
480 uint_t user : 1; // if 1, faulting access was in user mode
481 uint_t rsvd_access : 1; // if 1, fault from reading a 1 from a reserved field (?)
482 uint_t ifetch : 1; // if 1, faulting access was an instr fetch (only with NX)
484 } __attribute__((packed)) pf_error_t;
492 int v3_translate_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
493 int v3_translate_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
494 int v3_translate_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
496 int v3_translate_host_pt_32(v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
497 int v3_translate_host_pt_32pae(v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
498 int v3_translate_host_pt_64(v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
501 int v3_find_host_pt_32_page(v3_reg_t host_cr3, page_type_t type, addr_t vaddr, addr_t * page_addr);
502 int v3_find_host_pt_32pae_page(v3_reg_t host_cr3, page_type_t type, addr_t vaddr, addr_t * page_addr);
503 int v3_find_host_pt_64_page(v3_reg_t host_cr3, page_type_t type, addr_t vaddr, addr_t * page_addr);
/*
 * Guest-side page lookups for the 32-bit, 32-bit PAE and 64-bit modes; each
 * takes the guest context, the guest CR3, a page_type_t and a virtual address.
 *
 * NOTE(review): every declaration below is cut off after `addr_t vaddr,`.
 * The embedded numbering skips 506, 509 and 512, so the final parameter
 * line(s) and the closing `);` are not visible in this view.  The text is
 * left untouched; restore the missing lines from the complete header.
 */
504 int v3_find_guest_pt_32_page(struct guest_info * info, v3_reg_t guest_cr3,
505 page_type_t type, addr_t vaddr,
507 int v3_find_guest_pt_32pae_page(struct guest_info * info, v3_reg_t guest_cr3,
508 page_type_t type, addr_t vaddr,
510 int v3_find_guest_pt_64_page(struct guest_info * info, v3_reg_t guest_cr3,
511 page_type_t type, addr_t vaddr,
514 pt_access_status_t inline v3_can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type);
515 pt_access_status_t inline v3_can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type);
517 pt_access_status_t inline v3_can_access_pdpe32pae(pdpe32pae_t * pdpe, addr_t addr, pf_error_t access_type);
518 pt_access_status_t inline v3_can_access_pde32pae(pde32pae_t * pde, addr_t addr, pf_error_t access_type);
519 pt_access_status_t inline v3_can_access_pte32pae(pte32pae_t * pte, addr_t addr, pf_error_t access_type);
521 pt_access_status_t inline v3_can_access_pml4e64(pml4e64_t * pmle, addr_t addr, pf_error_t access_type);
522 pt_access_status_t inline v3_can_access_pdpe64(pdpe64_t * pdpe, addr_t addr, pf_error_t access_type);
523 pt_access_status_t inline v3_can_access_pde64(pde64_t * pde, addr_t addr, pf_error_t access_type);
524 pt_access_status_t inline v3_can_access_pte64(pte64_t * pte, addr_t addr, pf_error_t access_type);
527 int v3_check_host_pt_32(v3_reg_t host_cr3, addr_t vaddr,
528 pf_error_t access_type, pt_access_status_t * access_status);
529 int v3_check_host_pt_32pae(v3_reg_t host_cr3, addr_t vaddr,
530 pf_error_t access_type, pt_access_status_t * access_status);
531 int v3_check_host_pt_64(v3_reg_t host_cr3, addr_t vaddr,
532 pf_error_t access_type, pt_access_status_t * access_status);
533 int v3_check_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
534 pf_error_t access_type, pt_access_status_t * access_status);
535 int v3_check_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
536 pf_error_t access_type, pt_access_status_t * access_status);
537 int v3_check_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
538 pf_error_t access_type, pt_access_status_t * access_status);
543 int v3_drill_host_pt_32(v3_reg_t host_cr3, addr_t vaddr,
544 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
545 void * private_data);
546 int v3_drill_host_pt_32pae(v3_reg_t host_cr3, addr_t vaddr,
547 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
548 void * private_data);
549 int v3_drill_host_pt_64(v3_reg_t host_cr3, addr_t vaddr,
550 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
551 void * private_data);
553 int v3_drill_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
554 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
555 void * private_data);
556 int v3_drill_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
557 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
558 void * private_data);
559 int v3_drill_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
560 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
561 void * private_data);
565 int v3_walk_host_pt_32(v3_reg_t host_cr3,
566 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
567 void * private_data);
569 int v3_walk_host_pt_32pae(v3_reg_t host_cr3,
570 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
571 void * private_data);
573 int v3_walk_host_pt_64(v3_reg_t host_cr3,
574 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
575 void * private_data);
577 int v3_walk_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3,
578 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
579 void * private_data);
581 int v3_walk_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3,
582 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
583 void * private_data);
585 int v3_walk_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3,
586 int (*callback)(page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
587 void * private_data);
590 pde32_t * create_passthrough_pts_32(struct guest_info * guest_info);
591 pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info);
592 pml4e64_t * create_passthrough_pts_64(struct guest_info * info);
595 void delete_page_tables_32(pde32_t * pde);
596 void delete_page_tables_32PAE(pdpe32pae_t * pdpe);
597 void delete_page_tables_64(pml4e64_t * pml4);
601 const uchar_t * v3_page_type_to_str(page_type_t type);
605 void PrintPTEntry(page_type_t type, addr_t vaddr, void * entry);
606 void PrintHostPageTables(v3_vm_cpu_mode_t cpu_mode, addr_t cr3);
607 void PrintGuestPageTables(struct guest_info * info, addr_t cr3);
608 void PrintHostPageTree(v3_vm_cpu_mode_t cpu_mode, addr_t virtual_addr, addr_t cr3);
609 void PrintGuestPageTree(struct guest_info * info, addr_t virtual_addr, addr_t cr3);