2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #ifndef __VMM_PAGING_H__
22 #define __VMM_PAGING_H__
27 #include <palacios/vmm_types.h>
28 #include <palacios/vmm_util.h>
33 In the following, when we say "page table", we mean the whole 2 or 4 layer
34 page table (PDEs, PTEs), etc.
37 guest-visible paging state
38 This is the state that the guest thinks the machine is using
40 - guest physical memory
41 The physical memory addresses the guest is allowed to use
42 (see shadow page maps, below)
44 (we care about when the current one changes)
45 - guest paging registers (these are never written to hardware)
51 This is the state that the machine will actually use when the guest
52 is running. It consists of:
53 - current shadow page table
54 This is the page table actually used when the guest is running.
55 It is changed/regenerated when the guest page table changes
56 It mostly reflects the guest page table, except that it restricts
57 physical addresses to those the VMM allocates to the guest.
59 This is a mapping from guest physical memory addresses to
60 the current location of the guest physical memory content.
61 It maps from regions of physical memory addresses to regions
62 located in physical memory or elsewhere.
63 (8192,16384) -> MEM(8912,...)
64 (0,8191) -> DISK(65536,..)
65 - guest paging registers (these are written to guest state)
70 This is the state we expect to be operative when the VMM is running.
71 Typically, this is set up by the host os into which we have embedded
72 the VMM, but we include the description here for clarity.
74 This is the page table we use when we are executing in
75 the VMM (or the host os)
81 The reason why the shadow paging state and the host paging state are
82 distinct is to permit the guest to use any virtual address it wants,
83 irrespective of the addresses the VMM or the host os use. These guest
84 virtual addresses are reflected in the shadow paging state. When we
85 exit from the guest, we switch to the host paging state so that any
86 virtual addresses that overlap between the guest and VMM/host now map
87 to the physical addresses expected by the VMM/host. On AMD SVM, this
88 switch is done by the hardware. On Intel VT, the switch is done
89 by the hardware as well, but we are responsible for manually updating
90 the host state in the vmcs before entering the guest.
/* Number of entries per table at each level of the 32-bit (non-PAE) hierarchy */
#define MAX_PDE32_ENTRIES          1024
#define MAX_PTE32_ENTRIES          1024

/* Number of entries per table at each level of the 32-bit PAE hierarchy */
#define MAX_PDPE32PAE_ENTRIES      4
#define MAX_PDE32PAE_ENTRIES       512
#define MAX_PTE32PAE_ENTRIES       512

/* Number of entries per table at each level of the 64-bit hierarchy */
#define MAX_PML4E64_ENTRIES        512
#define MAX_PDPE64_ENTRIES         512
#define MAX_PDE64_ENTRIES          512
#define MAX_PTE64_ENTRIES          512
/*
 * Page kinds handed to the page-table drill/walk callbacks: the data page
 * sizes first, followed by the table pages of the 32-bit, 32-bit PAE and
 * 64-bit hierarchies.
 *
 * NOTE(review): the listing this block was reviewed from skips one line right
 * after PAGE_1GB.  Confirm against the authoritative copy that no enumerator
 * was lost there -- a missing entry would shift the value of every enumerator
 * that follows it.
 */
typedef enum {
    PAGE_4KB,
    PAGE_2MB,
    PAGE_4MB,
    PAGE_1GB,
    PAGE_PT32,
    PAGE_PD32,
    PAGE_PDP32PAE,
    PAGE_PD32PAE,
    PAGE_PT32PAE,
    PAGE_PML464,
    PAGE_PDP64,
    PAGE_PD64,
    PAGE_PT64
} page_type_t;
/*
 * Convert a virtual address into the index of the entry that maps it at a
 * given level of the page table hierarchy.
 *
 * The argument is parenthesized before it is cast so that the cast applies to
 * the whole expression passed in, not just to its first operand.
 */

/* 32-bit paging: 10-bit directory index, 10-bit table index */
#define PDE32_INDEX(x)      ((((uint_t)(x)) >> 22) & 0x3ff)
#define PTE32_INDEX(x)      ((((uint_t)(x)) >> 12) & 0x3ff)

/* 32-bit PAE paging: 2-bit pointer-table index, then 9 bits per level */
#define PDPE32PAE_INDEX(x)  ((((uint_t)(x)) >> 30) & 0x3)
#define PDE32PAE_INDEX(x)   ((((uint_t)(x)) >> 21) & 0x1ff)
#define PTE32PAE_INDEX(x)   ((((uint_t)(x)) >> 12) & 0x1ff)

/* 64-bit paging: 9 bits per level */
#define PML4E64_INDEX(x)    ((((ullong_t)(x)) >> 39) & 0x1ff)
#define PDPE64_INDEX(x)     ((((ullong_t)(x)) >> 30) & 0x1ff)
#define PDE64_INDEX(x)      ((((ullong_t)(x)) >> 21) & 0x1ff)
#define PTE64_INDEX(x)      ((((ullong_t)(x)) >> 12) & 0x1ff)
/*
 * Gets the base address needed for a page table entry: the address with the
 * in-page offset bits shifted out, for each supported page size.
 */
#define PAGE_BASE_ADDR(x)        ((x) >> 12)
#define PAGE_BASE_ADDR_4KB(x)    ((x) >> 12)
#define PAGE_BASE_ADDR_2MB(x)    ((x) >> 21)
#define PAGE_BASE_ADDR_4MB(x)    ((x) >> 22)
#define PAGE_BASE_ADDR_1GB(x)    ((x) >> 30)
#define PAGE_BASE_ADDR_512GB(x)  ((x) >> 39)

/*
 * Inverse of the macros above: turn an entry's base-address field back into a
 * full address.  The argument is widened to addr_t *before* shifting so that a
 * narrow bit-field value does not lose its upper bits; it is parenthesized so
 * that the widening applies to the whole argument expression (for example a
 * conditional expression), not only to its first operand.
 */
#define BASE_TO_PAGE_ADDR(x)        (((addr_t)(x)) << 12)
#define BASE_TO_PAGE_ADDR_4KB(x)    (((addr_t)(x)) << 12)
#define BASE_TO_PAGE_ADDR_2MB(x)    (((addr_t)(x)) << 21)
#define BASE_TO_PAGE_ADDR_4MB(x)    (((addr_t)(x)) << 22)
#define BASE_TO_PAGE_ADDR_1GB(x)    (((addr_t)(x)) << 30)
#define BASE_TO_PAGE_ADDR_512GB(x)  (((addr_t)(x)) << 39)
/* Offset of an address within the page that contains it, per page size */
#define PAGE_OFFSET(x)      ((x) & 0xfff)
#define PAGE_OFFSET_4KB(x)  ((x) & 0xfff)
#define PAGE_OFFSET_2MB(x)  ((x) & 0x1fffff)
#define PAGE_OFFSET_4MB(x)  ((x) & 0x3fffff)
#define PAGE_OFFSET_1GB(x)  ((x) & 0x3fffffff)
/* log2 of each supported page size */
#define PAGE_POWER      12
#define PAGE_POWER_4KB  12
#define PAGE_POWER_2MB  21
#define PAGE_POWER_4MB  22
#define PAGE_POWER_1GB  30

/*
 * Round an address down to the start of its page.
 * We shift instead of mask because we don't know the address size: a
 * shift pair works for any operand width, a fixed mask constant would not.
 */
#define PAGE_ADDR(x)      (((x) >> PAGE_POWER)     << PAGE_POWER)
#define PAGE_ADDR_4KB(x)  (((x) >> PAGE_POWER_4KB) << PAGE_POWER_4KB)
#define PAGE_ADDR_2MB(x)  (((x) >> PAGE_POWER_2MB) << PAGE_POWER_2MB)
#define PAGE_ADDR_4MB(x)  (((x) >> PAGE_POWER_4MB) << PAGE_POWER_4MB)
#define PAGE_ADDR_1GB(x)  (((x) >> PAGE_POWER_1GB) << PAGE_POWER_1GB)
/* Size in bytes of each supported page / mapping granularity */
#define PAGE_SIZE        4096
#define PAGE_SIZE_4KB    4096
#define PAGE_SIZE_2MB    (4096 * 512)
#define PAGE_SIZE_4MB    (4096 * 1024)
#define PAGE_SIZE_1GB    0x40000000
/* 512ULL forces 64-bit arithmetic; the product does not fit in 32 bits */
#define PAGE_SIZE_512GB  (512ULL * PAGE_SIZE_1GB)
/*
 * Extract the physical address of the top-level paging structure from a CR3
 * value by masking off the non-address bits for each paging mode.  The
 * argument is parenthesized before the cast so the cast covers the whole
 * argument expression.
 */
#define CR3_TO_PDE32_PA(cr3)      ((addr_t)(((uint_t)(cr3)) & 0xfffff000))
#define CR3_TO_PDPE32PAE_PA(cr3)  ((addr_t)(((uint_t)(cr3)) & 0xffffffe0))
#define CR3_TO_PML4E64_PA(cr3)    ((addr_t)(((ullong_t)(cr3)) & 0x000ffffffffff000LL))

/*
 * Same as above, but passed through V3_VAddr() and typed as a pointer to the
 * corresponding top-level table.  Built on the _PA forms so the masks are
 * written only once.
 */
#define CR3_TO_PDE32_VA(cr3)      ((pde32_t *)V3_VAddr((void *)CR3_TO_PDE32_PA(cr3)))
#define CR3_TO_PDPE32PAE_VA(cr3)  ((pdpe32pae_t *)V3_VAddr((void *)CR3_TO_PDPE32PAE_PA(cr3)))
#define CR3_TO_PML4E64_VA(cr3)    ((pml4e64_t *)V3_VAddr((void *)CR3_TO_PML4E64_PA(cr3)))
/* We'll use the general form for now....
   typedef enum {PDE32_ENTRY_NOT_PRESENT, PDE32_ENTRY_PTE32, PDE32_ENTRY_LARGE_PAGE} pde32_entry_type_t;
   typedef enum {PTE32_ENTRY_NOT_PRESENT, PTE32_ENTRY_PAGE} pte32_entry_type_t;

   typedef enum {PDPE32PAE_ENTRY_NOT_PRESENT, PDPE32PAE_ENTRY_PAGE} pdpe32pae_entry_type_t;
   typedef enum {PDE32PAE_ENTRY_NOT_PRESENT, PDE32PAE_ENTRY_PTE32, PDE32PAE_ENTRY_LARGE_PAGE} pde32pae_entry_type_t;
   typedef enum {PTE32PAE_ENTRY_NOT_PRESENT, PTE32PAE_ENTRY_PAGE} pte32pae_entry_type_t;

   typedef enum {PML4E64_ENTRY_NOT_PRESENT, PML4E64_ENTRY_PAGE} pml4e64_entry_type_t;
   typedef enum {PDPE64_ENTRY_NOT_PRESENT, PDPE64_ENTRY_PTE32, PDPE64_ENTRY_LARGE_PAGE} pdpe64_entry_type_t;
   typedef enum {PDE64_ENTRY_NOT_PRESENT, PDE64_ENTRY_PTE32, PDE64_ENTRY_LARGE_PAGE} pde64_entry_type_t;
   typedef enum {PTE64_ENTRY_NOT_PRESENT, PTE64_ENTRY_PAGE} pte64_entry_type_t;
*/

/*
 * General form used in place of the per-format enums retired above: what a
 * page table entry refers to, independent of the paging mode.
 */
typedef enum {
    PT_ENTRY_NOT_PRESENT,
    PT_ENTRY_LARGE_PAGE,
    PT_ENTRY_PAGE
} pt_entry_type_t;

/* Result of checking an access against a page table entry */
typedef enum {
    PT_ACCESS_OK,
    PT_ACCESS_NOT_PRESENT,
    PT_ACCESS_WRITE_ERROR,
    PT_ACCESS_USER_ERROR
} pt_access_status_t;
/* Page table flag values */
#define V3_LARGE_PG  0x2
214 typedef struct gen_pt {
217 uint_t user_page : 1;
218 } __attribute__((packed)) gen_pt_t;
220 typedef struct pde32 {
223 uint_t user_page : 1;
224 uint_t write_through : 1;
225 uint_t cache_disable : 1;
228 uint_t large_page : 1;
229 uint_t global_page : 1;
231 uint_t pt_base_addr : 20;
232 } __attribute__((packed)) pde32_t;
234 typedef struct pde32_4MB {
237 uint_t user_page : 1;
238 uint_t write_through : 1;
239 uint_t cache_disable : 1;
242 uint_t large_page : 1;
243 uint_t global_page : 1;
247 uint_t page_base_addr : 10;
249 } __attribute__((packed)) pde32_4MB_t;
251 typedef struct pte32 {
254 uint_t user_page : 1;
255 uint_t write_through : 1;
256 uint_t cache_disable : 1;
260 uint_t global_page : 1;
262 uint_t page_base_addr : 20;
263 } __attribute__((packed)) pte32_t;
266 /* 32 bit PAE PAGE STRUCTURES */
267 typedef struct pdpe32pae {
269 uint_t rsvd : 2; // MBZ
270 uint_t write_through : 1;
271 uint_t cache_disable : 1;
274 uint_t rsvd2 : 2; // MBZ
276 uint_t pd_base_addr : 24;
277 uint_t rsvd3 : 28; // MBZ
278 } __attribute__((packed)) pdpe32pae_t;
282 typedef struct pde32pae {
285 uint_t user_page : 1;
286 uint_t write_through : 1;
287 uint_t cache_disable : 1;
290 uint_t large_page : 1;
291 uint_t global_page : 1;
293 uint_t pt_base_addr : 24;
295 } __attribute__((packed)) pde32pae_t;
297 typedef struct pde32pae_2MB {
300 uint_t user_page : 1;
301 uint_t write_through : 1;
302 uint_t cache_disable : 1;
306 uint_t global_page : 1;
310 uint_t page_base_addr : 15;
313 } __attribute__((packed)) pde32pae_2MB_t;
315 typedef struct pte32pae {
318 uint_t user_page : 1;
319 uint_t write_through : 1;
320 uint_t cache_disable : 1;
324 uint_t global_page : 1;
326 uint_t page_base_addr : 24;
328 } __attribute__((packed)) pte32pae_t;
337 /* LONG MODE 64 bit PAGE STRUCTURES */
338 typedef struct pml4e64 {
341 uint_t user_page : 1;
342 uint_t write_through : 1;
343 uint_t cache_disable : 1;
348 ullong_t pdp_base_addr : 40;
349 uint_t available : 11;
350 uint_t no_execute : 1;
351 } __attribute__((packed)) pml4e64_t;
354 typedef struct pdpe64 {
357 uint_t user_page : 1;
358 uint_t write_through : 1;
359 uint_t cache_disable : 1;
362 uint_t large_page : 1;
365 ullong_t pd_base_addr : 40;
366 uint_t available : 11;
367 uint_t no_execute : 1;
368 } __attribute__((packed)) pdpe64_t;
371 // We Don't support this
372 typedef struct pdpe64_1GB {
375 uint_t user_page : 1;
376 uint_t write_through : 1;
377 uint_t cache_disable : 1;
380 uint_t large_page : 1;
381 uint_t global_page : 1;
385 ullong_t page_base_addr : 22;
386 uint_t available : 11;
387 uint_t no_execute : 1;
388 } __attribute__((packed)) pdpe64_1GB_t;
392 typedef struct pde64 {
395 uint_t user_page : 1;
396 uint_t write_through : 1;
397 uint_t cache_disable : 1;
400 uint_t large_page : 1;
401 uint_t global_page : 1;
403 ullong_t pt_base_addr : 40;
404 uint_t available : 11;
405 uint_t no_execute : 1;
406 } __attribute__((packed)) pde64_t;
408 typedef struct pde64_2MB {
411 uint_t user_page : 1;
412 uint_t write_through : 1;
413 uint_t cache_disable : 1;
416 uint_t large_page : 1;
417 uint_t global_page : 1;
421 ullong_t page_base_addr : 31;
422 uint_t available : 11;
423 uint_t no_execute : 1;
424 } __attribute__((packed)) pde64_2MB_t;
427 typedef struct pte64 {
430 uint_t user_page : 1;
431 uint_t write_through : 1;
432 uint_t cache_disable : 1;
436 uint_t global_page : 1;
438 ullong_t page_base_addr : 40;
439 uint_t available : 11;
440 uint_t no_execute : 1;
441 } __attribute__((packed)) pte64_t;
443 /* *************** */
445 typedef struct pf_error_code {
446 uint_t present : 1; // if 0, fault due to page not present
447 uint_t write : 1; // if 1, faulting access was a write
448 uint_t user : 1; // if 1, faulting access was in user mode
449 uint_t rsvd_access : 1; // if 1, fault from reading a 1 from a reserved field (?)
450 uint_t ifetch : 1; // if 1, faulting access was an instr fetch (only with NX)
452 } __attribute__((packed)) pf_error_t;
460 int v3_translate_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
461 int v3_translate_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
462 int v3_translate_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr, addr_t * paddr);
464 int v3_translate_host_pt_32(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
465 int v3_translate_host_pt_32pae(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
466 int v3_translate_host_pt_64(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr, addr_t * paddr);
469 int v3_find_host_pt_32_page(struct guest_info * info, v3_reg_t host_cr3, page_type_t type, addr_t vaddr,
470 addr_t * page_ptr, addr_t * page_pa);
471 int v3_find_host_pt_32pae_page(struct guest_info * info, v3_reg_t host_cr3, page_type_t type, addr_t vaddr,
472 addr_t * page_ptr, addr_t * page_pa);
473 int v3_find_host_pt_64_page(struct guest_info * info, v3_reg_t host_cr3, page_type_t type, addr_t vaddr,
474 addr_t * page_ptr, addr_t * page_pa);
475 int v3_find_guest_pt_32_page(struct guest_info * info, v3_reg_t guest_cr3,
476 page_type_t type, addr_t vaddr,
477 addr_t * page_ptr, addr_t * page_pa);
478 int v3_find_guest_pt_32pae_page(struct guest_info * info, v3_reg_t guest_cr3,
479 page_type_t type, addr_t vaddr,
480 addr_t * page_ptr, addr_t * page_pa);
481 int v3_find_guest_pt_64_page(struct guest_info * info, v3_reg_t guest_cr3,
482 page_type_t type, addr_t vaddr,
483 addr_t * page_ptr, addr_t * page_pa);
487 pt_access_status_t inline v3_can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type);
488 pt_access_status_t inline v3_can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type);
490 pt_access_status_t inline v3_can_access_pdpe32pae(pdpe32pae_t * pdpe, addr_t addr, pf_error_t access_type);
491 pt_access_status_t inline v3_can_access_pde32pae(pde32pae_t * pde, addr_t addr, pf_error_t access_type);
492 pt_access_status_t inline v3_can_access_pte32pae(pte32pae_t * pte, addr_t addr, pf_error_t access_type);
494 pt_access_status_t inline v3_can_access_pml4e64(pml4e64_t * pmle, addr_t addr, pf_error_t access_type);
495 pt_access_status_t inline v3_can_access_pdpe64(pdpe64_t * pdpe, addr_t addr, pf_error_t access_type);
496 pt_access_status_t inline v3_can_access_pde64(pde64_t * pde, addr_t addr, pf_error_t access_type);
497 pt_access_status_t inline v3_can_access_pte64(pte64_t * pte, addr_t addr, pf_error_t access_type);
500 int v3_check_host_pt_32(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
501 pf_error_t access_type, pt_access_status_t * access_status);
502 int v3_check_host_pt_32pae(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
503 pf_error_t access_type, pt_access_status_t * access_status);
504 int v3_check_host_pt_64(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
505 pf_error_t access_type, pt_access_status_t * access_status);
506 int v3_check_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
507 pf_error_t access_type, pt_access_status_t * access_status);
508 int v3_check_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
509 pf_error_t access_type, pt_access_status_t * access_status);
510 int v3_check_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
511 pf_error_t access_type, pt_access_status_t * access_status);
515 page_type_t v3_get_guest_data_page_type_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
516 page_type_t v3_get_guest_data_page_type_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
517 page_type_t v3_get_guest_data_page_type_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
518 page_type_t v3_get_host_data_page_type_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
519 page_type_t v3_get_host_data_page_type_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
520 page_type_t v3_get_host_data_page_type_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr);
523 int v3_drill_host_pt_32(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
524 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
525 void * private_data);
526 int v3_drill_host_pt_32pae(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
527 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
528 void * private_data);
529 int v3_drill_host_pt_64(struct guest_info * info, v3_reg_t host_cr3, addr_t vaddr,
530 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
531 void * private_data);
533 int v3_drill_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
534 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
535 void * private_data);
536 int v3_drill_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
537 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
538 void * private_data);
539 int v3_drill_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3, addr_t vaddr,
540 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_ptr, addr_t page_pa, void * private_data),
541 void * private_data);
546 int v3_walk_host_pt_32(struct guest_info * info, v3_reg_t host_cr3,
547 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
548 void * private_data);
550 int v3_walk_host_pt_32pae(struct guest_info * info, v3_reg_t host_cr3,
551 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
552 void * private_data);
554 int v3_walk_host_pt_64(struct guest_info * info, v3_reg_t host_cr3,
555 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
556 void * private_data);
558 int v3_walk_guest_pt_32(struct guest_info * info, v3_reg_t guest_cr3,
559 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
560 void * private_data);
562 int v3_walk_guest_pt_32pae(struct guest_info * info, v3_reg_t guest_cr3,
563 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
564 void * private_data);
566 int v3_walk_guest_pt_64(struct guest_info * info, v3_reg_t guest_cr3,
567 int (*callback)(struct guest_info * info, page_type_t type, addr_t vaddr, addr_t page_va, addr_t page_pa, void * private_data),
568 void * private_data);
571 pde32_t * create_passthrough_pts_32(struct guest_info * guest_info);
572 pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info);
573 pml4e64_t * create_passthrough_pts_64(struct guest_info * info);
576 // note that these take host virtual addresses
577 void delete_page_tables_32(pde32_t * pde);
578 void delete_page_tables_32pae(pdpe32pae_t * pdpe);
579 void delete_page_tables_64(pml4e64_t * pml4);
583 const uchar_t * v3_page_type_to_str(page_type_t type);
587 void PrintPTEntry(struct guest_info * info, page_type_t type, addr_t vaddr, void * entry);
588 void PrintHostPageTables(struct guest_info * info, v3_cpu_mode_t cpu_mode, addr_t cr3);
589 void PrintGuestPageTables(struct guest_info * info, addr_t cr3);
590 void PrintHostPageTree(struct guest_info * info, addr_t virtual_addr, addr_t cr3);
591 void PrintGuestPageTree(struct guest_info * info, addr_t virtual_addr, addr_t cr3);