2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #ifndef __VMM_PAGING_H__
22 #define __VMM_PAGING_H__
27 #include <palacios/vmm_types.h>
28 #include <palacios/vmm_util.h>
33 In the following, when we say "page table", we mean the whole 2 or 4 layer
34 page table (PDEs, PTEs), etc.
37 guest-visible paging state
38 This is the state that the guest thinks the machine is using
40 - guest physical memory
41 The physical memory addresses the guest is allowed to use
42 (see shadow page maps, below)
44 (we care about when the current one changes)
45 - guest paging registers (these are never written to hardware)
51 This is the state that the machine will actually use when the guest
52 is running. It consists of:
53 - current shadow page table
54 This is the page table actually used when the guest is running.
55 It is changed/regenerated when the guest page table changes
56 It mostly reflects the guest page table, except that it restricts
57 physical addresses to those the VMM allocates to the guest.
59 This is a mapping from guest physical memory addresses to
60 the current location of the guest physical memory content.
61 It maps from regions of physical memory addresses to regions
62 located in physical memory or elsewhere.
63 (8192,16384) -> MEM(8912,...)
64 (0,8191) -> DISK(65536,..)
65 - guest paging registers (these are written to guest state)
70 This is the state we expect to be operative when the VMM is running.
71 Typically, this is set up by the host os into which we have embedded
72 the VMM, but we include the description here for clarity.
74 This is the page table we use when we are executing in
75 the VMM (or the host os)
81 The reason why the shadow paging state and the host paging state are
82 distinct is to permit the guest to use any virtual address it wants,
83 irrespective of the addresses the VMM or the host os use. These guest
84 virtual addresses are reflected in the shadow paging state. When we
85 exit from the guest, we switch to the host paging state so that any
86 virtual addresses that overlap between the guest and VMM/host now map
87 to the physical addresses expected by the VMM/host. On AMD SVM, this
88 switch is done by the hardware. On Intel VT, the switch is done
89 by the hardware as well, but we are responsible for manually updating
90 the host state in the vmcs before entering the guest.
96 #define MAX_PTE32_ENTRIES 1024
97 #define MAX_PDE32_ENTRIES 1024
99 #define MAX_PTE32PAE_ENTRIES 512
100 #define MAX_PDE32PAE_ENTRIES 512
101 #define MAX_PDPE32PAE_ENTRIES 4
103 #define MAX_PTE64_ENTRIES 512
104 #define MAX_PDE64_ENTRIES 512
105 #define MAX_PDPE64_ENTRIES 512
106 #define MAX_PML4E64_ENTRIES 512
109 /* Converts an address into a page table index */
110 #define PDE32_INDEX(x) ((((uint_t)x) >> 22) & 0x3ff)
111 #define PTE32_INDEX(x) ((((uint_t)x) >> 12) & 0x3ff)
114 #define PDPE32PAE_INDEX(x) ((((uint_t)x) >> 30) & 0x3)
115 #define PDE32PAE_INDEX(x) ((((uint_t)x) >> 21) & 0x1ff)
116 #define PTE32PAE_INDEX(x) ((((uint_t)x) >> 12) & 0x1ff)
118 #define PML4E64_INDEX(x) ((((ullong_t)x) >> 39) & 0x1ff)
119 #define PDPE64_INDEX(x) ((((ullong_t)x) >> 30) & 0x1ff)
120 #define PDE64_INDEX(x) ((((ullong_t)x) >> 21) & 0x1ff)
121 #define PTE64_INDEX(x) ((((ullong_t)x) >> 12) & 0x1ff)
124 /* Gets the base address needed for a Page Table entry */
125 /* Deprecate these :*/
127 #define PD32_BASE_ADDR(x) (((uint_t)x) >> 12)
128 #define PT32_BASE_ADDR(x) (((uint_t)x) >> 12)
129 #define PD32_4MB_BASE_ADDR(x) (((uint_t)x) >> 22)
131 #define PML4E64_BASE_ADDR(x) (((ullong_t)x) >> 12)
132 #define PDPE64_BASE_ADDR(x) (((ullong_t)x) >> 12)
133 #define PDE64_BASE_ADDR(x) (((ullong_t)x) >> 12)
134 #define PTE64_BASE_ADDR(x) (((ullong_t)x) >> 12)
136 // Accessor functions for the page table structures
137 #define PDE32_T_ADDR(x) (((x).pt_base_addr) << 12)
138 #define PTE32_T_ADDR(x) (((x).page_base_addr) << 12)
139 #define PDE32_4MB_T_ADDR(x) (((x).page_base_addr) << 22)
141 /* Replace The above with these... */
142 #define PAGE_BASE_ADDR(x) ((x) >> 12)
143 #define PAGE_BASE_ADDR_2MB(x) ((x) >> 21)
144 #define PAGE_BASE_ADDR_4MB(x) ((x) >> 22)
146 #define BASE_TO_PAGE_ADDR(x) (((addr_t)x) << 12)
147 #define BASE_TO_PAGE_ADDR_2MB(x) (((addr_t)x) << 21)
148 #define BASE_TO_PAGE_ADDR_4MB(x) (((addr_t)x) << 22)
153 #define PT32_PAGE_OFFSET(x) (((uint_t)x) & 0xfff)
154 #define PD32_4MB_PAGE_OFFSET(x) (((uint_t)x) & 0x003fffff)
156 #define PT32_PAGE_ADDR(x) (((uint_t)x) & 0xfffff000)
157 #define PD32_4MB_PAGE_ADDR(x) (((uint_t)x) & 0xffc00000)
159 #define PT32_PAGE_POWER 12
160 #define PAGE_ALIGNED_ADDR(x) (((uint_t) (x)) >> 12)
161 //#define PAGE_ADDR(x) (PAGE_ALIGNED_ADDR(x) << 12)
162 #define PAGE_POWER 12
163 #define PAGE_SIZE 4096
165 /* use these instead */
166 #define PAGE_OFFSET(x) ((x) & 0xfff)
167 #define PAGE_OFFSET_2MB(x) ((x) & 0x1fffff)
168 #define PAGE_OFFSET_4MB(x) ((x) & 0x3fffff)
170 #define PAGE_POWER 12
171 #define PAGE_POWER_2MB 22
172 #define PAGE_POWER_4MB 21
174 // We shift instead of mask because we don't know the address size
175 #define PAGE_ADDR(x) (((x) >> PAGE_POWER) << PAGE_POWER)
176 #define PAGE_ADDR_2MB(x) (((x) >> PAGE_POWER_2MB) << PAGE_POWER_2MB)
177 #define PAGE_ADDR_4MB(x) (((x) >> PAGE_POWER_4MB) << PAGE_POWER_4MB)
179 #define PAGE_SIZE 4096
180 #define PAGE_SIZE_2MB (4096 * 512)
181 #define PAGE_SIZE_4MB (4096 * 1024)
190 #define CR3_TO_PDE32_PA(cr3) ((addr_t)(((ulong_t)cr3) & 0xfffff000))
191 #define CR3_TO_PDPTRE_PA(cr3) ((addr_t)(((ulong_t)cr3) & 0xffffffe0))
192 #define CR3_TO_PML4E64_PA(cr3) ((addr_t)(((ullong_t)cr3) & 0x000ffffffffff000LL))
194 #define CR3_TO_PDE32_VA(cr3) ((pde32_t *)V3_VAddr((void *)(addr_t)(((ulong_t)cr3) & 0xfffff000)))
195 #define CR3_TO_PDPTRE_VA(cr3) (V3_VAddr((void *)(((ulong_t)cr3) & 0xffffffe0)))
196 #define CR3_TO_PML4E64_VA(cr3) ((pml4e64_t *)V3_VAddr((void *)(addr_t)(((ullong_t)cr3) & 0x000ffffffffff000LL)))
203 /* Page Table Flag Values */
204 #define PT32_HOOK 0x1
205 #define PT32_GUEST_PT 0x2
210 /* PDE 32 bit PAGE STRUCTURES */
211 typedef enum {PDE32_ENTRY_NOT_PRESENT, PDE32_ENTRY_PTE32, PDE32_ENTRY_LARGE_PAGE} pde32_entry_type_t;
212 typedef enum {PT_ACCESS_OK, PT_ENTRY_NOT_PRESENT, PT_WRITE_ERROR, PT_USER_ERROR} pt_access_status_t;
214 typedef struct pde32 {
217 uint_t user_page : 1;
218 uint_t write_through : 1;
219 uint_t cache_disable : 1;
222 uint_t large_page : 1;
223 uint_t global_page : 1;
225 uint_t pt_base_addr : 20;
226 } __attribute__((packed)) pde32_t;
228 typedef struct pde32_4MB {
231 uint_t user_page : 1;
232 uint_t write_through : 1;
233 uint_t cache_disable : 1;
237 uint_t global_page : 1;
241 uint_t page_base_addr : 10;
243 } __attribute__((packed)) pde32_4MB_t;
245 typedef struct pte32 {
248 uint_t user_page : 1;
249 uint_t write_through : 1;
250 uint_t cache_disable : 1;
254 uint_t global_page : 1;
256 uint_t page_base_addr : 20;
257 } __attribute__((packed)) pte32_t;
260 /* 32 bit PAE PAGE STRUCTURES */
261 typedef struct pdpe32pae {
263 uint_t rsvd : 2; // MBZ
264 uint_t write_through : 1;
265 uint_t cache_disable : 1;
268 uint_t rsvd2 : 2; // MBZ
270 uint_t pd_base_addr : 24;
271 uint_t rsvd3 : 28; // MBZ
272 } __attribute__((packed)) pdpe32pae_t;
276 typedef struct pde32pae {
279 uint_t user_page : 1;
280 uint_t write_through : 1;
281 uint_t cache_disable : 1;
284 uint_t large_page : 1;
285 uint_t global_page : 1;
287 uint_t pt_base_addr : 24;
289 } __attribute__((packed)) pde32pae_t;
291 typedef struct pde32pae_4MB {
294 uint_t user_page : 1;
295 uint_t write_through : 1;
296 uint_t cache_disable : 1;
300 uint_t global_page : 1;
304 uint_t page_base_addr : 14;
307 } __attribute__((packed)) pde32pae_4MB_t;
309 typedef struct pte32pae {
312 uint_t user_page : 1;
313 uint_t write_through : 1;
314 uint_t cache_disable : 1;
318 uint_t global_page : 1;
320 uint_t page_base_addr : 24;
322 } __attribute__((packed)) pte32pae_t;
331 /* LONG MODE 64 bit PAGE STRUCTURES */
332 typedef struct pml4e64 {
335 uint_t user_page : 1;
336 uint_t write_through : 1;
337 uint_t cache_disable : 1;
342 ullong_t pdp_base_addr : 40;
343 uint_t available : 11;
344 uint_t no_execute : 1;
345 } __attribute__((packed)) pml4e64_t;
348 typedef struct pdpe64 {
351 uint_t user_page : 1;
352 uint_t write_through : 1;
353 uint_t cache_disable : 1;
356 uint_t large_page : 1;
359 ullong_t pd_base_addr : 40;
360 uint_t available : 11;
361 uint_t no_execute : 1;
362 } __attribute__((packed)) pdpe64_t;
367 typedef struct pde64 {
370 uint_t user_page : 1;
371 uint_t write_through : 1;
372 uint_t cache_disable : 1;
375 uint_t large_page : 1;
376 uint_t reserved2 : 1;
378 ullong_t pt_base_addr : 40;
379 uint_t available : 11;
380 uint_t no_execute : 1;
381 } __attribute__((packed)) pde64_t;
383 typedef struct pte64 {
386 uint_t user_page : 1;
387 uint_t write_through : 1;
388 uint_t cache_disable : 1;
392 uint_t global_page : 1;
394 ullong_t page_base_addr : 40;
395 uint_t available : 11;
396 uint_t no_execute : 1;
397 } __attribute__((packed)) pte64_t;
399 /* *************** */
401 typedef struct pf_error_code {
402 uint_t present : 1; // if 0, fault due to page not present
403 uint_t write : 1; // if 1, faulting access was a write
404 uint_t user : 1; // if 1, faulting access was in user mode
405 uint_t rsvd_access : 1; // if 1, fault from reading a 1 from a reserved field (?)
406 uint_t ifetch : 1; // if 1, faulting access was an instr fetch (only with NX)
408 } __attribute__((packed)) pf_error_t;
413 void delete_page_tables_32(pde32_t * pde);
414 void delete_page_tables_32PAE(pdpe32pae_t * pdpe);
415 void delete_page_tables_64(pml4e64_t * pml4);
417 pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry);
418 int pte32_lookup(pte32_t * pte, addr_t addr, addr_t * entry);
420 // This assumes that the page table resides in the host address space
421 // IE. IT DOES NO VM ADDR TRANSLATION
422 int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr);
426 pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type);
427 pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type);
435 pde32_t * create_passthrough_pts_32(struct guest_info * guest_info);
436 pdpe32pae_t * create_passthrough_pts_32PAE(struct guest_info * guest_info);
437 pml4e64_t * create_passthrough_pts_64(struct guest_info * info);
442 //#include <palacios/vm_guest.h>
444 void PrintDebugPageTables(pde32_t * pde);
447 void PrintPageTree(v3_vm_cpu_mode_t cpu_mode, addr_t virtual_addr, addr_t cr3);
448 void PrintPageTree_64(addr_t virtual_addr, pml4e64_t * pml);
451 void PrintPT32(addr_t starting_address, pte32_t * pte);
452 void PrintPD32(pde32_t * pde);
453 void PrintPTE32(addr_t virtual_address, pte32_t * pte);
454 void PrintPDE32(addr_t virtual_address, pde32_t * pde);
456 void PrintDebugPageTables32PAE(pdpe32pae_t * pde);
457 void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte);
458 void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde);
459 void PrintPTE64(addr_t virtual_address, pte64_t * pte);