2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #ifndef __VMM_PAGING_H__
22 #define __VMM_PAGING_H__
27 #include <palacios/vmm_types.h>
28 #include <palacios/vmm_util.h>
32 In the following, when we say "page table", we mean the whole 2 or 4 layer
33 page table (PDEs, PTEs), etc.
36 guest-visible paging state
37 This is the state that the guest thinks the machine is using
39 - guest physical memory
40 The physical memory addresses the guest is allowed to use
41 (see shadow page maps, below)
43 (we care about when the current one changes)
44 - guest paging registers (these are never written to hardware)
50 This the state that the machine will actually use when the guest
51 is running. It consists of:
52 - current shadow page table
This is the page table actually used when the guest is running.
54 It is changed/regenerated when the guest page table changes
55 It mostly reflects the guest page table, except that it restricts
56 physical addresses to those the VMM allocates to the guest.
58 This is a mapping from guest physical memory addresses to
59 the current location of the guest physical memory content.
60 It maps from regions of physical memory addresses to regions
61 located in physical memory or elsewhere.
62 (8192,16384) -> MEM(8912,...)
63 (0,8191) -> DISK(65536,..)
64 - guest paging registers (these are written to guest state)
69 This is the state we expect to be operative when the VMM is running.
70 Typically, this is set up by the host os into which we have embedded
71 the VMM, but we include the description here for clarity.
73 This is the page table we use when we are executing in
74 the VMM (or the host os)
80 The reason why the shadow paging state and the host paging state are
81 distinct is to permit the guest to use any virtual address it wants,
82 irrespective of the addresses the VMM or the host os use. These guest
83 virtual addresses are reflected in the shadow paging state. When we
84 exit from the guest, we switch to the host paging state so that any
85 virtual addresses that overlap between the guest and VMM/host now map
to the physical addresses expected by the VMM/host. On AMD SVM, this
87 switch is done by the hardware. On Intel VT, the switch is done
88 by the hardware as well, but we are responsible for manually updating
89 the host state in the vmcs before entering the guest.
/* Number of entries per level of a 32-bit (non-PAE) page table:
 * 10 index bits at each of the two levels -> 1024 4-byte entries. */
#define MAX_PTE32_ENTRIES          1024
#define MAX_PDE32_ENTRIES          1024

/* 32-bit PAE: 9 index bits per level (512 8-byte entries), except the
 * top-level PDPT which holds only 4 entries. */
#define MAX_PTE32PAE_ENTRIES       512
#define MAX_PDE32PAE_ENTRIES      512
#define MAX_PDPE32PAE_ENTRIES     4

/* Long mode (64-bit): 9 index bits at each of the four levels. */
#define MAX_PTE64_ENTRIES          512
#define MAX_PDE64_ENTRIES          512
#define MAX_PDPE64_ENTRIES         512
#define MAX_PML4E64_ENTRIES        512
/* Converts a virtual address into its index within a page directory /
 * page table (32-bit non-PAE: 10 index bits per level).
 * NOTE: parameters are fully parenthesized so that expression arguments
 * (e.g. `base + off`) are cast as a whole, not just their first token. */
#define PDE32_INDEX(x)  ((((uint_t)(x)) >> 22) & 0x3ff)
#define PTE32_INDEX(x)  ((((uint_t)(x)) >> 12) & 0x3ff)

/* Gets the base address (page frame number) needed for a page table entry */
/* Deprecated: prefer the generic PAGE_BASE_ADDR/LARGE_PAGE_BASE_ADDR below */
#define PD32_BASE_ADDR(x)      (((uint_t)(x)) >> 12)
#define PT32_BASE_ADDR(x)      (((uint_t)(x)) >> 12)
#define PD32_4MB_BASE_ADDR(x)  (((uint_t)(x)) >> 22)

#define PML4E64_BASE_ADDR(x)   (((ullong_t)(x)) >> 12)
#define PDPE64_BASE_ADDR(x)    (((ullong_t)(x)) >> 12)
#define PDE64_BASE_ADDR(x)     (((ullong_t)(x)) >> 12)
#define PTE64_BASE_ADDR(x)     (((ullong_t)(x)) >> 12)
/* Accessor functions for the page table structures: reconstruct the
 * physical address stored in an entry's base-address bit-field. */
#define PDE32_T_ADDR(x)      (((x).pt_base_addr) << 12)
#define PTE32_T_ADDR(x)      (((x).page_base_addr) << 12)
#define PDE32_4MB_T_ADDR(x)  (((x).page_base_addr) << 22)

/* Replace the above with these...
 * NOTE: parameters are fully parenthesized so that expression arguments
 * are cast as a whole, not just their first token. */
#define PAGE_BASE_ADDR(x)           (((uint_t)(x)) >> 12)
#define LARGE_PAGE_BASE_ADDR(x)     (((uint_t)(x)) >> 22)
#define BASE_TO_PAGE_ADDR(x)        (((uint_t)(x)) << 12)
#define LARGE_BASE_TO_PAGE_ADDR(x)  (((uint_t)(x)) << 22)

/* Mask off the page-aligned address / in-page offset of a 4KB page. */
#define PT32_PAGE_ADDR(x)    (((uint_t)(x)) & 0xfffff000)
#define PT32_PAGE_OFFSET(x)  (((uint_t)(x)) & 0xfff)
#define PT32_PAGE_POWER 12

/* Same, for 4MB large pages. */
#define PD32_4MB_PAGE_ADDR(x)    (((uint_t)(x)) & 0xffc00000)
#define PD32_4MB_PAGE_OFFSET(x)  (((uint_t)(x)) & 0x003fffff)
#define PAGE_SIZE_4MB (4096 * 1024)

/* The following should be phased out */
#define PAGE_OFFSET(x)        (((uint_t)(x)) & 0xfff)
#define PAGE_ALIGNED_ADDR(x)  (((uint_t)(x)) >> 12)
#define PAGE_ADDR(x)          (PAGE_ALIGNED_ADDR(x) << 12)
#define PAGE_POWER 12
#define PAGE_SIZE 4096
/* Strip the flag bits out of CR3, leaving the physical address of the
 * top-level paging structure, and convert it to a host virtual address.
 * (cr3 is fully parenthesized so expression arguments are cast whole.) */
#define CR3_TO_PDE32(cr3)    (V3_VAddr((void *)(((ulong_t)(cr3)) & 0xfffff000)))
#define CR3_TO_PDPTRE(cr3)   (V3_VAddr((void *)(((ulong_t)(cr3)) & 0xffffffe0)))
#define CR3_TO_PML4E64(cr3)  (V3_VAddr((void *)(((ullong_t)(cr3)) & 0x000ffffffffff000LL)))

/* Page table flag values */
#define PT32_HOOK 0x1
#define PT32_GUEST_PT 0x2
171 /* PDE 32 bit PAGE STRUCTURES */
/* Classification of a 32-bit page directory entry: absent, pointing
 * to a page table, or directly mapping a large page. */
typedef enum {
    PDE32_ENTRY_NOT_PRESENT,
    PDE32_ENTRY_PTE32,
    PDE32_ENTRY_LARGE_PAGE
} pde32_entry_type_t;
/* Outcome of checking an access against a page-table entry. */
typedef enum {
    PT_ACCESS_OK,           /* access is permitted                     */
    PT_ENTRY_NOT_PRESENT,   /* entry's present bit is clear            */
    PT_WRITE_ERROR,         /* write attempted to a non-writable page  */
    PT_USER_ERROR           /* user access to a supervisor-only page   */
} pt_access_status_t;
/* 32-bit (non-PAE) page directory entry referencing a page table.
 * NOTE(review): the visible bit-fields sum to only 25 of 32 bits; the
 * present/writable/accessed/vmm-available fields appear to have been
 * lost from this copy of the file -- restore from the upstream source. */
typedef struct pde32 {
  uint_t user_page : 1;      // user/supervisor: 1 = user-mode access allowed
  uint_t write_through : 1;  // page-level write-through caching
  uint_t cache_disable : 1;  // page-level cache disable
  uint_t large_page : 1;     // 1 = entry maps a 4MB page (see pde32_4MB_t)
  uint_t global_page : 1;    // translation not flushed on CR3 reload
  uint_t pt_base_addr : 20;  // page frame number of the page table (<<12 for phys addr)
} __attribute__((packed)) pde32_t;
/* 32-bit (non-PAE) page directory entry directly mapping a 4MB page.
 * NOTE(review): the visible bit-fields sum to only 14 of 32 bits; the
 * present/writable/accessed/dirty/PSE/PAT/reserved fields appear to be
 * missing from this copy -- restore from the upstream source. */
typedef struct pde32_4MB {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t global_page : 1;     // translation not flushed on CR3 reload
  uint_t page_base_addr : 10; // bits 31:22 of the 4MB page's physical address
} __attribute__((packed)) pde32_4MB_t;
/* 32-bit (non-PAE) page table entry mapping a 4KB page.
 * NOTE(review): visible bit-fields sum to only 25 of 32 bits; the
 * present/writable/accessed/dirty/PAT fields appear to be missing from
 * this copy -- restore from the upstream source. */
typedef struct pte32 {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t global_page : 1;     // translation not flushed on CR3 reload
  uint_t page_base_addr : 20; // page frame number of the mapped 4KB page
} __attribute__((packed)) pte32_t;
221 /* 32 bit PAE PAGE STRUCTURES */
/* 32-bit PAE page directory pointer table entry (64 bits).
 * Only MAX_PDPE32PAE_ENTRIES (4) of these exist per PDPT.
 * NOTE(review): visible bit-fields sum to only 58 of 64 bits; the
 * present and available fields appear to be missing from this copy --
 * restore from the upstream source. */
typedef struct pdpe32pae {
  uint_t rsvd : 2;          // MBZ
  uint_t write_through : 1; // page-level write-through caching
  uint_t cache_disable : 1; // page-level cache disable
  uint_t rsvd2 : 2;         // MBZ
  uint_t pd_base_addr : 24; // page frame number of the page directory
  uint_t rsvd3 : 28;        // MBZ
} __attribute__((packed)) pdpe32pae_t;
/* 32-bit PAE page directory entry (64 bits) referencing a page table.
 * NOTE(review): visible bit-fields sum to only 29 of 64 bits; the
 * present/writable/accessed and high reserved/NX fields appear to be
 * missing from this copy -- restore from the upstream source. */
typedef struct pde32pae {
  uint_t user_page : 1;      // user/supervisor flag
  uint_t write_through : 1;  // page-level write-through caching
  uint_t cache_disable : 1;  // page-level cache disable
  uint_t large_page : 1;     // 1 = entry maps a large page (see pde32pae_4MB_t)
  uint_t global_page : 1;    // translation not flushed on CR3 reload
  uint_t pt_base_addr : 24;  // page frame number of the page table
} __attribute__((packed)) pde32pae_t;
/* 32-bit PAE page directory entry directly mapping a large page.
 * NOTE(review): PAE large pages are architecturally 2MB, so the "4MB"
 * name may be historical -- confirm against the implementation. The
 * visible bit-fields also sum to far fewer than 64 bits; the
 * present/writable/accessed/dirty/PAT/reserved fields appear to be
 * missing from this copy -- restore from the upstream source. */
typedef struct pde32pae_4MB {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t global_page : 1;     // translation not flushed on CR3 reload
  uint_t page_base_addr : 14; // high bits of the large page's physical address
} __attribute__((packed)) pde32pae_4MB_t;
/* 32-bit PAE page table entry (64 bits) mapping a 4KB page.
 * NOTE(review): visible bit-fields sum to only 29 of 64 bits; the
 * present/writable/accessed/dirty and high reserved/NX fields appear
 * to be missing from this copy -- restore from the upstream source. */
typedef struct pte32pae {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t global_page : 1;     // translation not flushed on CR3 reload
  uint_t page_base_addr : 24; // page frame number of the mapped 4KB page
} __attribute__((packed)) pte32pae_t;
292 /* LONG MODE 64 bit PAGE STRUCTURES */
/* Long-mode level-4 page map entry (64 bits) referencing a PDPT.
 * NOTE(review): visible bit-fields sum to only 54 of 64 bits; the
 * present/writable/accessed/reserved fields appear to be missing from
 * this copy -- restore from the upstream source. */
typedef struct pml4e64 {
  uint_t user_page : 1;        // user/supervisor flag
  uint_t write_through : 1;    // page-level write-through caching
  uint_t cache_disable : 1;    // page-level cache disable
  ullong_t pdp_base_addr : 40; // page frame number of the PDPT
  uint_t available : 11;       // ignored by hardware, free for VMM use
  uint_t no_execute : 1;       // NX: instruction fetch disallowed when set
} __attribute__((packed)) pml4e64_t;
/* Long-mode page directory pointer entry (64 bits).
 * NOTE(review): visible bit-fields sum to only 55 of 64 bits; the
 * present/writable/accessed/reserved fields appear to be missing from
 * this copy -- restore from the upstream source. */
typedef struct pdpe64 {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t large_page : 1;      // 1 = entry maps a 1GB page directly
  ullong_t pd_base_addr : 40; // page frame number of the page directory
  uint_t available : 11;      // ignored by hardware, free for VMM use
  uint_t no_execute : 1;      // NX: instruction fetch disallowed when set
} __attribute__((packed)) pdpe64_t;
/* Long-mode page directory entry (64 bits) referencing a page table.
 * NOTE(review): visible bit-fields sum to only 56 of 64 bits; the
 * present/writable/accessed fields appear to be missing from this
 * copy -- restore from the upstream source. */
typedef struct pde64 {
  uint_t user_page : 1;       // user/supervisor flag
  uint_t write_through : 1;   // page-level write-through caching
  uint_t cache_disable : 1;   // page-level cache disable
  uint_t large_page : 1;      // 1 = entry maps a 2MB page directly
  uint_t reserved2 : 1;       // reserved
  ullong_t pt_base_addr : 40; // page frame number of the page table
  uint_t available : 11;      // ignored by hardware, free for VMM use
  uint_t no_execute : 1;      // NX: instruction fetch disallowed when set
} __attribute__((packed)) pde64_t;
/* Long-mode page table entry (64 bits) mapping a 4KB page.
 * NOTE(review): visible bit-fields sum to only 56 of 64 bits; the
 * present/writable/accessed/dirty/PAT fields appear to be missing from
 * this copy -- restore from the upstream source. */
typedef struct pte64 {
  uint_t user_page : 1;         // user/supervisor flag
  uint_t write_through : 1;     // page-level write-through caching
  uint_t cache_disable : 1;     // page-level cache disable
  uint_t global_page : 1;       // translation not flushed on CR3 reload
  ullong_t page_base_addr : 40; // page frame number of the mapped 4KB page
  uint_t available : 11;        // ignored by hardware, free for VMM use
  uint_t no_execute : 1;        // NX: instruction fetch disallowed when set
} __attribute__((packed)) pte64_t;
360 /* *************** */
/* Page-fault error code as pushed by the hardware on a #PF exception;
 * only the low bits are architecturally defined.
 * NOTE(review): a trailing reserved field padding this to 32 bits
 * appears to be missing from this copy -- restore from the upstream
 * source before relying on sizeof(pf_error_t). */
typedef struct pf_error_code {
  uint_t present : 1; // if 0, fault due to page not present
  uint_t write : 1; // if 1, faulting access was a write
  uint_t user : 1; // if 1, faulting access was in user mode
  uint_t rsvd_access : 1; // if 1, fault from reading a 1 from a reserved field (?)
  uint_t ifetch : 1; // if 1, faulting access was an instr fetch (only with NX)
} __attribute__((packed)) pf_error_t;
/* Tear down a 32-bit page table hierarchy rooted at pde (presumably
 * freeing its pages -- confirm in the implementation). */
void delete_page_tables_pde32(pde32_t * pde);

/* Look up addr in the given directory / table, reporting what kind of
 * entry was found; the value found is presumably returned through
 * entry -- confirm against the implementation. */
pde32_entry_type_t pde32_lookup(pde32_t * pd, addr_t addr, addr_t * entry);
int pte32_lookup(pte32_t * pte, addr_t addr, addr_t * entry);

// This assumes that the page table resides in the host address space
// IE. IT DOES NO VM ADDR TRANSLATION
int pt32_lookup(pde32_t * pd, addr_t vaddr, addr_t * paddr);

/* Check whether an access described by access_type to addr is allowed
 * by the given entry; see pt_access_status_t for the outcomes. */
pt_access_status_t can_access_pde32(pde32_t * pde, addr_t addr, pf_error_t access_type);
pt_access_status_t can_access_pte32(pte32_t * pte, addr_t addr, pf_error_t access_type);

/* Build passthrough page tables for the guest in each paging format
 * (32-bit, 32-bit PAE, long mode); caller owns the returned tables. */
pde32_t * create_passthrough_pts_32(struct guest_info * guest_info);
pdpe32pae_t * create_passthrough_pts_PAE32(struct guest_info * guest_info);
pml4e64_t * create_passthrough_pts_64(struct guest_info * info);
/* Debug print helpers for the various page-table entry formats. */
void PrintDebugPageTables(pde32_t * pde);

void PrintPT32(addr_t starting_address, pte32_t * pte);
void PrintPD32(pde32_t * pde);
void PrintPTE32(addr_t virtual_address, pte32_t * pte);
void PrintPDE32(addr_t virtual_address, pde32_t * pde);

void PrintDebugPageTables32PAE(pdpe32pae_t * pde);
void PrintPTE32PAE(addr_t virtual_address, pte32pae_t * pte);
void PrintPDE32PAE(addr_t virtual_address, pde32pae_t * pde);
void PrintPTE64(addr_t virtual_address, pte64_t * pte);