Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

Checking out any of the other branches works the same way.
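For example, to work on a release branch instead, list the remote branches and track the one you want (the branch name below is only illustrative; use whatever git branch -r actually reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2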


palacios/src/geekos/vmm_paging.c
#include <geekos/vmm_paging.h>

#include <geekos/vmm.h>



extern struct vmm_os_hooks * os_hooks;
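/* Walk a 32-bit page directory and free every data page it maps,
 * every present page table, and finally the directory page itself. */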
void delete_page_tables_pde32(vmm_pde_t * pde) {
  int i, j;

  if (pde == NULL) { 
    return;
  }

  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES); i++) {
    if (pde[i].present) {
      vmm_pte_t * pte = (vmm_pte_t *)(pde[i].pt_base_addr << PAGE_POWER);
      
      for (j = 0; (j < MAX_PAGE_TABLE_ENTRIES); j++) {
        if ((pte[j].present)) {
          os_hooks->free_page((void *)(pte[j].page_base_addr << PAGE_POWER));
        }
      }
      
      os_hooks->free_page(pte);
    }
  }

  os_hooks->free_page(pde);
}

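/* Reset the shadow paging state: both guest and shadow start out in
 * 32-bit (PDE32) paging mode with cleared CR3 values. */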
int init_shadow_page_state(shadow_page_state_t * state) {
  state->guest_mode = PDE32;
  state->shadow_mode = PDE32;
  
  state->guest_cr3.r_reg = 0;
  state->shadow_cr3.r_reg = 0;

  return 0;
}

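/* Rebuild the shadow page tables from scratch: walk the guest's PDE32
 * page tables and translate each guest-physical frame to a host-physical
 * frame through the shadow memory map. Returns -1 if the guest maps
 * memory it does not own or a region type that is not yet handled. */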
int wholesale_update_shadow_page_state(shadow_page_state_t * state, shadow_map_t * mem_map) {
  unsigned i, j;
  vmm_pde_t * guest_pde;
  vmm_pde_t * shadow_pde;


  // For now, we'll only work with PDE32
  if (state->guest_mode != PDE32) { 
    return -1;
  }


  
  shadow_pde = (vmm_pde_t *)(CR3_TO_PDE(state->shadow_cr3.e_reg.low));  
  guest_pde = (vmm_pde_t *)(os_hooks->paddr_to_vaddr((void*)CR3_TO_PDE(state->guest_cr3.e_reg.low)));

  // Delete the current page table
  delete_page_tables_pde32(shadow_pde);

  shadow_pde = os_hooks->allocate_pages(1);


  state->shadow_cr3.e_reg.low = (addr_t)shadow_pde;

  state->shadow_mode = PDE32;

  
  for (i = 0; i < MAX_PAGE_DIR_ENTRIES; i++) { 
    shadow_pde[i] = guest_pde[i];

    // The shadow can be identical to the guest if it's not present
    if (!shadow_pde[i].present) { 
      continue;
    }

    if (shadow_pde[i].large_pages) { 
      // large page - just map it through shadow map to generate its physical location
      addr_t guest_addr = PAGE_ADDR(shadow_pde[i].pt_base_addr);
      addr_t host_addr;
      shadow_region_t * ent;

      ent = get_shadow_region_by_addr(mem_map, guest_addr);
      
      if (!ent) { 
        // FIXME Panic here - guest is trying to map to physical memory
        // it does not own in any way!
        return -1;
      }

      // FIXME Bounds check here to see if it's trying to trick us
      
      switch (ent->host_type) { 
      case HOST_REGION_PHYSICAL_MEMORY:
        // points into currently allocated physical memory, so we just
        // set up the shadow to point to the mapped location
        if (guest_paddr_to_host_paddr(ent, guest_addr, &host_addr)) { 
          // Panic here
          return -1;
        }

        shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(host_addr);
        // FIXME set vmm_info bits here
        break;
      case HOST_REGION_UNALLOCATED:
        // points to physical memory that is *allowed* but that we
        // have not yet allocated.  We mark as not present and set a
        // bit to remind us to allocate it later
        shadow_pde[i].present = 0;
        // FIXME Set vmm_info bits here so that we know that we will be
        // allocating it later
        break;
      case HOST_REGION_NOTHING:
        // points to physical memory that is NOT ALLOWED.
        // We mark it as not present and set a bit to remind us
        // later that it's bad, and inject a GPF when it is touched
        shadow_pde[i].present = 0;
        break;
      case HOST_REGION_MEMORY_MAPPED_DEVICE:
      case HOST_REGION_REMOTE:
      case HOST_REGION_SWAPPED:
      default:
        // Panic.  Currently unhandled
        return -1;
        break;
      }
    } else {
      vmm_pte_t * guest_pte;
      vmm_pte_t * shadow_pte;
      addr_t guest_addr;
      addr_t guest_pte_host_addr;
      shadow_region_t * ent;

      // small page - set PDE and follow down to the child table
      shadow_pde[i] = guest_pde[i];

      guest_addr = PAGE_ADDR(guest_pde[i].pt_base_addr);

      // Allocate a new second level page table for the shadow
      shadow_pte = os_hooks->allocate_pages(1);

      // make our first level page table in the shadow point to it
      shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(shadow_pte);
      
      ent = get_shadow_region_by_addr(mem_map, guest_addr);
      

      /* JRL: This is bad.... */
      // For now the guest Page Table must always be mapped to host physical memory
      /* If we swap out a page table or if it isn't present for some reason, this turns real ugly */

      if ((!ent) || (ent->host_type != HOST_REGION_PHYSICAL_MEMORY)) { 
        // FIXME Panic here - guest is trying to map to physical memory
        // it does not own in any way!
        return -1;
      }

      // Address of the relevant second level page table in the guest
      if (guest_paddr_to_host_paddr(ent, guest_addr, &guest_pte_host_addr)) { 
        // Panic here
        return -1;
      }


      // guest_pte_host_addr now contains the host physical address of the guest's 2nd level page table
      // Now we transform it to the relevant virtual address
      guest_pte = os_hooks->paddr_to_vaddr((void *)guest_pte_host_addr);

      // Now we walk through the second level guest page table
      // and clone it into the shadow
      for (j = 0; j < MAX_PAGE_TABLE_ENTRIES; j++) { 
        shadow_pte[j] = guest_pte[j];

        addr_t guest_addr = PAGE_ADDR(shadow_pte[j].page_base_addr);
        
        shadow_region_t * ent;

        ent = get_shadow_region_by_addr(mem_map, guest_addr);
      
        if (!ent) { 
          // FIXME Panic here - guest is trying to map to physical memory
          // it does not own in any way!
          return -1;
        }

        switch (ent->host_type) { 
        case HOST_REGION_PHYSICAL_MEMORY:
          {
            addr_t host_addr;
            
            // points into currently allocated physical memory, so we just
            // set up the shadow to point to the mapped location
            if (guest_paddr_to_host_paddr(ent, guest_addr, &host_addr)) { 
              // Panic here
              return -1;
            }
            
            shadow_pte[j].page_base_addr = PAGE_ALIGNED_ADDR(host_addr);
            // FIXME set vmm_info bits here
            break;
          }
        case HOST_REGION_UNALLOCATED:
          // points to physical memory that is *allowed* but that we
          // have not yet allocated.  We mark as not present and set a
          // bit to remind us to allocate it later
          shadow_pte[j].present = 0;
          // FIXME Set vmm_info bits here so that we know that we will be
          // allocating it later
          break;
        case HOST_REGION_NOTHING:
          // points to physical memory that is NOT ALLOWED.
          // We mark it as not present and set a bit to remind us
          // later that it's bad, and inject a GPF when it is touched
          shadow_pte[j].present = 0;
          break;
        case HOST_REGION_MEMORY_MAPPED_DEVICE:
        case HOST_REGION_REMOTE:
        case HOST_REGION_SWAPPED:
        default:
          // Panic.  Currently unhandled
          return -1;
          break;
        }
      }
    }
  }
  return 0;
}




/* We generate a page table to correspond to a given memory layout,
 * pulling pages from the mem_list when necessary.
 * If there are any gaps in the layout, we add them as unmapped pages.
 */
vmm_pde_t * create_passthrough_pde32_pts(shadow_map_t * map) {
  ullong_t current_page_addr = 0;
  int i, j;


  vmm_pde_t * pde = os_hooks->allocate_pages(1);

  for (i = 0; i < MAX_PAGE_DIR_ENTRIES; i++) {
    int pte_present = 0;
    vmm_pte_t * pte = os_hooks->allocate_pages(1);
    

    for (j = 0; j < MAX_PAGE_TABLE_ENTRIES; j++) {
      shadow_region_t * region = get_shadow_region_by_addr(map, current_page_addr);

      if (!region || 
          (region->host_type == HOST_REGION_NOTHING) || 
          (region->host_type == HOST_REGION_UNALLOCATED) || 
          (region->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE) || 
          (region->host_type == HOST_REGION_REMOTE) ||
          (region->host_type == HOST_REGION_SWAPPED)) {
        pte[j].present = 0;
        pte[j].flags = 0;
        pte[j].accessed = 0;
        pte[j].dirty = 0;
        pte[j].pte_attr = 0;
        pte[j].global_page = 0;
        pte[j].vmm_info = 0;
        pte[j].page_base_addr = 0;
      } else {
        addr_t host_addr;
        pte[j].present = 1;
        pte[j].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
        
        pte[j].accessed = 0;
        pte[j].dirty = 0;
        pte[j].pte_attr = 0;
        pte[j].global_page = 0;
        pte[j].vmm_info = 0;

        if (guest_paddr_to_host_paddr(region, current_page_addr, &host_addr) == -1) {
          // BIG ERROR
          // PANIC
          return NULL;
        }
        
        pte[j].page_base_addr = host_addr >> 12;
        
        pte_present = 1;
      }

      current_page_addr += PAGE_SIZE;
    }

    if (pte_present == 0) { 
      os_hooks->free_page(pte);

      pde[i].present = 0;
      pde[i].flags = 0;
      pde[i].accessed = 0;
      pde[i].reserved = 0;
      pde[i].large_pages = 0;
      pde[i].global_page = 0;
      pde[i].vmm_info = 0;
      pde[i].pt_base_addr = 0;
    } else {
      pde[i].present = 1;
      pde[i].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
      pde[i].accessed = 0;
      pde[i].reserved = 0;
      pde[i].large_pages = 0;
      pde[i].global_page = 0;
      pde[i].vmm_info = 0;
      pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(pte);
    }

  }

  return pde;
}


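/* Debugging helpers: dump individual PDEs/PTEs and whole page-table
 * hierarchies through PrintDebug. */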
void PrintPDE(void * virtual_address, vmm_pde_t * pde)
{
  PrintDebug("PDE %p -> %p : present=%x, flags=%x, accessed=%x, reserved=%x, largePages=%x, globalPage=%x, kernelInfo=%x\n",
              virtual_address,
              (void *) (pde->pt_base_addr << PAGE_POWER),
              pde->present,
              pde->flags,
              pde->accessed,
              pde->reserved,
              pde->large_pages,
              pde->global_page,
              pde->vmm_info);
}
  
void PrintPTE(void * virtual_address, vmm_pte_t * pte)
{
  PrintDebug("PTE %p -> %p : present=%x, flags=%x, accessed=%x, dirty=%x, pteAttribute=%x, globalPage=%x, vmm_info=%x\n",
              virtual_address,
              (void*)(pte->page_base_addr << PAGE_POWER),
              pte->present,
              pte->flags,
              pte->accessed,
              pte->dirty,
              pte->pte_attr,
              pte->global_page,
              pte->vmm_info);
}



void PrintPD(vmm_pde_t * pde)
{
  int i;

  PrintDebug("Page Directory at %p:\n", pde);
  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES) && pde[i].present; i++) { 
    PrintPDE((void*)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), &(pde[i]));
  }
}

void PrintPT(void * starting_address, vmm_pte_t * pte) 
{
  int i;

  PrintDebug("Page Table at %p:\n", pte);
  for (i = 0; (i < MAX_PAGE_TABLE_ENTRIES) && pte[i].present; i++) { 
    PrintPTE(starting_address + (PAGE_SIZE * i), &(pte[i]));
  }
}




void PrintDebugPageTables(vmm_pde_t * pde)
{
  int i;
  
  PrintDebug("Dumping the pages starting with the pde page at %p\n", pde);

  for (i = 0; (i < MAX_PAGE_DIR_ENTRIES) && pde[i].present; i++) { 
    PrintPDE((void *)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), &(pde[i]));
    PrintPT((void *)(PAGE_SIZE * MAX_PAGE_TABLE_ENTRIES * i), (void *)(pde[i].pt_base_addr << PAGE_POWER));
  }
}




#if 0

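/* Disabled: builds a full 4-level (PML4) guest page-table hierarchy
 * directly from a memory layout and page list. Kept for reference. */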
pml4e64_t * generate_guest_page_tables_64(vmm_mem_layout_t * layout, vmm_mem_list_t * list) {
  pml4e64_t * pml = os_hooks->allocate_pages(1);
  int i, j, k, m;
  ullong_t current_page_addr = 0;
  uint_t layout_index = 0;
  uint_t list_index = 0;
  ullong_t layout_addr = 0;
  uint_t num_entries = layout->num_pages;  // The number of pages left in the layout

  for (m = 0; m < MAX_PAGE_MAP_ENTRIES_64; m++ ) {
    if (num_entries == 0) {
      pml[m].present = 0;
      pml[m].writable = 0;
      pml[m].user = 0;
      pml[m].pwt = 0;
      pml[m].pcd = 0;
      pml[m].accessed = 0;
      pml[m].reserved = 0;
      pml[m].zero = 0;
      pml[m].vmm_info = 0;
      pml[m].pdp_base_addr_lo = 0;
      pml[m].pdp_base_addr_hi = 0;
      pml[m].available = 0;
      pml[m].no_execute = 0;
    } else {
      pdpe64_t * pdpe = os_hooks->allocate_pages(1);
      
      pml[m].present = 1;
      pml[m].writable = 1;
      pml[m].user = 1;
      pml[m].pwt = 0;
      pml[m].pcd = 0;
      pml[m].accessed = 0;
      pml[m].reserved = 0;
      pml[m].zero = 0;
      pml[m].vmm_info = 0;
      pml[m].pdp_base_addr_lo = PAGE_ALLIGNED_ADDR(pdpe) & 0xfffff;
      pml[m].pdp_base_addr_hi = 0;
      pml[m].available = 0;
      pml[m].no_execute = 0;

      for (k = 0; k < MAX_PAGE_DIR_PTR_ENTRIES_64; k++) {
        if (num_entries == 0) {
          pdpe[k].present = 0;
          pdpe[k].writable = 0;
          pdpe[k].user = 0;
          pdpe[k].pwt = 0;
          pdpe[k].pcd = 0;
          pdpe[k].accessed = 0;
          pdpe[k].reserved = 0;
          pdpe[k].large_pages = 0;
          pdpe[k].zero = 0;
          pdpe[k].vmm_info = 0;
          pdpe[k].pd_base_addr_lo = 0;
          pdpe[k].pd_base_addr_hi = 0;
          pdpe[k].available = 0;
          pdpe[k].no_execute = 0;
        } else {
          pde64_t * pde = os_hooks->allocate_pages(1);

          pdpe[k].present = 1;
          pdpe[k].writable = 1;
          pdpe[k].user = 1;
          pdpe[k].pwt = 0;
          pdpe[k].pcd = 0;
          pdpe[k].accessed = 0;
          pdpe[k].reserved = 0;
          pdpe[k].large_pages = 0;
          pdpe[k].zero = 0;
          pdpe[k].vmm_info = 0;
          pdpe[k].pd_base_addr_lo = PAGE_ALLIGNED_ADDR(pde) & 0xfffff;
          pdpe[k].pd_base_addr_hi = 0;
          pdpe[k].available = 0;
          pdpe[k].no_execute = 0;



          for (i = 0; i < MAX_PAGE_DIR_ENTRIES_64; i++) {
            if (num_entries == 0) { 
              pde[i].present = 0;
              pde[i].flags = 0;
              pde[i].accessed = 0;
              pde[i].reserved = 0;
              pde[i].large_pages = 0;
              pde[i].reserved2 = 0;
              pde[i].vmm_info = 0;
              pde[i].pt_base_addr_lo = 0;
              pde[i].pt_base_addr_hi = 0;
              pde[i].available = 0;
              pde[i].no_execute = 0;
            } else {
              pte64_t * pte = os_hooks->allocate_pages(1);
              
              pde[i].present = 1;
              pde[i].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
              pde[i].accessed = 0;
              pde[i].reserved = 0;
              pde[i].large_pages = 0;
              pde[i].reserved2 = 0;
              pde[i].vmm_info = 0;
              pde[i].pt_base_addr_lo = PAGE_ALLIGNED_ADDR(pte) & 0xfffff;
              pde[i].pt_base_addr_hi = 0;
              pde[i].available = 0;
              pde[i].no_execute = 0;

              
              for (j = 0; j < MAX_PAGE_TABLE_ENTRIES_64; j++) {
                layout_addr = get_mem_layout_addr(layout, layout_index);
                
                if ((current_page_addr < layout_addr) || (num_entries == 0)) {
                  // We have a gap in the layout, fill with unmapped page
                  pte[j].present = 0;
                  pte[j].flags = 0;
                  pte[j].accessed = 0;
                  pte[j].dirty = 0;
                  pte[j].pte_attr = 0;
                  pte[j].global_page = 0;
                  pte[j].vmm_info = 0;
                  pte[j].page_base_addr_lo = 0;
                  pte[j].page_base_addr_hi = 0;
                  pte[j].available = 0;
                  pte[j].no_execute = 0;

                  current_page_addr += PAGE_SIZE;
                } else if (current_page_addr == layout_addr) {
                  // Set up the Table entry to map correctly to the layout region
                  layout_region_t * page_region = get_mem_layout_region(layout, layout_addr);
                  
                  if (page_region->type == UNMAPPED) {
                    pte[j].present = 0;
                    pte[j].flags = 0;
                  } else {
                    pte[j].present = 1;
                    pte[j].flags = VM_READ | VM_WRITE | VM_EXEC | VM_USER;
                  }
                  
                  pte[j].accessed = 0;
                  pte[j].dirty = 0;
                  pte[j].pte_attr = 0;
                  pte[j].global_page = 0;
                  pte[j].vmm_info = 0;
                  pte[j].available = 0;
                  pte[j].no_execute = 0;

                  if (page_region->type == UNMAPPED) {
                    pte[j].page_base_addr_lo = 0;
                    pte[j].page_base_addr_hi = 0;
                  } else if (page_region->type == SHARED) {
                    addr_t host_addr = page_region->host_addr + (layout_addr - page_region->start);
                    
                    pte[j].page_base_addr_lo = PAGE_ALLIGNED_ADDR(host_addr) & 0xfffff;
                    pte[j].page_base_addr_hi = 0;
                    pte[j].vmm_info = SHARED_PAGE;
                  } else if (page_region->type == GUEST) {
                    addr_t list_addr =  get_mem_list_addr(list, list_index++);
                    
                    if (list_addr == -1) {
                      // error
                      // cleanup...
                      //free_guest_page_tables(pde);
                      return NULL;
                    }
                    PrintDebug("Adding guest page (%x)\n", list_addr);
                    pte[j].page_base_addr_lo = PAGE_ALLIGNED_ADDR(list_addr) & 0xfffff;
                    pte[j].page_base_addr_hi = 0;

                    // Reset this when we move over to dynamic page allocation
                    //      pte[j].vmm_info = GUEST_PAGE;
                    pte[j].vmm_info = SHARED_PAGE;
                  }
                  
                  num_entries--;
                  current_page_addr += PAGE_SIZE;
                  layout_index++;
                } else {
                  // error
                  PrintDebug("Error creating page table...\n");
                  // cleanup
                  //              free_guest_page_tables64(pde);
                  return NULL;
                }
              }
            }
          }
        }
      }
    }
  }
  return pml;
}

#endif