/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
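
/* Shadow page table caching ("VTLB caching"): cached shadow page tables are
 * kept in a hash table keyed by guest frame number and role, backed by a
 * fixed pool of preallocated pages.  Reverse mappings (rmaps) from host
 * frames to shadow PTEs are used to write-protect guest page tables that
 * back cached shadow pages. */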
#include <palacios/vmm_shadow_paging.h>
#include <palacios/vmm_ctrl_regs.h>

#include <palacios/vm_guest.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_paging.h>
#ifndef CONFIG_DEBUG_SHDW_CACHE
#undef PrintDebug
#define PrintDebug(fmt, ...)
#endif

#ifdef CONFIG_SHADOW_CACHE
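
/* Bookkeeping structures: a pde_chain records the shadow PDE slots ("parent
 * pointers") that reference a cached shadow page once it is mapped from more
 * than one place (a single mapping is kept inline in shadow_pde), and an rmap
 * holds the extra reverse-mapped shadow PTEs for a guest frame. */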
struct pde_chain {
    addr_t shadow_pdes[NR_PTE_CHAIN_ENTRIES];
    struct hlist_node link;
};

struct rmap {
    addr_t shadow_ptes[RMAP_EXT];
    struct rmap * more;
};
static inline int activate_shadow_pt_32(struct guest_info * core);

static inline unsigned shadow_page_table_hashfn(addr_t guest_fn)
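
/* Small per-core object caches: pde_chain and rmap structures are handed out
 * of an array of preallocated objects (refilled by shadow_topup_cache()), so
 * they can be allocated in the middle of a shadow page table update without
 * going back to V3_Malloc(). */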
static void * shadow_cache_alloc(struct shadow_cache * mc, size_t size)
    PrintDebug("at shadow_cache_alloc: mc->nobjs is zero\n");
    p = mc->objects[--mc->nobjs];

static void shadow_cache_free(struct shadow_cache * mc, void * obj)
    if (mc->nobjs < NR_MEM_OBJS) {
        mc->objects[mc->nobjs++] = obj;

static struct rmap * shadow_alloc_rmap(struct guest_info * core)
    return shadow_cache_alloc(&core->shadow_rmap_cache, sizeof(struct rmap));

static void shadow_free_rmap(struct guest_info * core, struct rmap * rd)
    shadow_cache_free(&core->shadow_rmap_cache, rd);
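
/* Refill an object cache up to its capacity, but only if it currently holds
 * fewer than 'min' objects; returns 0 when already topped up, and fails if
 * V3_Malloc() cannot supply a new object. */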
int shadow_topup_cache(struct shadow_cache * cache, size_t objsize, int min) {

    if (cache->nobjs >= min) return 0;

    while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
        obj = V3_Malloc(objsize);
        PrintDebug("at shadow_topup_cache obj alloc fail\n");
        cache->objects[cache->nobjs++] = obj;

static int shadow_topup_caches(struct guest_info * core) {

    r = shadow_topup_cache(&core->shadow_pde_chain_cache,
                           sizeof(struct pde_chain), 4);

    r = shadow_topup_cache(&core->shadow_rmap_cache,
                           sizeof(struct rmap), 1);
static struct pde_chain * shadow_alloc_pde_chain(struct guest_info * core)
    return shadow_cache_alloc(&core->shadow_pde_chain_cache,
                              sizeof(struct pde_chain));

static void shadow_free_pde_chain(struct guest_info * core, struct pde_chain * pc)
    PrintDebug("shdw_free_pdechain: start\n");
    shadow_cache_free(&core->shadow_pde_chain_cache, pc);
    PrintDebug("shdw_free_pdechain: return\n");
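
/* Shadow page frames themselves come from a fixed pool (page_header_buf /
 * NUM_SHADOW_PAGES) and move between core->free_pages and
 * core->active_shadow_pages; n_free_shadow_pages tracks the free count. */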
static void shadow_free_page(struct guest_info * core, struct shadow_page_cache_data * page)
    list_del(&page->link);

    V3_FreePage((void *)page->page_pa);
    page->page_pa = (addr_t)V3_AllocPages(1);

    list_add(&page->link, &core->free_pages);
    ++core->n_free_shadow_pages;
static struct shadow_page_cache_data * shadow_alloc_page(struct guest_info * core, addr_t shadow_pde) {

    struct shadow_page_cache_data * page;

    if (list_empty(&core->free_pages)) return NULL;

    page = list_entry(core->free_pages.next, struct shadow_page_cache_data, link);
    list_del(&page->link);

    list_add(&page->link, &core->active_shadow_pages);
    page->multimapped = 0;
    page->shadow_pde = shadow_pde;
    --core->n_free_shadow_pages;

    PrintDebug("alloc_page: n_free_shdw_pg %d page_pa %p page_va %p\n",
               core->n_free_shadow_pages, (void *)(page->page_pa), V3_VAddr((void *)(page->page_pa)));

    addr_t shdw_page = (addr_t)V3_VAddr((void *)(page->page_pa));
    memset((void *)shdw_page, 0, PAGE_SIZE_4KB);
static void shadow_zap_page(struct guest_info * core, struct shadow_page_cache_data * page);

static void free_shadow_pages(struct guest_info * core)
    struct shadow_page_cache_data * page;

    while (!list_empty(&core->active_shadow_pages)) {
        page = container_of(core->active_shadow_pages.next,
                            struct shadow_page_cache_data, link);
        shadow_zap_page(core, page);

    while (!list_empty(&core->free_pages)) {
        page = list_entry(core->free_pages.next, struct shadow_page_cache_data, link);
        list_del(&page->link);
        V3_FreePage((void *)page->page_pa);
        page->page_pa = ~(addr_t)0; // invalid address
static int alloc_shadow_pages(struct guest_info * core)
    struct shadow_page_cache_data * page_header = NULL;

    for (i = 0; i < NUM_SHADOW_PAGES; i++) {
        page_header = &core->page_header_buf[i];

        INIT_LIST_HEAD(&page_header->link);
        if (!(page_header->page_pa = (addr_t)V3_AllocPages(1))) {

        addr_t shdw_page = (addr_t)V3_VAddr((void *)(page_header->page_pa));
        memset((void *)shdw_page, 0, PAGE_SIZE_4KB);

        list_add(&page_header->link, &core->free_pages);
        ++core->n_free_shadow_pages;
        PrintDebug("alloc_shdw_pg: n_free_shdw_pg %d page_pa %p\n",
                   core->n_free_shadow_pages, (void *)page_header->page_pa);

    free_shadow_pages(core);
    return -1; // out of memory
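
/* Parent-pointer tracking: each cached shadow page remembers which shadow PDE
 * entries point at it, either inline in shadow_pde (single mapping) or via a
 * chain of pde_chain structures once it becomes multimapped. */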
static void shadow_page_add_shadow_pde(struct guest_info * core,
                                       struct shadow_page_cache_data * page, addr_t shadow_pde)
    struct pde_chain * pde_chain;
    struct hlist_node * node;

    if (!page->multimapped) {
        old = page->shadow_pde;

        page->shadow_pde = shadow_pde;

        page->multimapped = 1;
        pde_chain = shadow_alloc_pde_chain(core);
        INIT_HLIST_HEAD(&page->shadow_pdes);
        hlist_add_head(&pde_chain->link, &page->shadow_pdes);
        pde_chain->shadow_pdes[0] = old;

    hlist_for_each_entry(pde_chain, node, &page->shadow_pdes, link) {
        if (pde_chain->shadow_pdes[NR_PTE_CHAIN_ENTRIES - 1]) continue;

        for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
            if (!pde_chain->shadow_pdes[i]) {
                pde_chain->shadow_pdes[i] = shadow_pde;

    pde_chain = shadow_alloc_pde_chain(core);

    hlist_add_head(&pde_chain->link, &page->shadow_pdes);
    pde_chain->shadow_pdes[0] = shadow_pde;
static void shadow_page_remove_shadow_pde(struct guest_info * core,
                                          struct shadow_page_cache_data * page, addr_t shadow_pde)

    struct pde_chain * pde_chain;
    struct hlist_node * node;

    PrintDebug("rm_shdw_pde: multimap %d\n", page->multimapped);
    if (!page->multimapped) {
        PrintDebug("rm_shdw_pde: no multimap\n");
        if (page->shadow_pde != shadow_pde)
            PrintDebug("rm_shdw_pde: error page->shadow_pde is not equal to shadow_pde\n");
        page->shadow_pde = 0;
        PrintDebug("rm_shdw_pde: return\n");

    PrintDebug("rm_shdw_pde: multimap\n");

    hlist_for_each_entry(pde_chain, node, &page->shadow_pdes, link)
        for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
            if (!pde_chain->shadow_pdes[i]) break;
            if (pde_chain->shadow_pdes[i] != shadow_pde) continue;

            PrintDebug("rm_shdw_pde: found shadow_pde at i %d\n", i);
            while (i + 1 < NR_PTE_CHAIN_ENTRIES && pde_chain->shadow_pdes[i + 1]) {
                pde_chain->shadow_pdes[i] = pde_chain->shadow_pdes[i + 1];

            pde_chain->shadow_pdes[i] = 0;

            PrintDebug("rm_shdw_pde: only one!\n");
            hlist_del(&pde_chain->link);
            shadow_free_pde_chain(core, pde_chain);
            if (hlist_empty(&page->shadow_pdes)) {
                page->multimapped = 0;
                page->shadow_pde = 0;

            PrintDebug("rm_shdw_pde: return\n");

    PrintDebug("rm_shdw_pde: return\n");
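
/* Given a guest PDE and the shadow PDE slot that mapped it, look up the cached
 * shadow page built for that guest PDE (by hashing its page table base and
 * role) and remove this shadow PDE from that page's parent pointers. */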
static void shadow_page_search_shadow_pde(struct guest_info * core, addr_t shadow_pde,
                                          addr_t guest_pde, unsigned hlevel) {

    struct shadow_page_cache_data * shdw_page;
    struct hlist_head * bucket;
    struct hlist_node * node;
    int hugepage_access = 0;
    union shadow_page_role role;
    addr_t pt_base_addr = 0;
    int metaphysical = 0;

    PrintDebug("shadow_page_search_shadow_pde\n");
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (mode == PROTECTED) {

        PrintDebug("shadow_page_search_shadow_pde: PROTECTED\n");
        pt_base_addr = ((pde32_t *)guest_pde)->pt_base_addr;

        if (((pde32_t *)guest_pde)->large_page == 1) {
            PrintDebug("shadow_page_search_shadow_pde: large page\n");
            hugepage_access = (((pde32_4MB_t *)guest_pde)->writable) | (((pde32_4MB_t *)guest_pde)->user_page << 1);
            pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr));

        role.glevels = PT32_ROOT_LEVEL; // max level
        role.hlevels = PT_PAGE_TABLE_LEVEL;
        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;

    } else if (mode == LONG_32_COMPAT || mode == LONG) {

        PrintDebug("shadow_page_search_shadow_pde: LONG_32_COMPAT/LONG\n");
        pt_base_addr = ((pde64_t *)guest_pde)->pt_base_addr;

        if (hlevel == PT_DIRECTORY_LEVEL) {
            if (((pde64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pde64_2MB_t *)guest_pde)->writable) | (((pde64_2MB_t *)guest_pde)->user_page << 1);
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_2MB(((pde64_2MB_t *)guest_pde)->page_base_addr));

            role.hlevels = PT_PAGE_TABLE_LEVEL;

        } else if (hlevel == PT32E_ROOT_LEVEL) {
            if (((pdpe64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pdpe64_1GB_t *)guest_pde)->writable) | (((pdpe64_1GB_t *)guest_pde)->user_page << 1);
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_1GB(((pdpe64_1GB_t *)guest_pde)->page_base_addr));

            role.hlevels = PT_DIRECTORY_LEVEL;

        } else if (hlevel == PT64_ROOT_LEVEL) {
            if (((pdpe64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pdpe64_1GB_t *)guest_pde)->writable) | (((pdpe64_1GB_t *)guest_pde)->user_page << 1);
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_1GB(((pdpe64_1GB_t *)guest_pde)->page_base_addr));

            role.hlevels = PT32E_ROOT_LEVEL;

        role.glevels = PT64_ROOT_LEVEL; // store numeric
        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;

    index = shadow_page_table_hashfn(pt_base_addr) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    hlist_for_each_entry(shdw_page, node, bucket, hash_link)
        if (shdw_page->guest_fn == pt_base_addr && shdw_page->role.word == role.word) {
            PrintDebug("shadow_page_search_shadow_pde: found\n");
            shadow_page_remove_shadow_pde(core, shdw_page, (addr_t)shadow_pde);
static struct shadow_page_cache_data * shadow_page_lookup_page(struct guest_info * core, addr_t guest_fn, int opt) // purpose of this is write protection

    struct hlist_head * bucket;
    struct shadow_page_cache_data * page;
    struct hlist_node * node;

    PrintDebug("lookup: guest_fn addr %p\n", (void *)BASE_TO_PAGE_ADDR(guest_fn));

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];
    PrintDebug("lookup: index %d bucket %p\n", index, (void *)bucket);

    hlist_for_each_entry(page, node, bucket, hash_link)
        PrintDebug("lookup: page->gfn %p gfn %p metaphysical %d\n",
                   (void *)BASE_TO_PAGE_ADDR(page->guest_fn), (void *)BASE_TO_PAGE_ADDR(guest_fn), page->role.metaphysical);
        if (page->guest_fn == guest_fn && !page->role.metaphysical) {

        else if (page->guest_fn == guest_fn) {

static void rmap_remove(struct guest_info * core, addr_t shadow_pte);
static void rmap_write_protect(struct guest_info * core, addr_t guest_fn);
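
/* Look up (or create) the cached shadow page for a guest frame with the given
 * role.  On a hit the new parent shadow PDE is recorded; on a miss a page is
 * taken from the free pool, hashed in, and the guest frame is write-protected
 * via its rmap so that guest modifications to the page table are caught. */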
struct shadow_page_cache_data * shadow_page_get_page(struct guest_info * core,
                                                     unsigned hugepage_access,
                                                     int force) // force: 0 = default, 1 = do not reuse a cached page (zap it), 2 = suppress debug output

    struct shadow_page_cache_data * page;
    union shadow_page_role role;
    struct hlist_head * bucket;
    struct hlist_node * node;
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (mode == REAL || mode == PROTECTED) role.glevels = PT32_ROOT_LEVEL;
    // exceptional case; long term this should be an argument
    else if (mode == PROTECTED_PAE) role.glevels = PT32E_ROOT_LEVEL;
    else if (mode == LONG || mode == LONG_32_COMPAT) role.glevels = PT64_ROOT_LEVEL;

    role.hlevels = level;
    role.metaphysical = metaphysical;
    role.hugepage_access = hugepage_access;

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    if (force != 2) PrintDebug("get_page: lvl %d idx %d gfn %p role %x\n", level, index, (void *)guest_fn, role.word);

    hlist_for_each_entry(page, node, bucket, hash_link)
        if (page->guest_fn == guest_fn && page->role.word == role.word) {
            shadow_page_add_shadow_pde(core, page, shadow_pde); // guest_fn is already set on this page

            PrintDebug("get_page: found guest_fn %p, index %d, multi %d, next %p\n",
                       (void *)page->guest_fn, index, page->multimapped, (void *)page->hash_link.next);
            if (force == 0 || force == 2)

            shadow_zap_page(core, page);

        PrintDebug("get_page: not found guest_fn %p, index %d, multimapped %d, next %p\n",
                   (void *)page->guest_fn, index, page->multimapped, (void *)page->hash_link.next);

    PrintDebug("get_page: not found\n");

    page = shadow_alloc_page(core, shadow_pde);

    if (!page) return page;

    page->guest_fn = guest_fn;

    page->multimapped = 0;
    page->shadow_pde = 0;

    PrintDebug("get_page: hadd h->first %p, n %p, n->next %p\n",
               (void *)bucket->first, (void *)&page->hash_link, (void *)page->hash_link.next);

    hlist_add_head(&page->hash_link, bucket);
    shadow_page_add_shadow_pde(core, page, shadow_pde);

    if (force != 2) PrintDebug("get_page: hadd h->first %p, n %p, n->next %p\n",
                               (void *)bucket->first, (void *)&page->hash_link, (void *)page->hash_link.next);

    if (!metaphysical) rmap_write_protect(core, guest_fn); // in case the rmapped guest_fn is being used as a page table or directory
    if (force != 2) PrintDebug("get_page: return\n");
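
/* Clear every entry of a cached shadow page.  For PTE-level pages each present
 * entry is removed from its rmap; for directory-level pages the corresponding
 * guest entries are read back and shadow_page_search_shadow_pde() detaches the
 * child shadow page that each present entry pointed to. */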
static void shadow_page_unlink_children(struct guest_info * core, struct shadow_page_cache_data * page) {

    uint32_t * shdw32_table;
    uint32_t * shdw32_entry;
    uint64_t * shdw64_table;
    uint64_t * shdw64_entry;

    uint32_t * guest32_table;
    uint32_t * guest32_entry;
    uint64_t * guest64_table;
    uint64_t * guest64_entry;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (page->role.hlevels == PT_PAGE_TABLE_LEVEL) {

        if (mode == PROTECTED) {

            shdw32_table = (uint32_t *)V3_VAddr((void *)(addr_t)CR3_TO_PDE32_PA(page->page_pa));
            PrintDebug("ulink_chil: pte lvl\n");

            for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {
                shdw32_entry = (uint32_t *)&(shdw32_table[i]);
                if (*shdw32_entry & PT_PRESENT_MASK) {
                    rmap_remove(core, (addr_t)shdw32_entry);
                    PrintDebug("ulink_chil: %d pte: shadow %x\n", i, *shdw32_entry);

                memset((void *)shdw32_entry, 0, sizeof(uint32_t));

            PrintDebug("ulink_chil: return pte\n");

        } else if (mode == LONG_32_COMPAT || mode == LONG) {

            shdw64_table = (uint64_t *)V3_VAddr((void *)(addr_t)CR3_TO_PML4E64_PA(page->page_pa));
            PrintDebug("ulink_chil: pte lvl\n");

            for (i = 0; i < PT_ENT_PER_PAGE; ++i) {
                shdw64_entry = (uint64_t *)&(shdw64_table[i]);
                if (*shdw64_entry & PT_PRESENT_MASK) {
                    rmap_remove(core, (addr_t)shdw64_entry);
                    PrintDebug("ulink_chil: %d pte: shadow %p\n", i, (void *)*((uint64_t *)shdw64_entry));

                memset((void *)shdw64_entry, 0, sizeof(uint64_t));

            PrintDebug("ulink_chil: return pte\n");

    PrintDebug("ulink_chil: pde lvl\n");
    if (mode == PROTECTED) {

        shdw32_table = (uint32_t *)V3_VAddr((void *)(addr_t)CR3_TO_PDE32_PA(page->page_pa));

        if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest32_table) == -1) {
            PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));

        for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {

            shdw32_entry = (uint32_t *)&(shdw32_table[i]);
            guest32_entry = (uint32_t *)&(guest32_table[i]);
            present = *shdw32_entry & PT_PRESENT_MASK;
            if (present) PrintDebug("ulink_chil: pde %dth: shadow %x\n", i, *((uint32_t *)shdw32_entry));
            memset((void *)shdw32_entry, 0, sizeof(uint32_t));
            if (present != 1) continue;

            shadow_page_search_shadow_pde(core, (addr_t)shdw32_entry, (addr_t)guest32_entry, page->role.hlevels);

        PrintDebug("ulink_child: before return at pde level\n");

    } else if (mode == LONG_32_COMPAT || mode == LONG) {

        shdw64_table = (uint64_t *)V3_VAddr((void *)(addr_t)CR3_TO_PML4E64_PA(page->page_pa));

        if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest64_table) == -1) {
            if (page->role.hlevels == PT_DIRECTORY_LEVEL)
                PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            if (page->role.hlevels == PT32E_ROOT_LEVEL)
                PrintError("Invalid Guest PDPE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            if (page->role.hlevels == PT64_ROOT_LEVEL)
                PrintError("Invalid Guest PML4E Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));

        for (i = 0; i < PT_ENT_PER_PAGE; ++i) {

            shdw64_entry = (uint64_t *)&(shdw64_table[i]);
            guest64_entry = (uint64_t *)&(guest64_table[i]);
            present = *shdw64_entry & PT_PRESENT_MASK;
            if (present) PrintDebug("ulink_chil: pde: shadow %p\n", (void *)*((uint64_t *)shdw64_entry));
            memset((void *)shdw64_entry, 0, sizeof(uint64_t));
            if (present != 1) continue;

            shadow_page_search_shadow_pde(core, (addr_t)shdw64_entry, (addr_t)guest64_entry, page->role.hlevels);

    // PrintDebug("ulink_chil: return pde\n");
static void shadow_page_put_page(struct guest_info * core, struct shadow_page_cache_data * page, addr_t shadow_pde) {

    PrintDebug("put_page: start\n");
    shadow_page_remove_shadow_pde(core, page, shadow_pde);

    PrintDebug("put_page: end\n");
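
/* Tear down a cached shadow page: drop every parent shadow PDE that points to
 * it, unlink all of its children, and then either return it to the free pool
 * or, if it currently backs the guest's CR3, keep it on the active list. */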
static void shadow_zap_page(struct guest_info * core, struct shadow_page_cache_data * page) {

    addr_t cr3_base_addr = 0;
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    PrintDebug("zap: multimapped %d, metaphysical %d\n", page->multimapped, page->role.metaphysical);

    while (page->multimapped || page->shadow_pde) {
        if (!page->multimapped) {
            shadow_pde = page->shadow_pde;

            struct pde_chain * chain;
            chain = container_of(page->shadow_pdes.first, struct pde_chain, link);
            shadow_pde = chain->shadow_pdes[0];

        shadow_page_put_page(core, page, shadow_pde);
        PrintDebug("zap_parent: pde: shadow %p\n", (void *)*((addr_t *)shadow_pde));
        memset((void *)shadow_pde, 0, sizeof(struct pde32));

    shadow_page_unlink_children(core, page);

    PrintDebug("zap: end of unlink\n");

    if (mode == PROTECTED) {
        cr3_base_addr = ((struct cr3_32 *)&(core->shdw_pg_state.guest_cr3))->pdt_base_addr;
    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        cr3_base_addr = ((struct cr3_64 *)&(core->shdw_pg_state.guest_cr3))->pml4t_base_addr;

    PrintDebug("zap: before hlist_del\n");
    PrintDebug("zap: page->guest_fn %p\n", (void *)page->guest_fn);

    if (page->guest_fn != (addr_t)(cr3_base_addr)) {
        PrintDebug("zap: first hlist_del\n");

        hlist_del(&page->hash_link);
        shadow_free_page(core, page);

    PrintDebug("zap: second hlist_del\n");

    list_del(&page->link);
    list_add(&page->link, &core->active_shadow_pages);

    PrintDebug("zap: end hlist_del\n");
int shadow_zap_hierarchy_32(struct guest_info * core, struct shadow_page_cache_data * page) {

    if (page->role.hlevels != 2) return -1;

    shadow_pd = CR3_TO_PDE32_VA(page->page_pa);
    if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));

    for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {

        shadow_pde = (pde32_t *)&(shadow_pd[i]);
        guest_pde = (pde32_t *)&(guest_pd[i]);
        present = shadow_pde->present;
        if (shadow_pde->present) PrintDebug("ulink_child: pde shadow %x\n", *((uint32_t *)shadow_pde));
        memset((void *)shadow_pde, 0, sizeof(struct pde32));
        if (present != 1) continue;

        struct shadow_page_cache_data * shdw_page;
        struct hlist_head * bucket;
        struct hlist_node * node;
        int hugepage_access = 0;
        int metaphysical = 0;
        union shadow_page_role role;
        v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

        if (((pde32_t *)guest_pde)->large_page == 1) {
            hugepage_access = (((pde32_4MB_t *)guest_pde)->writable) | (((pde32_4MB_t *)guest_pde)->user_page << 1);

        if (mode == REAL || mode == PROTECTED) role.glevels = PT32_ROOT_LEVEL;
        // exceptional case; long term this should be an argument
        else if (mode == PROTECTED_PAE) role.glevels = PT32E_ROOT_LEVEL;
        else if (mode == LONG || mode == LONG_32_COMPAT) role.glevels = PT64_ROOT_LEVEL;

        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;

        index = shadow_page_table_hashfn(guest_pde->pt_base_addr) % NUM_SHADOW_PAGES;
        bucket = &core->shadow_page_hash[index];

        hlist_for_each_entry(shdw_page, node, bucket, hash_link)
            if (shdw_page->guest_fn == (guest_pde->pt_base_addr) && (shdw_page->role.word == role.word)) {
                shadow_zap_page(core, shdw_page);

    shadow_zap_page(core, page);
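
/* Remove every non-metaphysical cached shadow page built for this guest frame,
 * so the frame is no longer shadowed (and no longer needs to stay
 * write-protected). */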
int shadow_unprotect_page(struct guest_info * core, addr_t guest_fn) {

    struct hlist_head * bucket;
    struct shadow_page_cache_data * page = NULL;
    struct hlist_node * node;
    struct hlist_node * n;

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];
    PrintDebug("unprotect: gfn %p\n", (void *)guest_fn);

    hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
        // hlist_for_each_entry(page, node, bucket, hash_link) {
        if ((page->guest_fn == guest_fn) && !(page->role.metaphysical)) {
            PrintDebug("unprotect: match page.gfn %p page.role %x gfn %p\n", (void *)page->guest_fn, page->role.word, (void *)guest_fn);
            shadow_zap_page(core, page);

    PrintDebug("at shadow_unprotect_page return %d\n", r);
/*
 * Reverse mapping data structures:
 *
 * If bit zero of a frame's page_private entry is zero, then page_private
 * points directly to the single shadow page table entry that maps that frame.
 *
 * If bit zero of page_private is one, then (page_private & ~1) points to a
 * struct rmap containing more mappings.
 */
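
/* For example (see rmap_add() below): the first mapping of a frame stores the
 * shadow PTE's address directly in mem_map[gfn]; when a second mapping is
 * added, an rmap descriptor is allocated, both PTE addresses move into
 * desc->shadow_ptes[], and mem_map[gfn] becomes ((addr_t)desc | 1). */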
void rmap_add(struct guest_info * core, addr_t shadow_pte) {

    addr_t page_private = 0;
    gen_pt_t * shadow_pte_gen;
    addr_t page_base_addr = 0;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    shadow_pte_gen = (gen_pt_t *)shadow_pte;

    if (mode == PROTECTED) {
        page_base_addr = ((pte32_t *)shadow_pte)->page_base_addr;
        PrintDebug("at rmap_add shadow_pte: %x\n", (uint32_t)*((uint32_t *)shadow_pte));

    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        page_base_addr = ((pte64_t *)shadow_pte)->page_base_addr;
        PrintDebug("at rmap_add shadow_pte: %p\n", (void *)*((uint64_t *)shadow_pte));

    PrintDebug("debug rmap: at rmap_add shadow_pte->page_base_addr (%p), shadow_pte_present %d, shadow_pte_writable %d\n",
               (void *)BASE_TO_PAGE_ADDR(page_base_addr), (shadow_pte_gen->present), (shadow_pte_gen->writable));

    if (shadow_pte_gen->present == 0 || shadow_pte_gen->writable == 0)

    PrintDebug("at rmap_add host_fn %p\n", (void *)BASE_TO_PAGE_ADDR(page_base_addr));

    mem_map = core->vm_info.mem_map.base_region.mem_map;
    page_private = mem_map[page_base_addr];

    PrintDebug("at rmap_add page_private %p\n", (void *)page_private);

    PrintDebug("at rmap_add initial\n");
    mem_map[page_base_addr] = (addr_t)shadow_pte;
    PrintDebug("rmap_add: shadow_pte %p\n", (void *)shadow_pte);

    } else if (!(page_private & 1)) {
        PrintDebug("at rmap_add into multi\n");

        desc = shadow_alloc_rmap(core);
        desc->shadow_ptes[0] = page_private;
        desc->shadow_ptes[1] = shadow_pte;
        mem_map[page_base_addr] = (addr_t)desc | 1;

        PrintDebug("rmap_add: desc %p desc|1 %p\n", (void *)desc, (void *)((addr_t)desc | 1));

    PrintDebug("at rmap_add multimap\n");
    desc = (struct rmap *)(page_private & ~1ul);

    while (desc->more && desc->shadow_ptes[RMAP_EXT - 1]) desc = desc->more;

    if (desc->shadow_ptes[RMAP_EXT - 1]) {
        desc->more = shadow_alloc_rmap(core);

    for (i = 0; desc->shadow_ptes[i]; ++i) ;
    desc->shadow_ptes[i] = shadow_pte;
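
/* Remove slot i from an rmap descriptor by sliding the last used entry into
 * it; if the descriptor becomes empty it is freed, and page_private is either
 * collapsed back to a single inline PTE pointer or re-pointed (with bit 0 set)
 * at the next descriptor in the chain. */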
static void rmap_desc_remove_entry(struct guest_info * core,
                                   addr_t * page_private,
                                   struct rmap * prev_desc)

    for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j) ;
    desc->shadow_ptes[i] = desc->shadow_ptes[j];
    desc->shadow_ptes[j] = 0;

    PrintDebug("rmap_desc_rm: i %d j %d\n", i, j);

    if (!prev_desc && !desc->more) {
        PrintDebug("rmap_desc_rm: no more no less\n");
        *page_private = desc->shadow_ptes[0];
    } else { // more should be null

        PrintDebug("rmap_desc_rm: no more\n");
        prev_desc->more = desc->more;

        PrintDebug("rmap_desc_rm: no less\n");
        *page_private = (addr_t)desc->more | 1;

    shadow_free_rmap(core, desc);
static void rmap_remove(struct guest_info * core, addr_t shadow_pte) {

    struct rmap * prev_desc;
    addr_t page_private = 0;
    gen_pt_t * shadow_pte_gen;
    addr_t page_base_addr = 0;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (mode == PROTECTED) {
        PrintDebug("rmap_rm: PROTECTED %d\n", mode);
        page_base_addr = ((pte32_t *)shadow_pte)->page_base_addr;

    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        PrintDebug("rmap_rm: LONG_32_COMPAT/LONG %d\n", mode);
        page_base_addr = ((pte64_t *)shadow_pte)->page_base_addr;

    PrintDebug("rmap_rm: mode %d\n", mode);

    shadow_pte_gen = (gen_pt_t *)shadow_pte;

    if (shadow_pte_gen->present == 0 || shadow_pte_gen->writable == 0) {
        PrintDebug("rmap_rm: present %d, write %d, pte %p\n",
                   shadow_pte_gen->present, shadow_pte_gen->writable,
                   (void *)*((addr_t *)shadow_pte));

    PrintDebug("rmap_rm: shadow_pte->page_base_addr (%p)\n", (void *)BASE_TO_PAGE_ADDR(page_base_addr));

    mem_map = core->vm_info.mem_map.base_region.mem_map;
    page_private = mem_map[page_base_addr];

    PrintDebug("rmap_rm: page_private %p page_private&1 %p\n", (void *)page_private, (void *)(page_private & 1));

    PrintDebug("rmap_rm: single page_private %p\n", (void *)page_private);

    } else if (!(page_private & 1)) {
        PrintDebug("rmap_rm: multi page_private %p\n", (void *)page_private);
        mem_map[page_base_addr] = (addr_t)0;

    PrintDebug("rmap_rm: multimap page_private %p\n", (void *)page_private);
    desc = (struct rmap *)(page_private & ~1ul);

    PrintDebug("rmap_rm: desc loop\n");
    for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
        if (desc->shadow_ptes[i] == shadow_pte) {
            PrintDebug("rmap_rm: rmap_desc_remove_entry i %d\n", i);
            rmap_desc_remove_entry(core, &mem_map[page_base_addr], desc, i, prev_desc);

static inline int activate_shadow_pt_32(struct guest_info * core);
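
/* Write-protect a guest frame: walk the rmap chain for the host frame backing
 * guest_fn and clear the writable bit in every shadow PTE that maps it,
 * removing each PTE from the rmap as it is downgraded. */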
static void rmap_write_protect(struct guest_info * core, addr_t guest_fn) {

    // pte32_t * shadow_pte;

    PrintDebug("rmap_wrprot: gfn %p\n", (void *)guest_fn);

    if (guest_pa_to_host_pa(core, BASE_TO_PAGE_ADDR(guest_fn), &host_pa) != 0) {
        PrintDebug("rmap_wrprot: error\n");

    page_private = core->vm_info.mem_map.base_region.mem_map[PAGE_BASE_ADDR(host_pa)];

    PrintDebug("rmap_wrprot: host_fn %p\n", (void *)PAGE_BASE_ADDR(host_pa));

    while (page_private) {
        PrintDebug("rmap_wrprot: page_private %p\n", (void *)page_private);
        if (!(page_private & 1)) {
            PrintDebug("rmap_wrprot: reverse desc single\n");
            shadow_pte = page_private;

            desc = (struct rmap *)(page_private & ~1ul);
            PrintDebug("rmap_wrprot: reverse desc multimap\n");
            shadow_pte = desc->shadow_ptes[0];

        PrintDebug("rmap_wrprot: pg_priv %p, host_fn %p, shdw_pte %p\n",
                   (void *)page_private, (void *)PAGE_BASE_ADDR(host_pa), (void *)*((uint64_t *)shadow_pte));

        rmap_remove(core, shadow_pte);

        // PrintDebug("rmap_wrprot: shadow_pte->page_base_addr (%p)\n",
        //            (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

        ((gen_pt_t *)shadow_pte)->writable = 0;
        PrintDebug("rmap_wrprot: %p\n", (void *)*((uint64_t *)shadow_pte));

        page_private = core->vm_info.mem_map.base_region.mem_map[PAGE_BASE_ADDR(host_pa)];

        PrintDebug("rmap_wrprot: page_private %p\n", (void *)page_private);

    PrintDebug("rmap_wrprot: done\n");
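
/* Pre-write hook for emulated guest writes: if the written frame backs a
 * cached shadow page, either zap the whole page (misaligned writes, repeated
 * "flooded" writes to the same frame, or force != 0) or clear just the shadow
 * entry covering the written offset. */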
void shadow_page_pre_write(struct guest_info * core, addr_t guest_pa, int bytes, int force) {
    // guest_fn is a guest frame number, not a guest physical address
    addr_t guest_fn = PAGE_BASE_ADDR(guest_pa);
    struct shadow_page_cache_data * page;
    struct hlist_node * node, * n;
    struct hlist_head * bucket;

    uint32_t * shdw32_table = NULL;
    uint32_t * shdw32_entry = NULL;
    uint64_t * shdw64_table = NULL;
    uint64_t * shdw64_entry = NULL;

    unsigned offset = PAGE_OFFSET(guest_pa);
    unsigned misaligned = 0;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (guest_fn == core->last_pt_write_guest_fn) {
        ++core->last_pt_write_count;
        if (core->last_pt_write_count >= 3) flooded = 1;

    core->last_pt_write_guest_fn = guest_fn;
    core->last_pt_write_count = 1;

    PrintDebug("shdw_pre-write: gpa %p byte %d force %d flood %d last_gfn %p last_cnt %d\n",
               (void *)guest_pa, bytes, force, flooded, (void *)core->last_pt_write_guest_fn, core->last_pt_write_count);

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    PrintDebug("shdw_pre-write: check point after bucket\n");

    // hlist_for_each_entry_safe(page, node, bucket, hash_link) {
    hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {

        if (page->guest_fn != guest_fn || page->role.metaphysical) continue;

        pte_size = 4; // because 32-bit non-PAE for now
        pte_size = page->role.glevels == 2 ? 4 : 8;
        if (!force) misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        if (misaligned || flooded || force) {
            /*
             * Misaligned accesses are too much trouble to fix up;
             * they also usually indicate that the page is not being used as a page table.
             */
            PrintDebug("shdw_pre-write: misaligned\n");
            shadow_zap_page(core, page);

        level = page->role.hlevels;

        PrintDebug("shdw_pre-write: found a page at level %d\n", level);
        if (mode == PROTECTED) {
            shdw32_table = (uint32_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(PAGE_BASE_ADDR(page->page_pa)));
            shdw32_entry = (uint32_t *)&(shdw32_table[offset / sizeof(uint32_t)]);

            if (*shdw32_entry & PT_PRESENT_MASK) {
                if (level == PT_PAGE_TABLE_LEVEL) {
                    PrintDebug("shdw_pre-write: pte idx %d\n", (unsigned int)(offset / sizeof(uint32_t)));
                    rmap_remove(core, (addr_t)shdw32_entry);
                    memset((void *)shdw32_entry, 0, sizeof(uint32_t));

                shadow_page_remove_shadow_pde(core, page, (addr_t)shdw32_entry);
                memset((void *)shdw32_entry, 0, sizeof(uint32_t));

        } else if (mode == LONG_32_COMPAT || mode == LONG) {

            shdw64_table = (uint64_t *)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(PAGE_BASE_ADDR(page->page_pa)));
            shdw64_entry = (uint64_t *)&(shdw64_table[offset / sizeof(uint64_t)]);

            if (*shdw64_entry & PT_PRESENT_MASK) {
                if (level == PT_PAGE_TABLE_LEVEL) {
                    PrintDebug("shdw_pre-write: pte idx %d\n", (unsigned int)(offset / sizeof(uint64_t)));
                    rmap_remove(core, (addr_t)shdw64_entry);
                    memset((void *)shdw64_entry, 0, sizeof(uint64_t));

                shadow_page_remove_shadow_pde(core, page, (addr_t)shdw64_entry);
                memset((void *)shdw64_entry, 0, sizeof(uint64_t));
// called after an emulated write, to keep the shadow page tables synchronized
void shadow_page_post_write(struct guest_info * core, addr_t guest_pa) {

int shadow_unprotect_page_virt(struct guest_info * core, addr_t guest_va) {

    if (guest_va_to_guest_pa(core, guest_va, &guest_pa) != 0) {
        PrintError("In GVA->GPA: Invalid GVA(%p)->GPA lookup\n",

    return shadow_unprotect_page(core, PAGE_BASE_ADDR(guest_pa));
void shadow_free_some_pages(struct guest_info * core) {
    while (core->n_free_shadow_pages < REFILE_PAGES) {
        struct shadow_page_cache_data * page;
        page = container_of(core->active_shadow_pages.prev,
                            struct shadow_page_cache_data, link);
        shadow_zap_page(core, page);

void shadow_free_all_pages(struct guest_info * core) {

    struct shadow_page_cache_data * sp, * node;
    list_for_each_entry_safe(sp, node, &core->active_shadow_pages, link) {
        shadow_zap_page(core, sp);

static struct shadow_page_cache_data * create_new_shadow_pt(struct guest_info * core);

#include "vmm_shdw_pg_cache_32.h"
#include "vmm_shdw_pg_cache_32pae.h"
#include "vmm_shdw_pg_cache_64.h"
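
/* The callbacks below implement the generic shadow paging interface for the
 * VTLB caching strategy; each one dispatches on the guest CPU mode to the
 * 32-bit, 32-bit PAE, or 64-bit handlers included above. */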
static int vtlb_caching_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {

    V3_Print("VTLB Caching initialization\n");

static int vtlb_caching_deinit(struct v3_vm_info * vm) {

static int vtlb_caching_local_init(struct guest_info * core) {

    V3_Print("VTLB local initialization\n");

    INIT_LIST_HEAD(&core->active_shadow_pages);
    INIT_LIST_HEAD(&core->free_pages);

    alloc_shadow_pages(core);

    shadow_topup_caches(core);

    core->prev_cr3_pdt_base = 0;

static int vtlb_caching_activate_shdw_pt(struct guest_info * core) {
    switch (v3_get_vm_cpu_mode(core)) {

        return activate_shadow_pt_32(core);

        return activate_shadow_pt_32pae(core);

    case LONG_32_COMPAT:
    case LONG_16_COMPAT:
        return activate_shadow_pt_64(core);

        PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
static int vtlb_caching_invalidate_shdw_pt(struct guest_info * core) {
    return vtlb_caching_activate_shdw_pt(core);

static int vtlb_caching_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {

    switch (v3_get_vm_cpu_mode(core)) {

        return handle_shadow_pagefault_32(core, fault_addr, error_code);

        return handle_shadow_pagefault_32pae(core, fault_addr, error_code);

    case LONG_32_COMPAT:
    case LONG_16_COMPAT:
        return handle_shadow_pagefault_64(core, fault_addr, error_code);

        PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));

static int vtlb_caching_handle_invlpg(struct guest_info * core, addr_t vaddr) {

    switch (v3_get_vm_cpu_mode(core)) {

        return handle_shadow_invlpg_32(core, vaddr);

        return handle_shadow_invlpg_32pae(core, vaddr);

    case LONG_32_COMPAT:
    case LONG_16_COMPAT:
        return handle_shadow_invlpg_64(core, vaddr);

        PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
static struct v3_shdw_pg_impl vtlb_caching_impl = {
    .name = "VTLB_CACHING",
    .init = vtlb_caching_init,
    .deinit = vtlb_caching_deinit,
    .local_init = vtlb_caching_local_init,
    .handle_pagefault = vtlb_caching_handle_pf,
    .handle_invlpg = vtlb_caching_handle_invlpg,
    .activate_shdw_pt = vtlb_caching_activate_shdw_pt,
    .invalidate_shdw_pt = vtlb_caching_invalidate_shdw_pt
};

register_shdw_pg_impl(&vtlb_caching_impl);