/*
 * Shadow page cache implementation, adapted from Linux's KVM implementation.
 * This module is licensed under the GPL.
 */
#include <palacios/vmm_shadow_paging.h>
#include <palacios/vmm_ctrl_regs.h>

#include <palacios/vm_guest.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_paging.h>
#ifndef V3_CONFIG_DEBUG_SHDW_CACHE
#undef PrintDebug
#define PrintDebug(fmt, ...)
#endif
#ifdef V3_CONFIG_SHADOW_CACHE
struct pde_chain {
    addr_t shadow_pdes[NR_PTE_CHAIN_ENTRIES];
    struct hlist_node link;
};

struct rmap {
    addr_t shadow_ptes[RMAP_EXT];
    struct rmap * more;
};
static inline int activate_shadow_pt_32(struct guest_info * core);

static inline unsigned shadow_page_table_hashfn(addr_t guest_fn)
{
    // callers reduce this modulo NUM_SHADOW_PAGES
    return guest_fn;
}
static void * shadow_cache_alloc(struct shadow_cache * mc, size_t size)
{
    void * p;

    if (!mc->nobjs) {
        PrintDebug("shadow_cache_alloc: cache is empty\n");
    }

    p = mc->objects[--mc->nobjs];

    // callers (pde chains, rmap descriptors) rely on unused slots being zero
    memset(p, 0, size);

    return p;
}
static void shadow_cache_free(struct shadow_cache * mc, void * obj)
{
    if (mc->nobjs < NR_MEM_OBJS) {
        mc->objects[mc->nobjs++] = obj;
    } else {
        V3_Free(obj);
    }
}
static struct rmap * shadow_alloc_rmap(struct guest_info * core)
{
    return shadow_cache_alloc(&core->shadow_rmap_cache, sizeof(struct rmap));
}

static void shadow_free_rmap(struct guest_info * core, struct rmap * rd)
{
    shadow_cache_free(&core->shadow_rmap_cache, rd);
}
int shadow_topup_cache(struct shadow_cache * cache, size_t objsize, int min) {

    void * obj;

    if (cache->nobjs >= min) return 0;

    while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
        obj = V3_Malloc(objsize);
        if (!obj) {
            PrintDebug("shadow_topup_cache: object allocation failed\n");
            return -1;
        }
        cache->objects[cache->nobjs++] = obj;
    }

    return 0;
}
static int shadow_topup_caches(struct guest_info * core) {
    int r;

    r = shadow_topup_cache(&core->shadow_pde_chain_cache,
                           sizeof(struct pde_chain), 4);
    if (r) goto out;

    r = shadow_topup_cache(&core->shadow_rmap_cache,
                           sizeof(struct rmap), 1);

out:
    return r;
}
static struct pde_chain * shadow_alloc_pde_chain(struct guest_info * core)
{
    return shadow_cache_alloc(&core->shadow_pde_chain_cache,
                              sizeof(struct pde_chain));
}
static void shadow_free_pde_chain(struct guest_info * core, struct pde_chain * pc)
{
    PrintDebug("shdw_free_pdechain: start\n");
    shadow_cache_free(&core->shadow_pde_chain_cache, pc);
    PrintDebug("shdw_free_pdechain: return\n");
}
static void shadow_free_page(struct guest_info * core, struct shadow_page_cache_data * page)
{
    list_del(&page->link);

    // release the old backing page and install a fresh one, so the header
    // can be recycled from the free list without leaking stale translations
    V3_FreePages((void *)page->page_pa, 1);
    page->page_pa = (addr_t)V3_AllocPages(1);

    list_add(&page->link, &core->free_pages);
    ++core->n_free_shadow_pages;
}
static struct shadow_page_cache_data * shadow_alloc_page(struct guest_info * core, addr_t shadow_pde) {

    struct shadow_page_cache_data * page;

    if (list_empty(&core->free_pages)) return NULL;

    page = list_entry(core->free_pages.next, struct shadow_page_cache_data, link);
    list_del(&page->link);

    list_add(&page->link, &core->active_shadow_pages);
    page->multimapped = 0;
    page->shadow_pde = shadow_pde;
    --core->n_free_shadow_pages;

    PrintDebug("alloc_page: n_free_shdw_pg %d page_pa %p page_va %p\n",
               core->n_free_shadow_pages, (void *)(page->page_pa), V3_VAddr((void *)(page->page_pa)));

    addr_t shdw_page = (addr_t)V3_VAddr((void *)(page->page_pa));
    memset((void *)shdw_page, 0, PAGE_SIZE_4KB);

    return page;
}
static void shadow_zap_page(struct guest_info * core, struct shadow_page_cache_data * page);
static void free_shadow_pages(struct guest_info * core)
{
    struct shadow_page_cache_data * page;

    while (!list_empty(&core->active_shadow_pages)) {
        page = container_of(core->active_shadow_pages.next,
                            struct shadow_page_cache_data, link);
        shadow_zap_page(core, page);
    }

    while (!list_empty(&core->free_pages)) {
        page = list_entry(core->free_pages.next, struct shadow_page_cache_data, link);
        list_del(&page->link);
        V3_FreePages((void *)page->page_pa, 1);
        page->page_pa = ~(addr_t)0; // invalid address
    }
}
static int alloc_shadow_pages(struct guest_info * core)
{
    int i;
    struct shadow_page_cache_data * page_header = NULL;

    for (i = 0; i < NUM_SHADOW_PAGES; i++) {
        page_header = &core->page_header_buf[i];

        INIT_LIST_HEAD(&page_header->link);
        if (!(page_header->page_pa = (addr_t)V3_AllocPages(1))) {
            goto error;
        }
        addr_t shdw_page = (addr_t)V3_VAddr((void *)(page_header->page_pa));
        memset((void *)shdw_page, 0, PAGE_SIZE_4KB);

        list_add(&page_header->link, &core->free_pages);
        ++core->n_free_shadow_pages;
        PrintDebug("alloc_shdw_pg: n_free_shdw_pg %d page_pa %p\n",
                   core->n_free_shadow_pages, (void *)page_header->page_pa);
    }

    return 0;

error:
    free_shadow_pages(core);
    return -1; // out of memory
}
static void shadow_page_add_shadow_pde(struct guest_info * core,
                                       struct shadow_page_cache_data * page, addr_t shadow_pde)
{
    struct pde_chain * pde_chain;
    struct hlist_node * node;
    int i;
    addr_t old;

    if (!shadow_pde) return;

    if (!page->multimapped) {
        old = page->shadow_pde;

        if (!old) {
            page->shadow_pde = shadow_pde;
            return;
        }

        // second mapper: convert the single pde into a pde chain
        page->multimapped = 1;
        pde_chain = shadow_alloc_pde_chain(core);
        INIT_HLIST_HEAD(&page->shadow_pdes);
        hlist_add_head(&pde_chain->link, &page->shadow_pdes);
        pde_chain->shadow_pdes[0] = old;
    }

    // find a free slot in an existing chain entry
    hlist_for_each_entry(pde_chain, node, &page->shadow_pdes, link) {
        if (pde_chain->shadow_pdes[NR_PTE_CHAIN_ENTRIES - 1]) continue;
        for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
            if (!pde_chain->shadow_pdes[i]) {
                pde_chain->shadow_pdes[i] = shadow_pde;
                return;
            }
        }
    }

    // all chain entries are full; prepend a new one
    pde_chain = shadow_alloc_pde_chain(core);
    hlist_add_head(&pde_chain->link, &page->shadow_pdes);
    pde_chain->shadow_pdes[0] = shadow_pde;
}
static void shadow_page_remove_shadow_pde(struct guest_info * core,
                                          struct shadow_page_cache_data * page, addr_t shadow_pde)
{
    struct pde_chain * pde_chain;
    struct hlist_node * node;
    int i;

    PrintDebug("rm_shdw_pde: multimap %d\n", page->multimapped);
    if (!page->multimapped) {
        PrintDebug("rm_shdw_pde: no multimap\n");
        if (page->shadow_pde != shadow_pde)
            PrintDebug("rm_shdw_pde: error: page->shadow_pde does not match shadow_pde\n");
        page->shadow_pde = 0;
        PrintDebug("rm_shdw_pde: return\n");
        return;
    }

    PrintDebug("rm_shdw_pde: multimap\n");

    hlist_for_each_entry(pde_chain, node, &page->shadow_pdes, link)
        for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
            if (!pde_chain->shadow_pdes[i]) break;
            if (pde_chain->shadow_pdes[i] != shadow_pde) continue;

            PrintDebug("rm_shdw_pde: found shadow_pde at i %d\n", i);
            // compact the chain entry by shifting later slots down
            while (i + 1 < NR_PTE_CHAIN_ENTRIES && pde_chain->shadow_pdes[i + 1]) {
                pde_chain->shadow_pdes[i] = pde_chain->shadow_pdes[i + 1];
                ++i;
            }
            pde_chain->shadow_pdes[i] = 0;

            if (i == 0) {
                PrintDebug("rm_shdw_pde: only one!\n");
                hlist_del(&pde_chain->link);
                shadow_free_pde_chain(core, pde_chain);
                if (hlist_empty(&page->shadow_pdes)) {
                    page->multimapped = 0;
                    page->shadow_pde = 0;
                }
            }

            PrintDebug("rm_shdw_pde: return\n");
            return;
        }

    PrintDebug("rm_shdw_pde: return\n");
}
static void shadow_page_search_shadow_pde(struct guest_info * core, addr_t shadow_pde,
                                          addr_t guest_pde, unsigned hlevel) {

    struct shadow_page_cache_data * shdw_page;
    unsigned index;
    struct hlist_head * bucket;
    struct hlist_node * node;
    int hugepage_access = 0;
    union shadow_page_role role;
    addr_t pt_base_addr = 0;
    int metaphysical = 0;

    PrintDebug("shadow_page_search_shadow_pde\n");
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    role.word = 0;

    if (mode == PROTECTED) {

        PrintDebug("shadow_page_search_shadow_pde: PROTECTED\n");
        pt_base_addr = ((pde32_t *)guest_pde)->pt_base_addr;

        if (((pde32_t *)guest_pde)->large_page == 1) {
            PrintDebug("shadow_page_search_shadow_pde: large page\n");
            // bit 0: writable, bit 1: user
            hugepage_access = (((pde32_4MB_t *)guest_pde)->writable) | (((pde32_4MB_t *)guest_pde)->user_page << 1);
            metaphysical = 1;
            pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr));
        }

        role.glevels = PT32_ROOT_LEVEL; // max level
        role.hlevels = PT_PAGE_TABLE_LEVEL;
        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;

    } else if (mode == LONG_32_COMPAT || mode == LONG) {

        PrintDebug("shadow_page_search_shadow_pde: LONG_32_COMPAT/LONG\n");
        pt_base_addr = ((pde64_t *)guest_pde)->pt_base_addr;

        if (hlevel == PT_DIRECTORY_LEVEL) {
            if (((pde64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pde64_2MB_t *)guest_pde)->writable) | (((pde64_2MB_t *)guest_pde)->user_page << 1);
                metaphysical = 1;
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_2MB(((pde64_2MB_t *)guest_pde)->page_base_addr));
            }
            role.hlevels = PT_PAGE_TABLE_LEVEL;

        } else if (hlevel == PT32E_ROOT_LEVEL) {
            if (((pdpe64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pdpe64_1GB_t *)guest_pde)->writable) | (((pdpe64_1GB_t *)guest_pde)->user_page << 1);
                metaphysical = 1;
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_1GB(((pdpe64_1GB_t *)guest_pde)->page_base_addr));
            }
            role.hlevels = PT_DIRECTORY_LEVEL;

        } else if (hlevel == PT64_ROOT_LEVEL) {
            if (((pdpe64_t *)guest_pde)->large_page == 1) {
                hugepage_access = (((pdpe64_1GB_t *)guest_pde)->writable) | (((pdpe64_1GB_t *)guest_pde)->user_page << 1);
                metaphysical = 1;
                pt_base_addr = (addr_t)PAGE_BASE_ADDR(BASE_TO_PAGE_ADDR_1GB(((pdpe64_1GB_t *)guest_pde)->page_base_addr));
            }
            role.hlevels = PT32E_ROOT_LEVEL;
        }

        role.glevels = PT64_ROOT_LEVEL; // store numeric
        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;
    }

    index = shadow_page_table_hashfn(pt_base_addr) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    hlist_for_each_entry(shdw_page, node, bucket, hash_link)
        if (shdw_page->guest_fn == pt_base_addr && shdw_page->role.word == role.word) {
            PrintDebug("shadow_page_search_shadow_pde: found\n");
            shadow_page_remove_shadow_pde(core, shdw_page, (addr_t)shadow_pde);
        }
}
static struct shadow_page_cache_data * shadow_page_lookup_page(struct guest_info * core, addr_t guest_fn, int opt) // used for write protection
{
    unsigned index;
    struct hlist_head * bucket;
    struct shadow_page_cache_data * page;
    struct hlist_node * node;

    PrintDebug("lookup: guest_fn addr %p\n", (void *)BASE_TO_PAGE_ADDR(guest_fn));

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];
    PrintDebug("lookup: index %d bucket %p\n", index, (void *)bucket);

    hlist_for_each_entry(page, node, bucket, hash_link) {
        PrintDebug("lookup: page->gfn %p gfn %p metaphysical %d\n",
                   (void *)BASE_TO_PAGE_ADDR(page->guest_fn), (void *)BASE_TO_PAGE_ADDR(guest_fn), page->role.metaphysical);
        if (page->guest_fn == guest_fn && !page->role.metaphysical) {
            return page;
        }
        else if (page->guest_fn == guest_fn) {
            if (opt) return page; // with opt set, metaphysical pages match as well
        }
    }

    return NULL;
}
static void rmap_remove(struct guest_info * core, addr_t shadow_pte);
static void rmap_write_protect(struct guest_info * core, addr_t guest_fn);
struct shadow_page_cache_data * shadow_page_get_page(struct guest_info * core,
                                                     addr_t guest_fn,
                                                     unsigned level,
                                                     int metaphysical,
                                                     unsigned hugepage_access,
                                                     addr_t shadow_pde,
                                                     int force) // 0: default,  1: bypass cache,  2: suppress debug prints
{
    struct shadow_page_cache_data * page;
    union shadow_page_role role;
    unsigned index;
    struct hlist_head * bucket;
    struct hlist_node * node;
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    role.word = 0;
    if (mode == REAL || mode == PROTECTED) role.glevels = PT32_ROOT_LEVEL;
    // exceptional; long term there should be an argument
    else if (mode == PROTECTED_PAE) role.glevels = PT32E_ROOT_LEVEL;
    else if (mode == LONG || mode == LONG_32_COMPAT) role.glevels = PT64_ROOT_LEVEL;
    else return NULL;

    role.hlevels = level;
    role.metaphysical = metaphysical;
    role.hugepage_access = hugepage_access;

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    if (force != 2) PrintDebug("get_page: lvl %d idx %d gfn %p role %x\n", level, index, (void *)guest_fn, role.word);

    hlist_for_each_entry(page, node, bucket, hash_link) {
        if (page->guest_fn == guest_fn && page->role.word == role.word) {
            shadow_page_add_shadow_pde(core, page, shadow_pde); // the page for guest_fn is already cached

            if (force != 2)
                PrintDebug("get_page: found guest_fn %p, index %d, multi %d, next %p\n",
                           (void *)page->guest_fn, index, page->multimapped, (void *)page->hash_link.next);

            if (force == 0 || force == 2) {
                return page;
            } else {
                shadow_zap_page(core, page);
                break;
            }
        }
        if (force != 2)
            PrintDebug("get_page: not found guest_fn %p, index %d, multimapped %d, next %p\n",
                       (void *)page->guest_fn, index, page->multimapped, (void *)page->hash_link.next);
    }

    if (force != 2) PrintDebug("get_page: not found\n");

    page = shadow_alloc_page(core, shadow_pde);

    if (!page) return page;

    page->guest_fn = guest_fn;
    page->role = role;
    page->multimapped = 0;
    page->shadow_pde = 0;

    if (force != 2)
        PrintDebug("get_page: hadd h->first %p, n %p, n->next %p\n",
                   (void *)bucket->first, (void *)&page->hash_link, (void *)page->hash_link.next);

    hlist_add_head(&page->hash_link, bucket);
    shadow_page_add_shadow_pde(core, page, shadow_pde);

    if (force != 2) PrintDebug("get_page: hadd h->first %p, n %p, n->next %p\n",
                               (void *)bucket->first, (void *)&page->hash_link, (void *)page->hash_link.next);

    if (!metaphysical) rmap_write_protect(core, guest_fn); // in case an rmapped guest_fn is being allocated as a pt or pd
    if (force != 2) PrintDebug("get_page: return\n");

    return page;
}
static void shadow_page_unlink_children(struct guest_info * core, struct shadow_page_cache_data * page) {
    unsigned i;
    int present;

    uint32_t * shdw32_table;
    uint32_t * shdw32_entry;
    uint64_t * shdw64_table;
    uint64_t * shdw64_entry;

    uint32_t * guest32_table;
    uint32_t * guest32_entry;
    uint64_t * guest64_table;
    uint64_t * guest64_entry;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (page->role.hlevels == PT_PAGE_TABLE_LEVEL) {

        if (mode == PROTECTED) {

            shdw32_table = (uint32_t *)V3_VAddr((void *)(addr_t)CR3_TO_PDE32_PA(page->page_pa));
            PrintDebug("ulink_chil: pte lvl\n");

            for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {
                shdw32_entry = (uint32_t *)&(shdw32_table[i]);
                if (*shdw32_entry & PT_PRESENT_MASK) {
                    rmap_remove(core, (addr_t)shdw32_entry);
                    PrintDebug("ulink_chil: %d pte: shadow %x\n", i, *shdw32_entry);
                }
                memset((void *)shdw32_entry, 0, sizeof(uint32_t));
            }
            PrintDebug("ulink_chil: return pte\n");
            return;

        } else if (mode == LONG_32_COMPAT || mode == LONG) {

            shdw64_table = (uint64_t *)V3_VAddr((void *)(addr_t)CR3_TO_PML4E64_PA(page->page_pa));
            PrintDebug("ulink_chil: pte lvl\n");

            for (i = 0; i < PT_ENT_PER_PAGE; ++i) {
                shdw64_entry = (uint64_t *)&(shdw64_table[i]);
                if (*shdw64_entry & PT_PRESENT_MASK) {
                    rmap_remove(core, (addr_t)shdw64_entry);
                    PrintDebug("ulink_chil: %d pte: shadow %p\n", i, (void *)*((uint64_t *)shdw64_entry));
                }
                memset((void *)shdw64_entry, 0, sizeof(uint64_t));
            }
            PrintDebug("ulink_chil: return pte\n");
            return;
        }
    }

    PrintDebug("ulink_chil: pde lvl\n");
    if (mode == PROTECTED) {

        shdw32_table = (uint32_t *)V3_VAddr((void *)(addr_t)CR3_TO_PDE32_PA(page->page_pa));

        if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest32_table) == -1) {
            PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            return;
        }

        for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {

            shdw32_entry = (uint32_t *)&(shdw32_table[i]);
            guest32_entry = (uint32_t *)&(guest32_table[i]);
            present = *shdw32_entry & PT_PRESENT_MASK;
            if (present) PrintDebug("ulink_chil: pde %dth: shadow %x\n", i, *((uint32_t *)shdw32_entry));
            memset((void *)shdw32_entry, 0, sizeof(uint32_t));
            if (present != 1) continue;

            shadow_page_search_shadow_pde(core, (addr_t)shdw32_entry, (addr_t)guest32_entry, page->role.hlevels);
        }
        PrintDebug("ulink_chil: before return at pde level\n");
        return;

    } else if (mode == LONG_32_COMPAT || mode == LONG) {

        shdw64_table = (uint64_t *)V3_VAddr((void *)(addr_t)CR3_TO_PML4E64_PA(page->page_pa));

        if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest64_table) == -1) {
            if (page->role.hlevels == PT_DIRECTORY_LEVEL)
                PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            if (page->role.hlevels == PT32E_ROOT_LEVEL)
                PrintError("Invalid Guest PDPE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            if (page->role.hlevels == PT64_ROOT_LEVEL)
                PrintError("Invalid Guest PML4E Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
            return;
        }

        for (i = 0; i < PT_ENT_PER_PAGE; ++i) {

            shdw64_entry = (uint64_t *)&(shdw64_table[i]);
            guest64_entry = (uint64_t *)&(guest64_table[i]);
            present = *shdw64_entry & PT_PRESENT_MASK;
            if (present) PrintDebug("ulink_chil: pde: shadow %p\n", (void *)*((uint64_t *)shdw64_entry));
            memset((void *)shdw64_entry, 0, sizeof(uint64_t));
            if (present != 1) continue;

            shadow_page_search_shadow_pde(core, (addr_t)shdw64_entry, (addr_t)guest64_entry, page->role.hlevels);
        }
    }
    //PrintDebug("ulink_chil: return pde\n");
}
static void shadow_page_put_page(struct guest_info * core, struct shadow_page_cache_data * page, addr_t shadow_pde) {

    PrintDebug("put_page: start\n");
    shadow_page_remove_shadow_pde(core, page, shadow_pde);
    PrintDebug("put_page: end\n");
}
static void shadow_zap_page(struct guest_info * core, struct shadow_page_cache_data * page) {

    addr_t shadow_pde;
    addr_t cr3_base_addr = 0;
    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    PrintDebug("zap: multimapped %d, metaphysical %d\n", page->multimapped, page->role.metaphysical);

    // detach every shadow PDE that points at this page
    while (page->multimapped || page->shadow_pde) {
        if (!page->multimapped) {
            shadow_pde = page->shadow_pde;
        } else {
            struct pde_chain * chain;
            chain = container_of(page->shadow_pdes.first, struct pde_chain, link);
            shadow_pde = chain->shadow_pdes[0];
        }
        shadow_page_put_page(core, page, shadow_pde);
        PrintDebug("zap_parent: pde: shadow %p\n", (void *)*((addr_t *)shadow_pde));
        memset((void *)shadow_pde, 0, sizeof(struct pde32));
    }

    shadow_page_unlink_children(core, page);

    PrintDebug("zap: end of unlink\n");

    if (mode == PROTECTED) {
        cr3_base_addr = ((struct cr3_32 *)&(core->shdw_pg_state.guest_cr3))->pdt_base_addr;
    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        cr3_base_addr = ((struct cr3_64 *)&(core->shdw_pg_state.guest_cr3))->pml4t_base_addr;
    }

    PrintDebug("zap: before hlist_del\n");
    PrintDebug("zap: page->guest_fn %p\n", (void *)page->guest_fn);

    if (page->guest_fn != (addr_t)(cr3_base_addr)) {
        PrintDebug("zap: first hlist_del\n");

        hlist_del(&page->hash_link);
        shadow_free_page(core, page);

    } else {
        PrintDebug("zap: second hlist_del\n");

        // the page backing the current guest CR3 stays cached
        list_del(&page->link);
        list_add(&page->link, &core->active_shadow_pages);
    }

    PrintDebug("zap: end hlist_del\n");
}
int shadow_zap_hierarchy_32(struct guest_info * core, struct shadow_page_cache_data * page) {

    unsigned i;
    pde32_t * shadow_pd;
    pde32_t * shadow_pde;
    pde32_t * guest_pd;
    pde32_t * guest_pde;
    int present;

    if (page->role.hlevels != 2) return -1;

    shadow_pd = CR3_TO_PDE32_VA(page->page_pa);
    if (guest_pa_to_host_va(core, BASE_TO_PAGE_ADDR(page->guest_fn), (addr_t *)&guest_pd) == -1) {
        PrintError("Invalid Guest PDE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(page->guest_fn));
        return -1;
    }

    for (i = 0; i < PT32_ENT_PER_PAGE; ++i) {

        shadow_pde = (pde32_t *)&(shadow_pd[i]);
        guest_pde = (pde32_t *)&(guest_pd[i]);
        present = shadow_pde->present;
        if (shadow_pde->present) PrintDebug("ulink_child: pde shadow %x\n", *((uint32_t *)shadow_pde));
        memset((void *)shadow_pde, 0, sizeof(struct pde32));
        if (present != 1) continue;

        struct shadow_page_cache_data * shdw_page;
        unsigned index;
        struct hlist_head * bucket;
        struct hlist_node * node;
        int hugepage_access = 0;
        int metaphysical = 0;
        union shadow_page_role role;
        v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

        if (((pde32_t *)guest_pde)->large_page == 1) {
            hugepage_access = (((pde32_4MB_t *)guest_pde)->writable) | (((pde32_4MB_t *)guest_pde)->user_page << 1);
            metaphysical = 1;
        }

        role.word = 0;
        if (mode == REAL || mode == PROTECTED) role.glevels = PT32_ROOT_LEVEL;
        // exceptional; long term there should be an argument
        else if (mode == PROTECTED_PAE) role.glevels = PT32E_ROOT_LEVEL;
        else if (mode == LONG || mode == LONG_32_COMPAT) role.glevels = PT64_ROOT_LEVEL;
        else return -1;

        role.hlevels = PT_PAGE_TABLE_LEVEL;
        role.metaphysical = metaphysical;
        role.hugepage_access = hugepage_access;

        index = shadow_page_table_hashfn(guest_pde->pt_base_addr) % NUM_SHADOW_PAGES;
        bucket = &core->shadow_page_hash[index];

        hlist_for_each_entry(shdw_page, node, bucket, hash_link)
            if (shdw_page->guest_fn == (guest_pde->pt_base_addr) && (shdw_page->role.word == role.word)) {
                shadow_zap_page(core, shdw_page);
            }
    }

    shadow_zap_page(core, page);
    return 0;
}
int shadow_unprotect_page(struct guest_info * core, addr_t guest_fn) {

    unsigned index;
    struct hlist_head * bucket;
    struct shadow_page_cache_data * page = NULL;
    struct hlist_node * node;
    struct hlist_node * n;
    int r;

    r = 0;

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];
    PrintDebug("unprotect: gfn %p\n", (void *)guest_fn);

    hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
        if ((page->guest_fn == guest_fn) && !(page->role.metaphysical)) {
            PrintDebug("unprotect: match page.gfn %p page.role %x gfn %p\n",
                       (void *)page->guest_fn, page->role.word, (void *)guest_fn);
            shadow_zap_page(core, page);
            r = 1;
        }
    }

    PrintDebug("at shadow_unprotect_page return %d\n", r);
    return r;
}
/*
 * Reverse mapping data structures:
 *
 * If bit 0 of a mem_map entry (page_private) is zero, the entry points
 * directly to the single shadow page table entry that maps that page.
 *
 * If bit 0 is one, (page_private & ~1) points to a struct rmap holding an
 * array of shadow page table entries, chained through rmap->more.
 */
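/*
 * Worked example (hypothetical addresses): suppose a guest frame gfn is
 * first mapped by a single shadow PTE at 0xF000. Then mem_map[gfn] == 0xF000,
 * with bit 0 clear. If a second shadow PTE at 0xF008 later maps the same
 * frame, rmap_add() below allocates a struct rmap (say at 0xA000), stores
 * both PTE addresses in shadow_ptes[], and sets
 * mem_map[gfn] = 0xA000 | 1 == 0xA001. Readers recover the descriptor with
 * (struct rmap *)(page_private & ~1ul).
 */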
void rmap_add(struct guest_info * core, addr_t shadow_pte) {
    struct rmap * desc;
    addr_t page_private = 0;
    gen_pt_t * shadow_pte_gen;
    addr_t page_base_addr = 0;
    addr_t * mem_map;
    int i;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    shadow_pte_gen = (gen_pt_t *)shadow_pte;

    if (mode == PROTECTED) {
        page_base_addr = ((pte32_t *)shadow_pte)->page_base_addr;
        PrintDebug("at rmap_add shadow_pte: %x\n", (uint32_t)*((uint32_t *)shadow_pte));

    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        page_base_addr = ((pte64_t *)shadow_pte)->page_base_addr;
        PrintDebug("at rmap_add shadow_pte: %p\n", (void *)*((uint64_t *)shadow_pte));

    } else {
        return;
    }

    PrintDebug("debug rmap: at rmap_add shadow_pte->page_base_addr (%p), shadow_pte_present %d, shadow_pte_writable %d\n",
               (void *)BASE_TO_PAGE_ADDR(page_base_addr), (shadow_pte_gen->present), (shadow_pte_gen->writable));

    // only present, writable mappings are tracked in the reverse map
    if (shadow_pte_gen->present == 0 || shadow_pte_gen->writable == 0)
        return;

    PrintDebug("at rmap_add host_fn %p\n", (void *)BASE_TO_PAGE_ADDR(page_base_addr));

    mem_map = core->vm_info.mem_map.base_region.mem_map;
    page_private = mem_map[page_base_addr];

    PrintDebug("at rmap_add page_private %p\n", (void *)page_private);

    if (!page_private) {
        PrintDebug("at rmap_add initial\n");
        mem_map[page_base_addr] = (addr_t)shadow_pte;
        PrintDebug("rmap_add: shadow_pte %p\n", (void *)shadow_pte);

    } else if (!(page_private & 1)) {
        PrintDebug("at rmap_add into multi\n");

        desc = shadow_alloc_rmap(core);
        desc->shadow_ptes[0] = page_private;
        desc->shadow_ptes[1] = shadow_pte;
        mem_map[page_base_addr] = (addr_t)desc | 1;

        PrintDebug("rmap_add: desc %p desc|1 %p\n", (void *)desc, (void *)((addr_t)desc | 1));

    } else {
        PrintDebug("at rmap_add multimap\n");
        desc = (struct rmap *)(page_private & ~1ul);

        while (desc->more && desc->shadow_ptes[RMAP_EXT - 1]) desc = desc->more;

        if (desc->shadow_ptes[RMAP_EXT - 1]) {
            desc->more = shadow_alloc_rmap(core);
            desc = desc->more;
        }

        for (i = 0; desc->shadow_ptes[i]; ++i) ;
        desc->shadow_ptes[i] = shadow_pte;
    }
}
static void rmap_desc_remove_entry(struct guest_info * core,
                                   addr_t * page_private,
                                   struct rmap * desc,
                                   int i,
                                   struct rmap * prev_desc)
{
    int j;

    // move the last used slot into the vacated slot i
    for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j) ;
    desc->shadow_ptes[i] = desc->shadow_ptes[j];
    desc->shadow_ptes[j] = 0;

    PrintDebug("rmap_desc_rm: i %d j %d\n", i, j);

    if (j != 0)
        return;

    // the descriptor is now empty; unlink and free it
    if (!prev_desc && !desc->more) {
        PrintDebug("rmap_desc_rm: no more no less\n");
        *page_private = desc->shadow_ptes[0];
    } else { // desc->more should be null here
        if (prev_desc) {
            PrintDebug("rmap_desc_rm: no more\n");
            prev_desc->more = desc->more;
        } else {
            PrintDebug("rmap_desc_rm: no less\n");
            *page_private = (addr_t)desc->more | 1;
        }
    }
    shadow_free_rmap(core, desc);
}
static void rmap_remove(struct guest_info * core, addr_t shadow_pte) {
    struct rmap * desc;
    struct rmap * prev_desc;
    addr_t page_private = 0;
    gen_pt_t * shadow_pte_gen;
    addr_t page_base_addr = 0;
    addr_t * mem_map;
    int i;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (mode == PROTECTED) {
        PrintDebug("rmap_rm: PROTECTED %d\n", mode);
        page_base_addr = ((pte32_t *)shadow_pte)->page_base_addr;

    } else if (mode == LONG_32_COMPAT || mode == LONG) {
        PrintDebug("rmap_rm: LONG_32_COMPAT/LONG %d\n", mode);
        page_base_addr = ((pte64_t *)shadow_pte)->page_base_addr;

    } else {
        PrintDebug("rmap_rm: unsupported mode %d\n", mode);
        return;
    }

    shadow_pte_gen = (gen_pt_t *)shadow_pte;

    if (shadow_pte_gen->present == 0 || shadow_pte_gen->writable == 0) {
        PrintDebug("rmap_rm: present %d, write %d, pte %p\n",
                   shadow_pte_gen->present, shadow_pte_gen->writable,
                   (void *)*((addr_t *)shadow_pte));
        return;
    }

    PrintDebug("rmap_rm: shadow_pte->page_base_addr (%p)\n", (void *)BASE_TO_PAGE_ADDR(page_base_addr));

    mem_map = core->vm_info.mem_map.base_region.mem_map;
    page_private = mem_map[page_base_addr];

    PrintDebug("rmap_rm: page_private %p page_private&1 %p\n", (void *)page_private, (void *)(page_private & 1));

    if (!page_private) {
        PrintDebug("rmap_rm: no rmap entry, page_private %p\n", (void *)page_private);

    } else if (!(page_private & 1)) {
        PrintDebug("rmap_rm: direct pte, page_private %p\n", (void *)page_private);
        mem_map[page_base_addr] = (addr_t)0;

    } else {
        PrintDebug("rmap_rm: multimap page_private %p\n", (void *)page_private);
        desc = (struct rmap *)(page_private & ~1ul);
        prev_desc = NULL;

        while (desc) {
            PrintDebug("rmap_rm: desc loop\n");
            for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
                if (desc->shadow_ptes[i] == shadow_pte) {
                    PrintDebug("rmap_rm: rmap_desc_remove_entry i %d\n", i);
                    rmap_desc_remove_entry(core, &mem_map[page_base_addr], desc, i, prev_desc);
                    return;
                }
            prev_desc = desc;
            desc = desc->more;
        }
    }
}
static void rmap_write_protect(struct guest_info * core, addr_t guest_fn) {
    struct rmap * desc;
    //pte32_t * shadow_pte;
    addr_t shadow_pte;
    addr_t page_private;
    addr_t host_pa;

    PrintDebug("rmap_wrprot: gfn %p\n", (void *)guest_fn);

    if (guest_pa_to_host_pa(core, BASE_TO_PAGE_ADDR(guest_fn), &host_pa) != 0) {
        PrintDebug("rmap_wrprot: error\n");
        return;
    }

    page_private = core->vm_info.mem_map.base_region.mem_map[PAGE_BASE_ADDR(host_pa)];

    PrintDebug("rmap_wrprot: host_fn %p\n", (void *)PAGE_BASE_ADDR(host_pa));

    while (page_private) {
        PrintDebug("rmap_wrprot: page_private %p\n", (void *)page_private);
        if (!(page_private & 1)) {
            PrintDebug("rmap_wrprot: reverse desc single\n");
            shadow_pte = page_private;
        } else {
            desc = (struct rmap *)(page_private & ~1ul);
            PrintDebug("rmap_wrprot: reverse desc multimap\n");
            shadow_pte = desc->shadow_ptes[0];
        }

        PrintDebug("rmap_wrprot: pg_priv %p, host_fn %p, shdw_pte %p\n",
                   (void *)page_private, (void *)PAGE_BASE_ADDR(host_pa), (void *)*((uint64_t *)shadow_pte));

        // remove the entry from the rmap, then clear its writable bit;
        // the loop drains the rmap until no writable mappings remain
        rmap_remove(core, shadow_pte);

        //PrintDebug("rmap_wrprot: shadow_pte->page_base_addr (%p)\n",
        //           (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));

        ((gen_pt_t *)shadow_pte)->writable = 0;
        PrintDebug("rmap_wrprot: %p\n", (void *)*((uint64_t *)shadow_pte));

        page_private = core->vm_info.mem_map.base_region.mem_map[PAGE_BASE_ADDR(host_pa)];

        PrintDebug("rmap_wrprot: page_private %p\n", (void *)page_private);
    }

    PrintDebug("rmap_wrprot: done\n");
}
void shadow_page_pre_write(struct guest_info * core, addr_t guest_pa, int bytes, int force) {
    // note: the guest frame number is not the guest physical address
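    // (e.g., with 4 KB pages, guest_pa 0x12345678 yields guest_fn 0x12345;
    //  PAGE_BASE_ADDR() strips the 12-bit page offset)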
    addr_t guest_fn = PAGE_BASE_ADDR(guest_pa);
    struct shadow_page_cache_data * page;
    struct hlist_node * node, * n;
    struct hlist_head * bucket;
    unsigned index;

    uint32_t * shdw32_table = NULL;
    uint32_t * shdw32_entry = NULL;
    uint64_t * shdw64_table = NULL;
    uint64_t * shdw64_entry = NULL;

    unsigned pte_size;
    unsigned offset = PAGE_OFFSET(guest_pa);
    unsigned misaligned = 0;
    int level;
    int flooded = 0;

    v3_cpu_mode_t mode = v3_get_vm_cpu_mode(core);

    if (guest_fn == core->last_pt_write_guest_fn) {
        ++core->last_pt_write_count;
        if (core->last_pt_write_count >= 3) flooded = 1;
    } else {
        core->last_pt_write_guest_fn = guest_fn;
        core->last_pt_write_count = 1;
    }

    PrintDebug("shdw_pre-write: gpa %p byte %d force %d flood %d last_gfn %p last_cnt %d\n",
               (void *)guest_pa, bytes, force, flooded, (void *)core->last_pt_write_guest_fn, core->last_pt_write_count);

    index = shadow_page_table_hashfn(guest_fn) % NUM_SHADOW_PAGES;
    bucket = &core->shadow_page_hash[index];

    PrintDebug("shdw_pre-write: check point after bucket\n");

    hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {

        if (page->guest_fn != guest_fn || page->role.metaphysical) continue;

        pte_size = page->role.glevels == 2 ? 4 : 8; // 4-byte entries for 32-bit non-PAE guests, 8 bytes otherwise

        // a write "misaligns" if it does not fall entirely within one page table entry
        if (!force) misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);

        if (misaligned || flooded || force) {
            /*
             * Misaligned accesses are too much trouble to fix up;
             * they also usually indicate that the page is not being
             * used as a page table.
             */
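            /*
             * Example: with pte_size == 4, a 4-byte write at offset 0x100
             * gives (0x100 ^ 0x103) & ~3 == 0, i.e. aligned, while the same
             * write at offset 0x102 gives (0x102 ^ 0x105) & ~3 == 0x4, i.e.
             * it straddles two entries and the page is zapped instead.
             */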
1020 PrintDebug("shdw_pre-write: misaligned\n");
1021 shadow_zap_page(core, page);
1025 level = page->role.hlevels;
1027 PrintDebug("shdw_pre-write: found out one page at the level of %d\n", level);
1029 if (mode == PROTECTED) {
1030 shdw32_table = (uint32_t*)V3_VAddr((void *)(addr_t)BASE_TO_PAGE_ADDR(PAGE_BASE_ADDR(page->page_pa)));
1031 shdw32_entry = (uint32_t*)&(shdw32_table[offset/sizeof(uint32_t)]);
1033 if (*shdw32_entry & PT_PRESENT_MASK) {
1034 if (level == PT_PAGE_TABLE_LEVEL) {
1035 PrintDebug("shdw_pre-write: pte idx %d\n", (unsigned int)(offset/sizeof(uint32_t)));
1036 rmap_remove(core, (addr_t)shdw32_entry);
1037 memset((void*)shdw32_entry, 0, sizeof(uint32_t));
1040 shadow_page_remove_shadow_pde(core, page, (addr_t)shdw32_entry);
1041 memset((void*)shdw32_entry, 0, sizeof(uint32_t));
1045 } else if (mode == LONG_32_COMPAT || mode == LONG) {
1047 shdw64_table = (uint64_t*)V3_VAddr((void*)(addr_t)BASE_TO_PAGE_ADDR(PAGE_BASE_ADDR(page->page_pa)));
1048 shdw64_entry = (uint64_t*)&(shdw64_table[offset/sizeof(uint64_t)]);
1050 if (*shdw64_entry & PT_PRESENT_MASK) {
1051 if (level == PT_PAGE_TABLE_LEVEL) {
1052 PrintDebug("shdw_pre-write: pte idx %d\n", (unsigned int)(offset/sizeof(uint64_t)));
1053 rmap_remove(core, (addr_t)shdw64_entry);
1054 memset((void*)shdw64_entry, 0, sizeof(uint64_t));
1056 shadow_page_remove_shadow_pde(core, page, (addr_t)shdw64_entry);
1057 memset((void*)shdw64_entry, 0, sizeof(uint64_t));
// emulation hook for synchronization after a guest page-table write
void shadow_page_post_write(struct guest_info * core, addr_t guest_pa) {

}
int shadow_unprotect_page_virt(struct guest_info * core, addr_t guest_va) {
    addr_t guest_pa;

    if (guest_va_to_guest_pa(core, guest_va, &guest_pa) != 0) {
        PrintError("In GVA->GPA: Invalid GVA(%p)->GPA lookup\n",
                   (void *)guest_va);
        return -1;
    }

    return shadow_unprotect_page(core, PAGE_BASE_ADDR(guest_pa));
}
void shadow_free_some_pages(struct guest_info * core) {
    while (core->n_free_shadow_pages < REFILE_PAGES) {
        struct shadow_page_cache_data * page;
        page = container_of(core->active_shadow_pages.prev,
                            struct shadow_page_cache_data, link);
        shadow_zap_page(core, page);
    }
}
void shadow_free_all_pages(struct guest_info * core) {

    struct shadow_page_cache_data * sp, * node;
    list_for_each_entry_safe(sp, node, &core->active_shadow_pages, link) {
        shadow_zap_page(core, sp);
    }
}
static struct shadow_page_cache_data * create_new_shadow_pt(struct guest_info * core);


#include "vmm_shdw_pg_cache_32.h"
#include "vmm_shdw_pg_cache_32pae.h"
#include "vmm_shdw_pg_cache_64.h"
static int vtlb_caching_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {

    V3_Print("VTLB Caching initialization\n");

    return 0;
}

static int vtlb_caching_deinit(struct v3_vm_info * vm) {
    return -1; // not implemented
}
static int vtlb_caching_local_init(struct guest_info * core) {

    V3_Print("VTLB local initialization\n");

    INIT_LIST_HEAD(&core->active_shadow_pages);
    INIT_LIST_HEAD(&core->free_pages);

    alloc_shadow_pages(core);

    shadow_topup_caches(core);

    core->prev_cr3_pdt_base = 0;

    return 0;
}
static int vtlb_caching_activate_shdw_pt(struct guest_info * core) {
    switch (v3_get_vm_cpu_mode(core)) {

        case PROTECTED:
            return activate_shadow_pt_32(core);
        case PROTECTED_PAE:
            return activate_shadow_pt_32pae(core);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return activate_shadow_pt_64(core);
        default:
            PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }

    return 0;
}
static int vtlb_caching_invalidate_shdw_pt(struct guest_info * core) {
    return vtlb_caching_activate_shdw_pt(core);
}
static int vtlb_caching_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {

    switch (v3_get_vm_cpu_mode(core)) {
        case PROTECTED:
            return handle_shadow_pagefault_32(core, fault_addr, error_code);
        case PROTECTED_PAE:
            return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return handle_shadow_pagefault_64(core, fault_addr, error_code);
        default:
            PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }
}
static int vtlb_caching_handle_invlpg(struct guest_info * core, addr_t vaddr) {

    switch (v3_get_vm_cpu_mode(core)) {
        case PROTECTED:
            return handle_shadow_invlpg_32(core, vaddr);
        case PROTECTED_PAE:
            return handle_shadow_invlpg_32pae(core, vaddr);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return handle_shadow_invlpg_64(core, vaddr);
        default:
            PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }
}
static struct v3_shdw_pg_impl vtlb_caching_impl = {
    .name = "VTLB_CACHING",
    .init = vtlb_caching_init,
    .deinit = vtlb_caching_deinit,
    .local_init = vtlb_caching_local_init,
    .handle_pagefault = vtlb_caching_handle_pf,
    .handle_invlpg = vtlb_caching_handle_invlpg,
    .activate_shdw_pt = vtlb_caching_activate_shdw_pt,
    .invalidate_shdw_pt = vtlb_caching_invalidate_shdw_pt
};

register_shdw_pg_impl(&vtlb_caching_impl);

#endif