X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm_checkpoint.c;h=82c505b9d68905b5474b343b2b0f2a3fbb403175;hb=022f63d320c2837822dc7f6be84f860a01f02d7c;hp=db9f42f195adf969b5ebfb0e12b7d93dde1789e7;hpb=6b055801f5e04e134b779ad49bc4826179ebf6bb;p=palacios.git diff --git a/palacios/src/palacios/vmm_checkpoint.c b/palacios/src/palacios/vmm_checkpoint.c index db9f42f..82c505b 100644 --- a/palacios/src/palacios/vmm_checkpoint.c +++ b/palacios/src/palacios/vmm_checkpoint.c @@ -315,8 +315,11 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { void * guest_mem_base = NULL; void * ctx = NULL; uint64_t ret = 0; - - guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr); + uint64_t saved_mem_block_size; + uint32_t saved_num_base_regions; + char buf[128]; + int i; + extern uint64_t v3_mem_block_size; ctx = v3_chkpt_open_ctx(chkpt, "memory_img"); @@ -325,10 +328,34 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { return -1; } - if (v3_chkpt_load(ctx, "memory_img", vm->mem_size, guest_mem_base)) { - PrintError(vm, VCORE_NONE, "Unable to load all of memory (requested=%llu bytes, result=%llu bytes\n",(uint64_t)(vm->mem_size),ret); - v3_chkpt_close_ctx(ctx); + if (V3_CHKPT_LOAD(ctx, "region_size",saved_mem_block_size)) { + PrintError(vm, VCORE_NONE, "Unable to load memory region size\n"); + return -1; + } + + if (V3_CHKPT_LOAD(ctx, "num_regions",saved_num_base_regions)) { + PrintError(vm, VCORE_NONE, "Unable to load number of regions\n"); + return -1; + } + + if (saved_mem_block_size != v3_mem_block_size) { + PrintError(vm, VCORE_NONE, "Unable to load as memory block size differs\n"); + return -1; + } // support will eventually be added for this + + if (saved_num_base_regions != vm->mem_map.num_base_regions) { + PrintError(vm, VCORE_NONE, "Unable to laod as number of base regions differs\n"); return -1; + } // support will eventually be added for this + + for 
(i=0;i<vm->mem_map.num_base_regions;i++) { + guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr); + sprintf(buf,"memory_img%d",i); + if (v3_chkpt_load(ctx, buf, v3_mem_block_size, guest_mem_base)) { + PrintError(vm, VCORE_NONE, "Unable to load all of memory (region %d) (requested=%llu bytes, result=%llu bytes\n",i,(uint64_t)(vm->mem_size),ret); + v3_chkpt_close_ctx(ctx); + return -1; + } } v3_chkpt_close_ctx(ctx); @@ -340,9 +367,11 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { void * guest_mem_base = NULL; void * ctx = NULL; + char buf[128]; // region name uint64_t ret = 0; + extern uint64_t v3_mem_block_size; + int i; - guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr); ctx = v3_chkpt_open_ctx(chkpt, "memory_img"); @@ -351,12 +380,26 @@ static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { return -1; } - if (v3_chkpt_save(ctx, "memory_img", vm->mem_size, guest_mem_base)) { - PrintError(vm, VCORE_NONE, "Unable to save all of memory (requested=%llu, received=%llu)\n",(uint64_t)(vm->mem_size),ret); - v3_chkpt_close_ctx(ctx); + if (V3_CHKPT_SAVE(ctx, "region_size",v3_mem_block_size)) { + PrintError(vm, VCORE_NONE, "Unable to save memory region size\n"); return -1; } + if (V3_CHKPT_SAVE(ctx, "num_regions",vm->mem_map.num_base_regions)) { + PrintError(vm, VCORE_NONE, "Unable to save number of regions\n"); + return -1; + } + + for (i=0;i<vm->mem_map.num_base_regions;i++) { + guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr); + sprintf(buf,"memory_img%d",i); + if (v3_chkpt_save(ctx, buf, v3_mem_block_size, guest_mem_base)) { + PrintError(vm, VCORE_NONE, "Unable to save all of memory (region %d) (requested=%llu, received=%llu)\n",i,(uint64_t)(vm->mem_size),ret); + v3_chkpt_close_ctx(ctx); + return -1; + } + } + v3_chkpt_close_ctx(ctx); return 0; @@ -369,15 +412,15 @@ struct mem_migration_state { 
struct v3_bitmap modified_pages; }; -static int paging_callback(struct guest_info *core, - struct v3_shdw_pg_event *event, - void *priv_data) +static int shadow_paging_callback(struct guest_info *core, + struct v3_shdw_pg_event *event, + void *priv_data) { struct mem_migration_state *m = (struct mem_migration_state *)priv_data; if (event->event_type==SHADOW_PAGEFAULT && event->event_order==SHADOW_PREIMPL && - event->error_code.write) { + event->error_code.write) { // Note, assumes VTLB behavior where we will see the write even if preceded by a read addr_t gpa; if (!v3_gva_to_gpa(core,event->gva,&gpa)) { // write to this page @@ -391,7 +434,30 @@ static int paging_callback(struct guest_info *core, return 0; } - + + +/* +static int nested_paging_callback(struct guest_info *core, + struct v3_nested_pg_event *event, + void *priv_data) +{ + struct mem_migration_state *m = (struct mem_migration_state *)priv_data; + + if (event->event_type==NESTED_PAGEFAULT && + event->event_order==NESTED_PREIMPL && + event->error_code.write) { // Assumes we will see a write after reads + if (event->gpa<core->vm_info->mem_size) { + v3_bitmap_set(&(m->modified_pages),(event->gpa)>>12); + } else { + // no worries, this isn't physical memory + } + } else { + // we don't care about other events + } + + return 0; +} +*/ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm) @@ -413,10 +479,27 @@ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm) V3_Free(m); } - v3_register_shadow_paging_event_callback(vm,paging_callback,m); + // We assume that the migrator has already verified that all cores are + // using the identical model (shadow or nested) + // This must not change over the execution of the migration + + if (vm->cores[0].shdw_pg_mode==SHADOW_PAGING) { + v3_register_shadow_paging_event_callback(vm,shadow_paging_callback,m); - for (i=0;i<vm->num_cores;i++) { + for (i=0;i<vm->num_cores;i++) { v3_invalidate_shadow_pts(&(vm->cores[i])); + } + } else if 
(vm->cores[0].shdw_pg_mode==NESTED_PAGING) { + //v3_register_nested_paging_event_callback(vm,nested_paging_callback,m); + + for (i=0;i<vm->num_cores;i++) { + //v3_invalidate_nested_addr_range(&(vm->cores[i]),0,vm->mem_size-1); + } + } else { + PrintError(vm, VCORE_NONE, "Unsupported paging mode\n"); + v3_bitmap_deinit(&(m->modified_pages)); + V3_Free(m); + return 0; } // and now we should get callbacks as writes happen @@ -426,11 +509,15 @@ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm) static void stop_page_tracking(struct mem_migration_state *m) { - v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m); - - v3_bitmap_deinit(&(m->modified_pages)); + if (m->vm->cores[0].shdw_pg_mode==SHADOW_PAGING) { + v3_unregister_shadow_paging_event_callback(m->vm,shadow_paging_callback,m); + } else { + //v3_unregister_nested_paging_event_callback(m->vm,nested_paging_callback,m); + } - V3_Free(m); + v3_bitmap_deinit(&(m->modified_pages)); + + V3_Free(m); } @@ -448,13 +535,10 @@ static int save_inc_memory(struct v3_vm_info * vm, int page_size_bytes = 1 << 12; // assuming 4k pages right now void * ctx = NULL; int i = 0; - void * guest_mem_base = NULL; int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8) + ((mod_pgs_to_send->num_bits % 8) > 0); - guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr); - PrintDebug(vm, VCORE_NONE, "Saving incremental memory.\n"); ctx = v3_chkpt_open_ctx(chkpt,"memory_bitmap_bits"); @@ -481,7 +565,12 @@ static int save_inc_memory(struct v3_vm_info * vm, // Dirty memory pages are sent in bitmap order for (i = 0; i < mod_pgs_to_send->num_bits; i++) { if (v3_bitmap_check(mod_pgs_to_send, i)) { - // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n",i); + struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i); + if (!region) { + PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i); + return -1; + } + // PrintDebug(vm, VCORE_NONE, "Sending memory page 
%d.\n",i); ctx = v3_chkpt_open_ctx(chkpt, "memory_page"); if (!ctx) { PrintError(vm, VCORE_NONE, "Unable to open context to send memory page\n"); @@ -490,7 +579,7 @@ static int save_inc_memory(struct v3_vm_info * vm, if (v3_chkpt_save(ctx, "memory_page", page_size_bytes, - guest_mem_base + (page_size_bytes * i))) { + (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) { PrintError(vm, VCORE_NONE, "Unable to send a memory page\n"); v3_chkpt_close_ctx(ctx); return -1; @@ -515,14 +604,11 @@ static int load_inc_memory(struct v3_vm_info * vm, int page_size_bytes = 1 << 12; // assuming 4k pages right now void * ctx = NULL; int i = 0; - void * guest_mem_base = NULL; bool empty_bitmap = true; int bitmap_num_bytes = (mod_pgs->num_bits / 8) + ((mod_pgs->num_bits % 8) > 0); - guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr); - ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits"); if (!ctx) { @@ -544,7 +630,12 @@ static int load_inc_memory(struct v3_vm_info * vm, // Receive also follows bitmap order for (i = 0; i < mod_pgs->num_bits; i ++) { if (v3_bitmap_check(mod_pgs, i)) { - PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i); + struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i); + if (!region) { + PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i); + return -1; + } + //PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i); empty_bitmap = false; ctx = v3_chkpt_open_ctx(chkpt, "memory_page"); if (!ctx) { @@ -555,7 +646,7 @@ static int load_inc_memory(struct v3_vm_info * vm, if (v3_chkpt_load(ctx, "memory_page", page_size_bytes, - guest_mem_base + (page_size_bytes * i))) { + (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) { PrintError(vm, VCORE_NONE, "Did not receive all of memory page\n"); v3_chkpt_close_ctx(ctx); return -1; @@ -624,6 +715,11 @@ static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) { ctx = v3_chkpt_open_ctx(chkpt, "header"); + if 
(!ctx) { + PrintError(vm, VCORE_NONE, "Cannot open context to load header\n"); + return -1; + } + switch (v3_mach_type) { case V3_SVM_CPU: case V3_SVM_REV3_CPU: { @@ -684,6 +780,19 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt PrintError(info->vm_info, info, "Could not open context to load core\n"); goto loadfailout; } + + // Run state is needed to determine when AP cores need + // to be immediately run after resume + V3_CHKPT_LOAD(ctx,"run_state",info->core_run_state,loadfailout); + V3_CHKPT_LOAD(ctx,"cpu_mode",info->cpu_mode,loadfailout); + V3_CHKPT_LOAD(ctx,"mem_mode",info->mem_mode,loadfailout); + + V3_CHKPT_LOAD(ctx,"CPL",info->cpl,loadfailout); + + if (info->cpl != info->segments.ss.dpl) { + V3_Print(info->vm_info,info,"Strange, CPL=%d but ss.dpl=%d on core save\n",info->cpl,info->segments.ss.dpl); + } + V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout); @@ -739,7 +848,11 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt V3_CHKPT_LOAD(ctx, "GDTR", info->segments.gdtr, loadfailout); V3_CHKPT_LOAD(ctx, "IDTR", info->segments.idtr, loadfailout); V3_CHKPT_LOAD(ctx, "TR", info->segments.tr, loadfailout); - + + if (info->cpl != info->segments.ss.dpl) { + V3_Print(info->vm_info,info,"Strange, CPL=%d but ss.dpl=%d on core load\n",info->cpl,info->segments.ss.dpl); + } + // several MSRs... 
V3_CHKPT_LOAD(ctx, "STAR", info->msrs.star, loadfailout); V3_CHKPT_LOAD(ctx, "LSTAR", info->msrs.lstar, loadfailout); @@ -748,9 +861,14 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt // Some components of guest state captured in the shadow pager V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout); - V3_CHKPT_LOAD(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, loadfailout); + V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout); V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout); + // floating point + if (v3_load_fp_state(ctx,info)) { + goto loadfailout; + } + v3_chkpt_close_ctx(ctx); ctx=0; PrintDebug(info->vm_info, info, "Finished reading guest_info information\n"); @@ -865,9 +983,14 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt goto savefailout; } + V3_CHKPT_SAVE(ctx,"run_state",info->core_run_state,savefailout); + V3_CHKPT_SAVE(ctx,"cpu_mode",info->cpu_mode,savefailout); + V3_CHKPT_SAVE(ctx,"mem_mode",info->mem_mode,savefailout); + + V3_CHKPT_SAVE(ctx,"CPL",info->cpl,savefailout); V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout); - + // GPRs V3_CHKPT_SAVE(ctx,"RDI",info->vm_regs.rdi, savefailout); V3_CHKPT_SAVE(ctx,"RSI",info->vm_regs.rsi, savefailout); @@ -929,9 +1052,14 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt // Some components of guest state captured in the shadow pager V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout); - V3_CHKPT_SAVE(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, savefailout); + V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout); V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout); + // floating point + if (v3_save_fp_state(ctx,info)) { + goto savefailout; + } + v3_chkpt_close_ctx(ctx); ctx=0; if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) { @@ -1147,17 +1275,21 @@ int 
v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_ int iter = 0; bool last_modpage_iteration=false; struct v3_bitmap modified_pages_to_send; - uint64_t start_time; + uint64_t start_time=0; uint64_t stop_time; int num_mod_pages=0; struct mem_migration_state *mm_state; int i; - // Currently will work only for shadow paging - for (i=0;i<vm->num_cores;i++) { - if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) { - PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n"); - return -1; + // Cores must all be in the same mode + // or we must be skipping memory + if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) { + v3_paging_mode_t mode = vm->cores[0].shdw_pg_mode; + for (i=1;i<vm->num_cores;i++) { + if (vm->cores[i].shdw_pg_mode != mode) { + PrintError(vm, VCORE_NONE, "Cores having different paging modes (nested and shadow) are not supported\n"); + return -1; + } } }