X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm_checkpoint.c;h=82c505b9d68905b5474b343b2b0f2a3fbb403175;hb=022f63d320c2837822dc7f6be84f860a01f02d7c;hp=db9f42f195adf969b5ebfb0e12b7d93dde1789e7;hpb=6b055801f5e04e134b779ad49bc4826179ebf6bb;p=palacios.git

diff --git a/palacios/src/palacios/vmm_checkpoint.c b/palacios/src/palacios/vmm_checkpoint.c
index db9f42f..82c505b 100644
--- a/palacios/src/palacios/vmm_checkpoint.c
+++ b/palacios/src/palacios/vmm_checkpoint.c
@@ -315,8 +315,11 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
     void * guest_mem_base = NULL;
     void * ctx = NULL;
     uint64_t ret = 0;
-
-    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
+    uint64_t saved_mem_block_size;
+    uint32_t saved_num_base_regions;
+    char buf[128];
+    int i;
+    extern uint64_t v3_mem_block_size;
 
     ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
     
@@ -325,10 +328,34 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
 	return -1;
     }
 		     
-    if (v3_chkpt_load(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
-	PrintError(vm, VCORE_NONE, "Unable to load all of memory (requested=%llu bytes, result=%llu bytes\n",(uint64_t)(vm->mem_size),ret);
-	v3_chkpt_close_ctx(ctx);
+    if (V3_CHKPT_LOAD(ctx, "region_size",saved_mem_block_size)) { 
+	PrintError(vm, VCORE_NONE, "Unable to load memory region size\n");
+	return -1;
+    }
+    
+    if (V3_CHKPT_LOAD(ctx, "num_regions",saved_num_base_regions)) {
+	PrintError(vm, VCORE_NONE, "Unable to load number of regions\n");
+	return -1;
+    }
+
+    if (saved_mem_block_size != v3_mem_block_size) { 
+	PrintError(vm, VCORE_NONE, "Unable to load as memory block size differs\n");
+	return -1;
+    } // support will eventually be added for this
+
+    if (saved_num_base_regions != vm->mem_map.num_base_regions) { 
+	PrintError(vm, VCORE_NONE, "Unable to laod as number of base regions differs\n");
 	return -1;
+    } // support will eventually be added for this
+
+    for (i=0;i<vm->mem_map.num_base_regions;i++) {
+	guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr);
+	sprintf(buf,"memory_img%d",i);
+	if (v3_chkpt_load(ctx, buf, v3_mem_block_size, guest_mem_base)) {
+	    PrintError(vm, VCORE_NONE, "Unable to load all of memory (region %d) (requested=%llu bytes, result=%llu bytes\n",i,(uint64_t)(vm->mem_size),ret);
+	    v3_chkpt_close_ctx(ctx);
+	    return -1;
+	}
     }
     
     v3_chkpt_close_ctx(ctx);
@@ -340,9 +367,11 @@ static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
 static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
     void * guest_mem_base = NULL;
     void * ctx = NULL;
+    char buf[128]; // region name
     uint64_t ret = 0;
+    extern uint64_t v3_mem_block_size;
+    int i;
 
-    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
 
     ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
 
@@ -351,12 +380,26 @@ static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
 	return -1;
     }
 
-    if (v3_chkpt_save(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
-	PrintError(vm, VCORE_NONE, "Unable to save all of memory (requested=%llu, received=%llu)\n",(uint64_t)(vm->mem_size),ret);
-	v3_chkpt_close_ctx(ctx);  
+    if (V3_CHKPT_SAVE(ctx, "region_size",v3_mem_block_size)) { 
+	PrintError(vm, VCORE_NONE, "Unable to save memory region size\n");
 	return -1;
     }
 
+    if (V3_CHKPT_SAVE(ctx, "num_regions",vm->mem_map.num_base_regions)) {
+	PrintError(vm, VCORE_NONE, "Unable to save number of regions\n");
+	return -1;
+    }
+
+    for (i=0;i<vm->mem_map.num_base_regions;i++) {
+	guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr);
+	sprintf(buf,"memory_img%d",i);
+	if (v3_chkpt_save(ctx, buf, v3_mem_block_size, guest_mem_base)) {
+	    PrintError(vm, VCORE_NONE, "Unable to save all of memory (region %d) (requested=%llu, received=%llu)\n",i,(uint64_t)(vm->mem_size),ret);
+	    v3_chkpt_close_ctx(ctx);  
+	    return -1;
+	}
+    }
+
     v3_chkpt_close_ctx(ctx);
 
     return 0;
@@ -369,15 +412,15 @@ struct mem_migration_state {
     struct v3_bitmap  modified_pages; 
 };
 
-static int paging_callback(struct guest_info *core, 
-			   struct v3_shdw_pg_event *event,
-			   void      *priv_data)
+static int shadow_paging_callback(struct guest_info *core, 
+				  struct v3_shdw_pg_event *event,
+				  void      *priv_data)
 {
     struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
     
     if (event->event_type==SHADOW_PAGEFAULT &&
 	event->event_order==SHADOW_PREIMPL &&
-	event->error_code.write) { 
+	event->error_code.write) { // Note, assumes VTLB behavior where we will see the write even if preceded by a read
 	addr_t gpa;
 	if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
 	    // write to this page
@@ -391,7 +434,30 @@ static int paging_callback(struct guest_info *core,
     
     return 0;
 }
-	
+
+
+/*
+static int nested_paging_callback(struct guest_info *core, 
+				  struct v3_nested_pg_event *event,
+				  void      *priv_data)
+{
+    struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
+    
+    if (event->event_type==NESTED_PAGEFAULT &&
+	event->event_order==NESTED_PREIMPL &&
+	event->error_code.write) { // Assumes we will see a write after reads
+	if (event->gpa<core->vm_info->mem_size) { 
+	  v3_bitmap_set(&(m->modified_pages),(event->gpa)>>12);
+	} else {
+	  // no worries, this isn't physical memory
+	}
+    } else {
+      // we don't care about other events
+    }
+    
+    return 0;
+}
+*/	
 
 
 static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
@@ -413,10 +479,27 @@ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
 	V3_Free(m);
     }
 
-    v3_register_shadow_paging_event_callback(vm,paging_callback,m);
+    // We assume that the migrator has already verified that all cores are
+    // using the identical model (shadow or nested)
+    // This must not change over the execution of the migration
+
+    if (vm->cores[0].shdw_pg_mode==SHADOW_PAGING) { 
+      v3_register_shadow_paging_event_callback(vm,shadow_paging_callback,m);
 
-    for (i=0;i<vm->num_cores;i++) {
+      for (i=0;i<vm->num_cores;i++) {
 	v3_invalidate_shadow_pts(&(vm->cores[i]));
+      }
+    } else if (vm->cores[0].shdw_pg_mode==NESTED_PAGING) { 
+	//v3_register_nested_paging_event_callback(vm,nested_paging_callback,m);
+      
+      for (i=0;i<vm->num_cores;i++) {
+	//v3_invalidate_nested_addr_range(&(vm->cores[i]),0,vm->mem_size-1);
+      }
+    } else {
+      PrintError(vm, VCORE_NONE, "Unsupported paging mode\n");
+      v3_bitmap_deinit(&(m->modified_pages));
+      V3_Free(m);
+      return 0;
     }
     
     // and now we should get callbacks as writes happen
@@ -426,11 +509,15 @@ static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
 
 static void stop_page_tracking(struct mem_migration_state *m)
 {
-    v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
-    
-    v3_bitmap_deinit(&(m->modified_pages));
+  if (m->vm->cores[0].shdw_pg_mode==SHADOW_PAGING) { 
+    v3_unregister_shadow_paging_event_callback(m->vm,shadow_paging_callback,m);
+  } else {
+    //v3_unregister_nested_paging_event_callback(m->vm,nested_paging_callback,m);
+  }
     
-    V3_Free(m);
+  v3_bitmap_deinit(&(m->modified_pages));
+  
+  V3_Free(m);
 }
 
 	    
@@ -448,13 +535,10 @@ static int save_inc_memory(struct v3_vm_info * vm,
     int page_size_bytes = 1 << 12; // assuming 4k pages right now
     void * ctx = NULL;
     int i = 0; 
-    void * guest_mem_base = NULL;
     int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8) 
                            + ((mod_pgs_to_send->num_bits % 8) > 0);
 
    
-    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
-    
     PrintDebug(vm, VCORE_NONE, "Saving incremental memory.\n");
 
     ctx = v3_chkpt_open_ctx(chkpt,"memory_bitmap_bits");
@@ -481,7 +565,12 @@ static int save_inc_memory(struct v3_vm_info * vm,
     // Dirty memory pages are sent in bitmap order
     for (i = 0; i < mod_pgs_to_send->num_bits; i++) {
         if (v3_bitmap_check(mod_pgs_to_send, i)) {
-           // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n",i);
+	    struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i);
+	    if (!region) { 
+		PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i);
+		return -1;
+	    }
+	    // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n",i);
             ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
 	    if (!ctx) { 
 		PrintError(vm, VCORE_NONE, "Unable to open context to send memory page\n");
@@ -490,7 +579,7 @@ static int save_inc_memory(struct v3_vm_info * vm,
             if (v3_chkpt_save(ctx, 
 			      "memory_page", 
 			      page_size_bytes,
-			      guest_mem_base + (page_size_bytes * i))) {
+			      (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) {
 		PrintError(vm, VCORE_NONE, "Unable to send a memory page\n");
 		v3_chkpt_close_ctx(ctx);
 		return -1;
@@ -515,14 +604,11 @@ static int load_inc_memory(struct v3_vm_info * vm,
     int page_size_bytes = 1 << 12; // assuming 4k pages right now
     void * ctx = NULL;
     int i = 0; 
-    void * guest_mem_base = NULL;
     bool empty_bitmap = true;
     int bitmap_num_bytes = (mod_pgs->num_bits / 8) 
                            + ((mod_pgs->num_bits % 8) > 0);
 
 
-    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
-
     ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");
 
     if (!ctx) { 
@@ -544,7 +630,12 @@ static int load_inc_memory(struct v3_vm_info * vm,
     // Receive also follows bitmap order
     for (i = 0; i < mod_pgs->num_bits; i ++) {
         if (v3_bitmap_check(mod_pgs, i)) {
-            PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i);
+	    struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i);
+	    if (!region) { 
+		PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i);
+		return -1;
+	    }
+            //PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i);
             empty_bitmap = false;
             ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
 	    if (!ctx) { 
@@ -555,7 +646,7 @@ static int load_inc_memory(struct v3_vm_info * vm,
             if (v3_chkpt_load(ctx, 
 			      "memory_page", 
 			      page_size_bytes,
-			      guest_mem_base + (page_size_bytes * i))) {
+			      (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) {
 		PrintError(vm, VCORE_NONE, "Did not receive all of memory page\n");
 		v3_chkpt_close_ctx(ctx);
 		return -1;
@@ -624,6 +715,11 @@ static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
     
     ctx = v3_chkpt_open_ctx(chkpt, "header");
 
+    if (!ctx) { 
+	PrintError(vm, VCORE_NONE, "Cannot open context to load header\n");
+        return -1;
+    }
+
     switch (v3_mach_type) {
 	case V3_SVM_CPU:
 	case V3_SVM_REV3_CPU: {
@@ -684,6 +780,19 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
 	PrintError(info->vm_info, info, "Could not open context to load core\n");
 	goto loadfailout;
     }
+    
+    // Run state is needed to determine when AP cores need
+    // to be immediately run after resume
+    V3_CHKPT_LOAD(ctx,"run_state",info->core_run_state,loadfailout);
+    V3_CHKPT_LOAD(ctx,"cpu_mode",info->cpu_mode,loadfailout);
+    V3_CHKPT_LOAD(ctx,"mem_mode",info->mem_mode,loadfailout);
+
+    V3_CHKPT_LOAD(ctx,"CPL",info->cpl,loadfailout);
+
+    if (info->cpl != info->segments.ss.dpl) { 
+	V3_Print(info->vm_info,info,"Strange, CPL=%d but ss.dpl=%d on core save\n",info->cpl,info->segments.ss.dpl);
+    }
+
 
     V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
     
@@ -739,7 +848,11 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
     V3_CHKPT_LOAD(ctx, "GDTR", info->segments.gdtr, loadfailout);
     V3_CHKPT_LOAD(ctx, "IDTR", info->segments.idtr, loadfailout);
     V3_CHKPT_LOAD(ctx, "TR", info->segments.tr, loadfailout);
-    
+
+    if (info->cpl != info->segments.ss.dpl) { 
+	V3_Print(info->vm_info,info,"Strange, CPL=%d but ss.dpl=%d on core load\n",info->cpl,info->segments.ss.dpl);
+    }
+
     // several MSRs...
     V3_CHKPT_LOAD(ctx, "STAR", info->msrs.star, loadfailout);
     V3_CHKPT_LOAD(ctx, "LSTAR", info->msrs.lstar, loadfailout);
@@ -748,9 +861,14 @@ static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
         
     // Some components of guest state captured in the shadow pager
     V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout);
-    V3_CHKPT_LOAD(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, loadfailout);
+    V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
     V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
 
+    // floating point
+    if (v3_load_fp_state(ctx,info)) {
+      goto loadfailout;
+    }
+
     v3_chkpt_close_ctx(ctx); ctx=0;
 
     PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");
@@ -865,9 +983,14 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
 	goto savefailout;
     }
 
+    V3_CHKPT_SAVE(ctx,"run_state",info->core_run_state,savefailout);
+    V3_CHKPT_SAVE(ctx,"cpu_mode",info->cpu_mode,savefailout);
+    V3_CHKPT_SAVE(ctx,"mem_mode",info->mem_mode,savefailout);
+    
+    V3_CHKPT_SAVE(ctx,"CPL",info->cpl,savefailout);
 
     V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
-    
+
     // GPRs
     V3_CHKPT_SAVE(ctx,"RDI",info->vm_regs.rdi, savefailout); 
     V3_CHKPT_SAVE(ctx,"RSI",info->vm_regs.rsi, savefailout); 
@@ -929,9 +1052,14 @@ static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt
         
     // Some components of guest state captured in the shadow pager
     V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout);
-    V3_CHKPT_SAVE(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, savefailout);
+    V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
     V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
 
+    // floating point
+    if (v3_save_fp_state(ctx,info)) {
+      goto savefailout;
+    }
+
     v3_chkpt_close_ctx(ctx); ctx=0;
 
     if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
@@ -1147,17 +1275,21 @@ int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_
     int iter = 0;
     bool last_modpage_iteration=false;
     struct v3_bitmap modified_pages_to_send;
-    uint64_t start_time;
+    uint64_t start_time=0;
     uint64_t stop_time;
     int num_mod_pages=0;
     struct mem_migration_state *mm_state;
     int i;
 
-    // Currently will work only for shadow paging
-    for (i=0;i<vm->num_cores;i++) { 
-      if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) { 
-	PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
-	return -1;
+    // Cores must all be in the same mode
+    // or we must be skipping memory
+    if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) { 
+      v3_paging_mode_t mode = vm->cores[0].shdw_pg_mode;
+      for (i=1;i<vm->num_cores;i++) { 
+	if (vm->cores[i].shdw_pg_mode != mode) { 
+	  PrintError(vm, VCORE_NONE, "Cores having different paging modes (nested and shadow) are not supported\n");
+	  return -1;
+	}
       }
     }