2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2011, Madhav Suresh <madhav@u.northwestern.edu>
11 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Madhav Suresh <madhav@u.northwestern.edu>
15 * Arefin Huq <fig@arefin.net>
16 * Peter Dinda <pdinda@northwestern.edu> (store interface changes)
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_sprintf.h>
25 #include <palacios/vm_guest.h>
26 #include <palacios/svm.h>
27 #include <palacios/vmx.h>
28 #include <palacios/vmm_checkpoint.h>
29 #include <palacios/vmm_hashtable.h>
30 #include <palacios/vmm_direct_paging.h>
31 #include <palacios/vmm_debug.h>
33 #include <palacios/vmm_dev_mgr.h>
35 #ifdef V3_CONFIG_LIVE_MIGRATION
36 #include <palacios/vmm_time.h>
37 #include <palacios/vm_guest_mem.h>
38 #include <palacios/vmm_shadow_paging.h>
41 #ifndef V3_CONFIG_DEBUG_CHECKPOINT
43 #define PrintDebug(fmt, args...)
// Global registry of checkpoint backing stores, keyed by store name string.
47 static struct hashtable * store_table = NULL;
// Direction of a checkpoint operation: writing state out (SAVE) or reading it back (LOAD).
51 typedef enum {SAVE, LOAD} chkpt_mode_t;
// Operations table implemented by each checkpoint backing store.
// NOTE(review): the declaration is truncated in this view -- the store's
// "name" field (dereferenced by V3_init_checkpoint below) and the closing
// "};" are on elided lines.
53 struct chkpt_interface {
55 // Opening a checkpoint should return a pointer to the internal representation
56 // of the checkpoint in the store. This will be passed back
57 // as "store_data". Return NULL if the context cannot be opened
58 void * (*open_chkpt)(char * url, chkpt_mode_t mode);
59 // Closing the checkpoint should return -1 on failure, 0 on success
60 int (*close_chkpt)(void * store_data);
62 // Opening a context on the checkpoint with a given name should return
63 // a pointer to an internal representation of the context. This pointer
64 // is then passed back as "ctx".
65 // We will open only a single context at a time.
66 void * (*open_ctx)(void * store_data, char *name);
67 // Closing the context should return -1 on failure, 0 on success
68 int (*close_ctx)(void * store_data, void * ctx);
70 // Save and load include a tagged data buffer. These are
71 // "all or nothing" writes and reads.
72 // return -1 on failure, and 0 on success
74 int (*save)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
75 int (*load)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
// NOTE(review): these appear to be the fields of "struct v3_chkpt" (the
// enclosing "struct v3_chkpt {" line and the "store_data" field referenced
// throughout this file are elided from this view -- confirm against the full file).
80 struct v3_vm_info * vm;
// Only one context may be open on a checkpoint at a time; this tracks it.
82 struct v3_chkpt_ctx *current_ctx;
// Backing-store operations used for all open/close/save/load calls below.
84 struct chkpt_interface * interface;
// Hash function for store_table: hashes the store's NUL-terminated name.
// The key is a char* smuggled through an addr_t, per the hashtable API.
92 static uint_t store_hash_fn(addr_t key) {
93 char * name = (char *)key;
94 return v3_hash_buffer((uint8_t *)name, strlen(name));
// Key-equality function for store_table: two keys match when their
// name strings compare equal (strcmp == 0), not when the pointers match.
97 static int store_eq_fn(addr_t key1, addr_t key2) {
98 char * name1 = (char *)key1;
99 char * name2 = (char *)key2;
101 return (strcmp(name1, name2) == 0);
106 #include "vmm_chkpt_stores.h"
// Module init: create the store registry and register every backing store
// that vmm_chkpt_stores.h placed into the "_v3_chkpt_stores" linker section
// (walked via the compiler-generated __start/__stop section symbols).
// NOTE(review): lines are elided in this view (declaration of the index 'i',
// error-path returns, final return, closing brace).
109 int V3_init_checkpoint() {
110 extern struct chkpt_interface * __start__v3_chkpt_stores[];
111 extern struct chkpt_interface * __stop__v3_chkpt_stores[];
112 struct chkpt_interface ** tmp_store = __start__v3_chkpt_stores;
115 store_table = v3_create_htable(0, store_hash_fn, store_eq_fn);
117 while (tmp_store != __stop__v3_chkpt_stores) {
118 V3_Print("Registering Checkpoint Backing Store (%s)\n", (*tmp_store)->name);
// Reject duplicate registrations by name.
120 if (v3_htable_search(store_table, (addr_t)((*tmp_store)->name))) {
121 PrintError("Multiple instances of Checkpoint backing Store (%s)\n", (*tmp_store)->name);
// v3_htable_insert returns 0 on failure.
125 if (v3_htable_insert(store_table, (addr_t)((*tmp_store)->name), (addr_t)(*tmp_store)) == 0) {
126 PrintError("Could not register Checkpoint backing store (%s)\n", (*tmp_store)->name);
130 tmp_store = &(__start__v3_chkpt_stores[++i]);
// Module teardown: free the store registry. The (0, 0) arguments mean the
// hashtable neither frees keys nor values -- store entries live in a linker
// section and are not heap-allocated.
136 int V3_deinit_checkpoint() {
137 v3_free_htable(store_table, 0, 0);
// Architecture-tagged magic strings written by save_header() and read back
// (same length) by load_header() to identify a checkpoint image.
142 static char svm_chkpt_header[] = "v3vee palacios checkpoint version: x.x, SVM x.x";
143 static char vmx_chkpt_header[] = "v3vee palacios checkpoint version: x.x, VMX x.x";
// Close a checkpoint via its backing store and release our state.
// NOTE(review): elided lines here include the NULL check guarding the
// dereference, the declaration of 'rc', the V3_Free of chkpt, and returns.
145 static int chkpt_close(struct v3_chkpt * chkpt) {
149 rc = chkpt->interface->close_chkpt(chkpt->store_data);
// Backing store reported failure closing a checkpoint we believed valid.
154 PrintError("Internal store failed to close valid checkpoint\n");
// Reached when the caller passed a NULL chkpt.
160 PrintError("Attempt to close null checkpoint\n");
// Look up the named backing store, open the given url on it in SAVE or LOAD
// mode, and wrap the result in a freshly allocated struct v3_chkpt.
// Returns the new checkpoint handle, or NULL on any failure (elided lines
// presumably hold the early "return NULL"s and the final "return chkpt").
166 static struct v3_chkpt * chkpt_open(struct v3_vm_info * vm, char * store, char * url, chkpt_mode_t mode) {
167 struct chkpt_interface * iface = NULL;
168 struct v3_chkpt * chkpt = NULL;
169 void * store_data = NULL;
// Resolve the store implementation by name from the global registry.
171 iface = (void *)v3_htable_search(store_table, (addr_t)store);
174 V3_Print("Error: Could not locate Checkpoint interface for store (%s)\n", store);
178 store_data = iface->open_chkpt(url, mode);
180 if (store_data == NULL) {
181 PrintError("Could not open url (%s) for backing store (%s)\n", url, store);
186 chkpt = V3_Malloc(sizeof(struct v3_chkpt));
// Allocation failed: undo the store open before bailing out.
189 PrintError("Could not allocate checkpoint state, closing checkpoint\n");
190 iface->close_chkpt(store_data);
194 memset(chkpt,0,sizeof(struct v3_chkpt));
196 chkpt->interface = iface;
198 chkpt->store_data = store_data;
// No context is open yet on a fresh checkpoint.
199 chkpt->current_ctx = NULL;
// Open a named context on the checkpoint. Only one context may be open at a
// time (enforced via chkpt->current_ctx); returns the new context or NULL.
// NOTE(review): elided lines presumably set ctx->chkpt (read by the close/
// save/load functions below), free ctx on failure, and return.
204 struct v3_chkpt_ctx * v3_chkpt_open_ctx(struct v3_chkpt * chkpt, char * name) {
205 struct v3_chkpt_ctx * ctx;
// Enforce the single-open-context invariant.
207 if (chkpt->current_ctx) {
208 PrintError("Attempt to open context %s before old context has been closed\n", name);
212 ctx = V3_Malloc(sizeof(struct v3_chkpt_ctx));
215 PrintError("Unable to allocate context\n");
219 memset(ctx, 0, sizeof(struct v3_chkpt_ctx));
// Ask the backing store to open its own representation of the context.
222 ctx->store_ctx = chkpt->interface->open_ctx(chkpt->store_data, name);
224 if (!(ctx->store_ctx)) {
225 PrintError("Underlying store failed to open context %s\n",name);
// Record this as the single currently open context.
230 chkpt->current_ctx = ctx;
// Close a context previously returned by v3_chkpt_open_ctx. Verifies it is
// the current context, closes it on the backing store, then clears
// current_ctx so a new context can be opened. NOTE(review): elided lines
// include the declaration of 'ret', the V3_Free of ctx, and the returns.
235 int v3_chkpt_close_ctx(struct v3_chkpt_ctx * ctx) {
236 struct v3_chkpt * chkpt = ctx->chkpt;
239 if (chkpt->current_ctx != ctx) {
240 PrintError("Attempt to close a context that is not the current context on the store\n");
244 ret = chkpt->interface->close_ctx(chkpt->store_data, ctx->store_ctx);
// Store-side close failed; we still tear down our side to avoid a leak.
247 PrintError("Failed to close context on store, closing device-independent context anyway - bad\n");
251 chkpt->current_ctx=NULL;
// Write a tagged, all-or-nothing blob of 'len' bytes from 'buf' into the
// currently open context. Returns 0 on success, nonzero on failure
// (the elided lines presumably hold the NULL/ctx checks' returns and 'rc').
262 int v3_chkpt_save(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
263 struct v3_chkpt * chkpt = ctx->chkpt;
267 PrintError("Attempt to save tag %s on null context\n",tag);
// Saves are only legal on the single currently open context.
271 if (chkpt->current_ctx != ctx) {
272 PrintError("Attempt to save on context that is not the current context for the store\n");
276 rc = chkpt->interface->save(chkpt->store_data, ctx->store_ctx, tag , len, buf);
279 PrintError("Underlying store failed to save tag %s on valid context\n",tag);
// Read a tagged, all-or-nothing blob of exactly 'len' bytes into 'buf' from
// the currently open context. Mirror of v3_chkpt_save; returns 0 on success.
287 int v3_chkpt_load(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
288 struct v3_chkpt * chkpt = ctx->chkpt;
292 PrintError("Attempt to load tag %s from null context\n",tag);
// Loads are only legal on the single currently open context.
296 if (chkpt->current_ctx != ctx) {
297 PrintError("Attempt to load from context that is not the current context for the store\n");
301 rc = chkpt->interface->load(chkpt->store_data, ctx->store_ctx, tag, len, buf);
304 PrintError("Underlying store failed to load tag %s from valid context\n",tag);
// Restore the guest's entire physical memory image from the "memory_img"
// context into the base memory region, in one all-or-nothing load.
// NOTE(review): 'ctx' and 'ret' declarations and the return statements are
// on elided lines; 'ret' in the error message appears to predate the
// all-or-nothing store interface and may be vestigial -- confirm.
313 static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
315 void * guest_mem_base = NULL;
// Host-virtual address of the start of guest physical memory.
319 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
321 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
324 PrintError("Unable to open context for memory load\n");
328 if (v3_chkpt_load(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
329 PrintError("Unable to load all of memory (requested=%llu bytes, result=%llu bytes\n",(uint64_t)(vm->mem_size),ret);
330 v3_chkpt_close_ctx(ctx);
334 v3_chkpt_close_ctx(ctx);
// Write the guest's entire physical memory image to the "memory_img"
// context in one all-or-nothing save. Mirror of load_memory above.
340 static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
341 void * guest_mem_base = NULL;
// Host-virtual address of the start of guest physical memory.
345 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
347 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
350 PrintError("Unable to open context to save memory\n");
354 if (v3_chkpt_save(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
355 PrintError("Unable to save all of memory (requested=%llu, received=%llu)\n",(uint64_t)(vm->mem_size),ret);
356 v3_chkpt_close_ctx(ctx);
360 v3_chkpt_close_ctx(ctx);
365 #ifdef V3_CONFIG_LIVE_MIGRATION
// Per-migration dirty-page tracking state: one bit per 4KB guest page,
// set by paging_callback when the guest writes the page.
367 struct mem_migration_state {
368 struct v3_vm_info *vm;
369 struct v3_bitmap modified_pages;
// Shadow-paging event callback: on each write page fault (pre-impl), mark
// the faulting guest-physical 4KB page dirty in the migration bitmap.
// NOTE(review): the 'gpa' declaration, the parameter list's priv_data line,
// and the returns are elided in this view.
372 static int paging_callback(struct guest_info *core,
373 struct v3_shdw_pg_event *event,
376 struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
// Only write faults, observed before the shadow-paging implementation
// handles them, are of interest.
378 if (event->event_type==SHADOW_PAGEFAULT &&
379 event->event_order==SHADOW_PREIMPL &&
380 event->error_code.write) {
// Translate the faulting GVA; only GVAs that map to guest physical
// memory are tracked (gpa>>12 = 4KB page frame number).
382 if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
383 // write to this page
384 v3_bitmap_set(&(m->modified_pages),gpa>>12);
386 // no worries, this isn't physical memory
389 // we don't care about other events
// Begin dirty-page tracking for a migration round: allocate the tracking
// state, register the shadow-paging callback, and invalidate every core's
// shadow page tables so all subsequent guest writes fault and get recorded.
// Returns the tracking state, or (presumably, on elided lines) NULL on error.
397 static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
399 struct mem_migration_state *m;
402 m = (struct mem_migration_state *)V3_Malloc(sizeof(struct mem_migration_state));
405 PrintError("Cannot allocate\n");
// One bit per 4KB page of guest memory.
411 if (v3_bitmap_init(&(m->modified_pages),vm->mem_size >> 12) == -1) {
412 PrintError("Failed to initialize modified_pages bit vector");
416 v3_register_shadow_paging_event_callback(vm,paging_callback,m);
// Force faults on all pages so writes become visible to the callback.
418 for (i=0;i<vm->num_cores;i++) {
419 v3_invalidate_shadow_pts(&(vm->cores[i]));
422 // and now we should get callbacks as writes happen
// End a tracking round: unregister the callback and free the bitmap.
// NOTE(review): the V3_Free of 'm' itself is presumably on an elided line.
427 static void stop_page_tracking(struct mem_migration_state *m)
429 v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
431 v3_bitmap_deinit(&(m->modified_pages));
// Send one incremental-memory round: first the dirty-page bitmap (so the
// receiver knows which pages follow), then each dirty page in bitmap order,
// each in its own "memory_page" context. Returns -1 on error.
444 // zero: done with this round
445 static int save_inc_memory(struct v3_vm_info * vm,
446 struct v3_bitmap * mod_pgs_to_send,
447 struct v3_chkpt * chkpt) {
448 int page_size_bytes = 1 << 12; // assuming 4k pages right now
451 void * guest_mem_base = NULL;
// Bitmap size in bytes, rounded up to cover a partial final byte.
452 int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8)
453 + ((mod_pgs_to_send->num_bits % 8) > 0);
456 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
458 PrintDebug("Saving incremental memory.\n");
// Phase 1: transmit the dirty bitmap itself.
460 ctx = v3_chkpt_open_ctx(chkpt,"memory_bitmap_bits");
463 PrintError("Cannot open context for dirty memory bitmap\n");
468 if (v3_chkpt_save(ctx,
469 "memory_bitmap_bits",
471 mod_pgs_to_send->bits)) {
472 PrintError("Unable to write all of the dirty memory bitmap\n");
473 v3_chkpt_close_ctx(ctx);
477 v3_chkpt_close_ctx(ctx);
479 PrintDebug("Sent bitmap bits.\n");
481 // Dirty memory pages are sent in bitmap order
482 for (i = 0; i < mod_pgs_to_send->num_bits; i++) {
483 if (v3_bitmap_check(mod_pgs_to_send, i)) {
484 // PrintDebug("Sending memory page %d.\n",i);
// Each page travels in its own context named "memory_page".
485 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
487 PrintError("Unable to open context to send memory page\n");
490 if (v3_chkpt_save(ctx,
493 guest_mem_base + (page_size_bytes * i))) {
494 PrintError("Unable to send a memory page\n");
495 v3_chkpt_close_ctx(ctx);
499 v3_chkpt_close_ctx(ctx);
// Receive one incremental-memory round: read the dirty bitmap, then each
// flagged page in bitmap order into guest memory. An all-zero bitmap
// signals the sender is done (empty_bitmap drives the "positive" return).
510 // zero: ok, but not done
511 // positive: ok, and also done
512 static int load_inc_memory(struct v3_vm_info * vm,
513 struct v3_bitmap * mod_pgs,
514 struct v3_chkpt * chkpt) {
515 int page_size_bytes = 1 << 12; // assuming 4k pages right now
518 void * guest_mem_base = NULL;
519 bool empty_bitmap = true;
// Bitmap size in bytes, rounded up to cover a partial final byte.
520 int bitmap_num_bytes = (mod_pgs->num_bits / 8)
521 + ((mod_pgs->num_bits % 8) > 0);
524 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
// Phase 1: receive the dirty bitmap describing this round.
526 ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");
529 PrintError("Cannot open context to receive memory bitmap\n");
533 if (v3_chkpt_load(ctx,
534 "memory_bitmap_bits",
537 PrintError("Did not receive all of memory bitmap\n");
538 v3_chkpt_close_ctx(ctx);
542 v3_chkpt_close_ctx(ctx);
544 // Receive also follows bitmap order
545 for (i = 0; i < mod_pgs->num_bits; i ++) {
546 if (v3_bitmap_check(mod_pgs, i)) {
547 PrintDebug("Loading page %d\n", i);
// At least one page arrived, so this is not the terminating round.
548 empty_bitmap = false;
549 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
551 PrintError("Cannot open context to receive memory page\n");
555 if (v3_chkpt_load(ctx,
558 guest_mem_base + (page_size_bytes * i))) {
559 PrintError("Did not receive all of memory page\n");
560 v3_chkpt_close_ctx(ctx);
563 v3_chkpt_close_ctx(ctx);
568 // signal end of receiving pages
569 PrintDebug("Finished receiving pages.\n");
// Write the architecture-identifying header string (SVM or VMX variant)
// into the "header" context, selected by the detected host CPU type.
// NOTE(review): unlike load_header below, this is not declared static --
// worth confirming against callers/headers whether that is intentional.
580 int save_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
581 extern v3_cpu_arch_t v3_mach_type;
584 ctx = v3_chkpt_open_ctx(chkpt, "header");
586 PrintError("Cannot open context to save header\n");
590 switch (v3_mach_type) {
592 case V3_SVM_REV3_CPU: {
// Header is written without its NUL terminator (strlen bytes).
593 if (v3_chkpt_save(ctx, "header", strlen(svm_chkpt_header), svm_chkpt_header)) {
594 PrintError("Could not save all of SVM header\n");
595 v3_chkpt_close_ctx(ctx);
602 case V3_VMX_EPT_UG_CPU: {
603 if (v3_chkpt_save(ctx, "header", strlen(vmx_chkpt_header), vmx_chkpt_header)) {
604 PrintError("Could not save all of VMX header\n");
605 v3_chkpt_close_ctx(ctx);
// Default case: no other CPU types are supported for checkpointing.
611 PrintError("checkpoint not supported on this architecture\n");
612 v3_chkpt_close_ctx(ctx);
616 v3_chkpt_close_ctx(ctx);
// Read back the architecture header written by save_header. A fixed-length
// read of strlen(<expected header>) bytes is done into a VLA one byte
// larger, then NUL-terminated. NOTE(review): any comparison of the read
// header against the expected string is on elided lines -- confirm.
621 static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
622 extern v3_cpu_arch_t v3_mach_type;
625 ctx = v3_chkpt_open_ctx(chkpt, "header");
627 switch (v3_mach_type) {
629 case V3_SVM_REV3_CPU: {
630 char header[strlen(svm_chkpt_header) + 1];
632 if (v3_chkpt_load(ctx, "header", strlen(svm_chkpt_header), header)) {
633 PrintError("Could not load all of SVM header\n");
634 v3_chkpt_close_ctx(ctx);
// Terminate the raw bytes so the header can be used as a C string.
638 header[strlen(svm_chkpt_header)] = 0;
644 case V3_VMX_EPT_UG_CPU: {
645 char header[strlen(vmx_chkpt_header) + 1];
647 if (v3_chkpt_load(ctx, "header", strlen(vmx_chkpt_header), header)) {
648 PrintError("Could not load all of VMX header\n");
649 v3_chkpt_close_ctx(ctx);
653 header[strlen(vmx_chkpt_header)] = 0;
658 PrintError("checkpoint not supported on this architecture\n");
659 v3_chkpt_close_ctx(ctx);
663 v3_chkpt_close_ctx(ctx);
// Restore one vcpu: load the architecture-independent register state from
// the "guest_info<N>" context, re-derive cpu/mem modes, re-activate the
// appropriate page tables, then hand the arch-specific VMCB/VMCS context to
// the SVM or VMX loader. Jumps to the (partially elided) loadfailout label
// on any V3_CHKPT_LOAD failure. NOTE(review): 'key_name' and 'ctx'
// declarations are on elided lines.
669 static int load_core(struct guest_info * info, struct v3_chkpt * chkpt) {
670 extern v3_cpu_arch_t v3_mach_type;
674 memset(key_name, 0, 16);
// Per-core context name, e.g. "guest_info0".
676 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
678 ctx = v3_chkpt_open_ctx(chkpt, key_name);
681 PrintError("Could not open context to load core\n");
685 V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
686 V3_CHKPT_LOAD(ctx, "GPRS", info->vm_regs, loadfailout);
688 V3_CHKPT_LOAD(ctx, "CR0", info->ctrl_regs.cr0, loadfailout);
689 V3_CHKPT_LOAD(ctx, "CR2", info->ctrl_regs.cr2, loadfailout);
690 V3_CHKPT_LOAD(ctx, "CR4", info->ctrl_regs.cr4, loadfailout);
691 V3_CHKPT_LOAD(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, loadfailout);
692 V3_CHKPT_LOAD(ctx, "RFLAGS", info->ctrl_regs.rflags, loadfailout);
693 V3_CHKPT_LOAD(ctx, "EFER", info->ctrl_regs.efer, loadfailout);
695 V3_CHKPT_LOAD(ctx, "DBRS", info->dbg_regs, loadfailout);
696 V3_CHKPT_LOAD(ctx, "SEGS", info->segments, loadfailout);
697 V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout);
// NOTE: tag "GUEST_CRO" spells CR0 with a letter 'O'; it matches the tag
// written by save_core, so it must not be "fixed" on only one side.
698 V3_CHKPT_LOAD(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, loadfailout);
699 V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
701 v3_chkpt_close_ctx(ctx);
703 PrintDebug("Finished reading guest_info information\n");
// Modes must be recomputed from the freshly loaded control registers.
705 info->cpu_mode = v3_get_vm_cpu_mode(info);
706 info->mem_mode = v3_get_vm_mem_mode(info);
// Shadow paging: rebuild either shadow or passthrough page tables to
// match the loaded guest paging state.
708 if (info->shdw_pg_mode == SHADOW_PAGING) {
709 if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
710 if (v3_activate_shadow_pt(info) == -1) {
711 PrintError("Failed to activate shadow page tables\n");
715 if (v3_activate_passthrough_pt(info) == -1) {
716 PrintError("Failed to activate passthrough page tables\n");
// Architecture-specific portion (VMCB for SVM, VMCS for VMX).
723 switch (v3_mach_type) {
725 case V3_SVM_REV3_CPU: {
728 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
729 ctx = v3_chkpt_open_ctx(chkpt, key_name);
732 PrintError("Could not open context to load SVM core\n");
736 if (v3_svm_load_core(info, ctx) < 0 ) {
737 PrintError("Failed to patch core %d\n", info->vcpu_id);
741 v3_chkpt_close_ctx(ctx);
747 case V3_VMX_EPT_UG_CPU: {
750 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
752 ctx = v3_chkpt_open_ctx(chkpt, key_name);
755 PrintError("Could not open context to load VMX core\n");
759 if (v3_vmx_load_core(info, ctx) < 0) {
760 PrintError("VMX checkpoint failed\n");
764 v3_chkpt_close_ctx(ctx);
769 PrintError("Invalid CPU Type (%d)\n", v3_mach_type);
773 PrintDebug("Load of core succeeded\n");
775 v3_print_guest_state(info);
// loadfailout target (label on an elided line): close the context that a
// failing V3_CHKPT_LOAD left open, then fail the whole core load.
780 PrintError("Failed to load core due to bad context load\n");
781 v3_chkpt_close_ctx(ctx);
// Save one vcpu: write the architecture-independent register state into the
// "guest_info<N>" context (tags mirror load_core exactly), then the
// arch-specific VMCB/VMCS state via the SVM or VMX saver. Jumps to the
// (partially elided) savefailout label on any V3_CHKPT_SAVE failure.
787 static int save_core(struct guest_info * info, struct v3_chkpt * chkpt) {
788 extern v3_cpu_arch_t v3_mach_type;
792 PrintDebug("Saving core\n");
794 v3_print_guest_state(info);
796 memset(key_name, 0, 16);
// Per-core context name, e.g. "guest_info0".
799 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
801 ctx = v3_chkpt_open_ctx(chkpt, key_name);
804 PrintError("Unable to open context to save core\n");
809 V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
810 V3_CHKPT_SAVE(ctx, "GPRS", info->vm_regs, savefailout);
812 V3_CHKPT_SAVE(ctx, "CR0", info->ctrl_regs.cr0, savefailout);
813 V3_CHKPT_SAVE(ctx, "CR2", info->ctrl_regs.cr2, savefailout);
814 V3_CHKPT_SAVE(ctx, "CR4", info->ctrl_regs.cr4, savefailout);
815 V3_CHKPT_SAVE(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, savefailout);
816 V3_CHKPT_SAVE(ctx, "RFLAGS", info->ctrl_regs.rflags, savefailout);
817 V3_CHKPT_SAVE(ctx, "EFER", info->ctrl_regs.efer, savefailout);
819 V3_CHKPT_SAVE(ctx, "DBRS", info->dbg_regs, savefailout);
820 V3_CHKPT_SAVE(ctx, "SEGS", info->segments, savefailout);
821 V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout);
// NOTE: tag "GUEST_CRO" spells CR0 with a letter 'O'; load_core reads the
// same tag, so it must not be "fixed" on only one side.
822 V3_CHKPT_SAVE(ctx, "GUEST_CRO", info->shdw_pg_state.guest_cr0, savefailout);
823 V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
825 v3_chkpt_close_ctx(ctx);
827 //Architechture specific code
828 switch (v3_mach_type) {
830 case V3_SVM_REV3_CPU: {
833 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
835 ctx = v3_chkpt_open_ctx(chkpt, key_name);
838 PrintError("Could not open context to store SVM core\n");
842 if (v3_svm_save_core(info, ctx) < 0) {
843 PrintError("VMCB Unable to be written\n");
847 v3_chkpt_close_ctx(ctx);
852 case V3_VMX_EPT_UG_CPU: {
855 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
857 ctx = v3_chkpt_open_ctx(chkpt, key_name);
860 PrintError("Could not open context to store VMX core\n");
864 if (v3_vmx_save_core(info, ctx) == -1) {
865 PrintError("VMX checkpoint failed\n");
869 v3_chkpt_close_ctx(ctx);
// Default case: no other CPU types are supported for checkpointing.
874 PrintError("Invalid CPU Type (%d)\n", v3_mach_type);
// savefailout target (label on an elided line): close the context that a
// failing V3_CHKPT_SAVE left open, then fail the whole core save.
882 PrintError("Failed to save core due to bad context save\n");
883 v3_chkpt_close_ctx(ctx);
// Public entry point: save a full VM checkpoint (memory, devices, header,
// then every core) to the named store/url. A running VM is quiesced with
// the barrier for the duration and resumed afterwards. NOTE(review): the
// chkpt_close call and final return are on elided lines.
889 int v3_chkpt_save_vm(struct v3_vm_info * vm, char * store, char * url) {
890 struct v3_chkpt * chkpt = NULL;
895 chkpt = chkpt_open(vm, store, url, SAVE);
898 PrintError("Error creating checkpoint store for url %s\n",url);
902 /* If this guest is running we need to block it while the checkpoint occurs */
903 if (vm->run_state == VM_RUNNING) {
// Spin until the barrier is raised; all cores are then stopped.
904 while (v3_raise_barrier(vm, NULL) == -1);
907 if ((ret = save_memory(vm, chkpt)) == -1) {
908 PrintError("Unable to save memory\n");
913 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
914 PrintError("Unable to save devices\n");
919 if ((ret = save_header(vm, chkpt)) == -1) {
920 PrintError("Unable to save header\n");
924 for (i = 0; i < vm->num_cores; i++){
925 if ((ret = save_core(&(vm->cores[i]), chkpt)) == -1) {
926 PrintError("chkpt of core %d failed\n", i);
933 /* Resume the guest if it was running */
934 if (vm->run_state == VM_RUNNING) {
935 v3_lower_barrier(vm);
// Public entry point: restore a full VM checkpoint (memory, devices, header,
// then every core) from the named store/url, quiescing a running VM first.
// On failure of a load step, a previously running VM is left VM_STOPPED
// rather than resumed, since its state may now be inconsistent.
944 int v3_chkpt_load_vm(struct v3_vm_info * vm, char * store, char * url) {
945 struct v3_chkpt * chkpt = NULL;
949 chkpt = chkpt_open(vm, store, url, LOAD);
952 PrintError("Error creating checkpoint store\n");
956 /* If this guest is running we need to block it while the checkpoint occurs */
957 if (vm->run_state == VM_RUNNING) {
// Spin until the barrier is raised; all cores are then stopped.
958 while (v3_raise_barrier(vm, NULL) == -1);
961 if ((ret = load_memory(vm, chkpt)) == -1) {
962 PrintError("Unable to load memory\n");
967 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
968 PrintError("Unable to load devies\n");
973 if ((ret = load_header(vm, chkpt)) == -1) {
974 PrintError("Unable to load header\n");
979 for (i = 0; i < vm->num_cores; i++) {
980 if ((ret = load_core(&(vm->cores[i]), chkpt)) == -1) {
981 PrintError("Error loading core state (core=%d)\n", i);
988 /* Resume the guest if it was running and we didn't just trash the state*/
989 if (vm->run_state == VM_RUNNING) {
// Error path: halt the VM instead of resuming into corrupt state.
992 vm->run_state = VM_STOPPED;
995 /* We check the run state of the VM after every barrier
996 So this will immediately halt the VM
998 v3_lower_barrier(vm);
1008 #ifdef V3_CONFIG_LIVE_MIGRATION
1010 #define MOD_THRESHOLD 200 // pages below which we declare victory
1011 #define ITER_THRESHOLD 32 // iters below which we declare victory
// Live migration sender (pre-copy): iteratively pause the VM, snapshot the
// set of pages dirtied since the last round, and send them; repeat until the
// dirty set is small (MOD_THRESHOLD) or too many iterations have passed
// (ITER_THRESHOLD), then send the final round with the VM paused, a zero
// bitmap as the end-of-pages marker, and finally devices/header/cores.
// Shadow paging is required because dirty tracking uses shadow page faults.
// NOTE(review): declarations of 'ret', 'iter', 'stop_time' and various error
// returns / cleanup lines are elided in this view.
1015 int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url) {
1016 struct v3_chkpt * chkpt = NULL;
1019 bool last_modpage_iteration=false;
1020 struct v3_bitmap modified_pages_to_send;
1021 uint64_t start_time;
1023 int num_mod_pages=0;
1024 struct mem_migration_state *mm_state;
1027 // Currently will work only for shadow paging
1028 for (i=0;i<vm->num_cores;i++) {
1029 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING) {
1030 PrintError("Cannot currently handle nested paging\n");
1036 chkpt = chkpt_open(vm, store, url, SAVE);
1038 if (chkpt == NULL) {
1039 PrintError("Error creating checkpoint store\n");
1044 // In a send, the memory is copied incrementally first,
1045 // followed by the remainder of the state
1047 if (v3_bitmap_init(&modified_pages_to_send,
1048 vm->mem_size>>12 // number of pages in main region
1050 PrintError("Could not intialize bitmap.\n");
1054 // 0. Initialize bitmap to all 1s
// First round sends every page, so mark all pages dirty up front.
1055 for (i=0; i < modified_pages_to_send.num_bits; i++) {
1056 v3_bitmap_set(&modified_pages_to_send,i);
1060 while (!last_modpage_iteration) {
1061 PrintDebug("Modified memory page iteration %d\n",i++);
// Downtime measurement starts at the pause.
1063 start_time = v3_get_host_time(&(vm->cores[0].time_state));
1065 // We will pause the VM for a short while
1066 // so that we can collect the set of changed pages
1067 if (v3_pause_vm(vm) == -1) {
1068 PrintError("Could not pause VM\n");
1074 // special case, we already have the pages to send (all of them)
1075 // they are already in modified_pages_to_send
1077 // normally, we are in the middle of a round
1078 // We need to copy from the current tracking bitmap
1079 // to our send bitmap
1080 v3_bitmap_copy(&modified_pages_to_send,&(mm_state->modified_pages));
1081 // and now we need to remove our tracking
1082 stop_page_tracking(mm_state);
1085 // are we done? (note that we are still paused)
1086 num_mod_pages = v3_bitmap_count(&modified_pages_to_send);
1087 if (num_mod_pages<MOD_THRESHOLD || iter>ITER_THRESHOLD) {
1088 // we are done, so we will not restart page tracking
1089 // the vm is paused, and so we should be able
1090 // to just send the data
1091 PrintDebug("Last modified memory page iteration.\n");
1092 last_modpage_iteration = true;
1094 // we are not done, so we will restart page tracking
1095 // to prepare for a second round of pages
1096 // we will resume the VM as this happens
1097 if (!(mm_state=start_page_tracking(vm))) {
1098 PrintError("Error enabling page tracking.\n");
1102 if (v3_continue_vm(vm) == -1) {
1103 PrintError("Error resuming the VM\n");
1104 stop_page_tracking(mm_state);
1109 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1110 PrintDebug("num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1114 // At this point, we are either paused and about to copy
1115 // the last chunk, or we are running, and will copy the last
1116 // round in parallel with current execution
1117 if (num_mod_pages>0) {
1118 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1119 PrintError("Error sending incremental memory.\n");
1123 } // we don't want to copy an empty bitmap here
// Clear the bitmap so the all-zero "done" marker can be sent.
1128 if (v3_bitmap_reset(&modified_pages_to_send) == -1) {
1129 PrintError("Error reseting bitmap.\n");
1134 // send bitmap of 0s to signal end of modpages
1135 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1136 PrintError("Error sending incremental memory.\n");
1141 // save the non-memory state
1142 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
1143 PrintError("Unable to save devices\n");
1148 if ((ret = save_header(vm, chkpt)) == -1) {
1149 PrintError("Unable to save header\n");
1153 for (i = 0; i < vm->num_cores; i++){
1154 if ((ret = save_core(&(vm->cores[i]), chkpt)) == -1) {
1155 PrintError("chkpt of core %d failed\n", i);
1160 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1161 PrintDebug("num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1162 PrintDebug("Done sending VM!\n");
1164 v3_bitmap_deinit(&modified_pages_to_send);
// Live migration receiver: repeatedly load incremental memory rounds until
// load_inc_memory reports the empty-bitmap terminator, then load devices,
// header, and per-core state. Mirrors v3_chkpt_send_vm; shadow paging is
// required for the same reason as on the send side. On failure, a running
// VM is left VM_STOPPED rather than resumed into inconsistent state.
// NOTE(review): declarations of 'ret'/'i', the receive loop header, and the
// error/cleanup returns are on elided lines.
1171 int v3_chkpt_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
1172 struct v3_chkpt * chkpt = NULL;
1175 struct v3_bitmap mod_pgs;
1177 // Currently will work only for shadow paging
1178 for (i=0;i<vm->num_cores;i++) {
1179 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING) {
1180 PrintError("Cannot currently handle nested paging\n");
1185 chkpt = chkpt_open(vm, store, url, LOAD);
1187 if (chkpt == NULL) {
1188 PrintError("Error creating checkpoint store\n");
// One bit per 4KB guest page, matching the sender's bitmap.
1193 if (v3_bitmap_init(&mod_pgs,vm->mem_size>>12) == -1) {
1195 PrintError("Could not intialize bitmap.\n");
1199 /* If this guest is running we need to block it while the checkpoint occurs */
1200 if (vm->run_state == VM_RUNNING) {
1201 while (v3_raise_barrier(vm, NULL) == -1);
1206 // 1. Receive copy of bitmap
1208 PrintDebug("Memory page iteration %d\n",i++);
// Positive return = terminating (empty-bitmap) round; -1 = error.
1209 int retval = load_inc_memory(vm, &mod_pgs, chkpt);
1211 // end of receiving memory pages
1213 } else if (retval == -1) {
1214 PrintError("Error receiving incremental memory.\n");
1220 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
1221 PrintError("Unable to load devices\n");
1227 if ((ret = load_header(vm, chkpt)) == -1) {
1228 PrintError("Unable to load header\n");
1234 for (i = 0; i < vm->num_cores; i++) {
1235 if ((ret = load_core(&(vm->cores[i]), chkpt)) == -1) {
1236 PrintError("Error loading core state (core=%d)\n", i);
1243 PrintError("Unable to receive VM\n");
1245 PrintDebug("Done receving the VM\n");
1249 /* Resume the guest if it was running and we didn't just trash the state*/
1250 if (vm->run_state == VM_RUNNING) {
1252 PrintError("VM was previously running. It is now borked. Pausing it. \n");
1253 vm->run_state = VM_STOPPED;
1256 /* We check the run state of the VM after every barrier
1257 So this will immediately halt the VM
1259 v3_lower_barrier(vm);