2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2011, Madhav Suresh <madhav@u.northwestern.edu>
11 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Madhav Suresh <madhav@u.northwestern.edu>
15 * Arefin Huq <fig@arefin.net>
16 * Peter Dinda <pdinda@northwestern.edu> (store interface changes)
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_sprintf.h>
25 #include <palacios/vm_guest.h>
26 #include <palacios/svm.h>
27 #include <palacios/vmx.h>
28 #include <palacios/vmm_checkpoint.h>
29 #include <palacios/vmm_hashtable.h>
30 #include <palacios/vmm_direct_paging.h>
31 #include <palacios/vmm_debug.h>
33 #include <palacios/vmm_dev_mgr.h>
35 #ifdef V3_CONFIG_LIVE_MIGRATION
36 #include <palacios/vmm_time.h>
37 #include <palacios/vm_guest_mem.h>
38 #include <palacios/vmm_shadow_paging.h>
41 #ifndef V3_CONFIG_DEBUG_CHECKPOINT
43 #define PrintDebug(fmt, args...)
// Registry of checkpoint backing stores. Created in V3_init_checkpoint,
// keyed by store name, and searched by chkpt_open.
47 static struct hashtable * store_table = NULL;
// Direction in which a checkpoint is opened: writing (SAVE) or reading (LOAD).
51 typedef enum {SAVE, LOAD} chkpt_mode_t;
// Operations implemented by a checkpoint backing store. Stores are registered
// by name in V3_init_checkpoint; the functions in this file reach them through
// chkpt->interface.
53 struct chkpt_interface {
55 // Opening a checkpoint should return a pointer to the internal representation
56 // of the checkpoint in the store. This will be passed back
57 // as "store_data". Return NULL if the context cannot be opened
58 void * (*open_chkpt)(char * url, chkpt_mode_t mode);
59 // Closing the checkpoint should return -1 on failure, 0 on success
60 int (*close_chkpt)(void * store_data);
62 // Opening a context on the checkpoint with a given name should return
63 // a pointer to an internal representation of the context. This pointer
64 // is then passed back as "ctx".
65 // We will open only a single context at a time.
66 void * (*open_ctx)(void * store_data, char *name);
67 // Closing the context should return -1 on failure, 0 on success
68 int (*close_ctx)(void * store_data, void * ctx);
70 // Save and load include a tagged data buffer. These are
71 // "all or nothing" writes and reads.
72 // return -1 on failure, and 0 on success
// Arguments: store handle, open context, tag naming the item, buffer length, buffer.
74 int (*save)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
75 int (*load)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
// Fields of the checkpoint handle that chkpt_open allocates.
// Presumably the VM this checkpoint belongs to -- TODO confirm where it is set.
80 struct v3_vm_info * vm;
// The one context currently open on this checkpoint, or NULL when none is open.
82 struct v3_chkpt_ctx *current_ctx;
// Backing-store operations used for every access to this checkpoint.
84 struct chkpt_interface * interface;
// Hash callback for store_table: treats key as a pointer to a NUL-terminated
// store name and hashes its bytes (terminator excluded).
92 static uint_t store_hash_fn(addr_t key) {
93 char * name = (char *)key;
94 return v3_hash_buffer((uint8_t *)name, strlen(name));
// Equality callback for store_table: both keys are pointers to NUL-terminated
// store names. Returns non-zero when the names are identical, 0 otherwise.
97 static int store_eq_fn(addr_t key1, addr_t key2) {
98 char * name1 = (char *)key1;
99 char * name2 = (char *)key2;
101 return (strcmp(name1, name2) == 0);
106 #include "vmm_chkpt_stores.h"
// Build the registry of checkpoint backing stores.
// Creates store_table, then walks the array of store interfaces delimited by
// __start__v3_chkpt_stores and __stop__v3_chkpt_stores, inserting each one
// keyed by its name. A duplicate name or a failed insert is logged.
109 int V3_init_checkpoint() {
110 extern struct chkpt_interface * __start__v3_chkpt_stores[];
111 extern struct chkpt_interface * __stop__v3_chkpt_stores[];
112 struct chkpt_interface ** tmp_store = __start__v3_chkpt_stores;
// NOTE(review): no NULL check on the new table is visible before it is used below -- confirm.
115 store_table = v3_create_htable(0, store_hash_fn, store_eq_fn);
117 while (tmp_store != __stop__v3_chkpt_stores) {
118 V3_Print(VM_NONE, VCORE_NONE, "Registering Checkpoint Backing Store (%s)\n", (*tmp_store)->name);
// A store with the same name has already been registered.
120 if (v3_htable_search(store_table, (addr_t)((*tmp_store)->name))) {
121 PrintError(VM_NONE, VCORE_NONE, "Multiple instances of Checkpoint backing Store (%s)\n", (*tmp_store)->name);
// A return of 0 from v3_htable_insert is treated as failure.
125 if (v3_htable_insert(store_table, (addr_t)((*tmp_store)->name), (addr_t)(*tmp_store)) == 0) {
126 PrintError(VM_NONE, VCORE_NONE, "Could not register Checkpoint backing store (%s)\n", (*tmp_store)->name);
// Step to the next entry of the store array.
130 tmp_store = &(__start__v3_chkpt_stores[++i]);
// Release the backing-store registry created by V3_init_checkpoint.
// NOTE(review): the two 0 arguments presumably mean "do not free values or
// keys" (the entries are the registered interfaces) -- confirm against
// vmm_hashtable.h.
136 int V3_deinit_checkpoint() {
137 v3_free_htable(store_table, 0, 0);
// Header strings stored in the "header" context by save_header; load_header
// reads back strlen() bytes of the string matching the current architecture.
142 static char svm_chkpt_header[] = "v3vee palacios checkpoint version: x.x, SVM x.x";
143 static char vmx_chkpt_header[] = "v3vee palacios checkpoint version: x.x, VMX x.x";
// Close a checkpoint by calling the backing store's close_chkpt on its
// store_data. Logs an error if the store reports a failure, or if chkpt is
// NULL. Presumably returns -1 on either error and 0 otherwise -- TODO confirm.
145 static int chkpt_close(struct v3_chkpt * chkpt) {
149 rc = chkpt->interface->close_chkpt(chkpt->store_data);
154 PrintError(VM_NONE, VCORE_NONE, "Internal store failed to close valid checkpoint\n");
160 PrintError(VM_NONE, VCORE_NONE, "Attempt to close null checkpoint\n");
// Open a checkpoint on a named backing store.
//   vm    - VM the checkpoint is for (used for log messages in the code below)
//   store - name of a backing store registered in store_table
//   url   - store-specific location, passed unchanged to the store's open_chkpt
//   mode  - SAVE or LOAD
// Looks up the store interface, asks it to open url, then allocates and
// zero-fills a struct v3_chkpt recording the interface and the store's handle.
// Each failure is logged; if the allocation fails, the store-level checkpoint
// is closed again.
166 static struct v3_chkpt * chkpt_open(struct v3_vm_info * vm, char * store, char * url, chkpt_mode_t mode) {
167 struct chkpt_interface * iface = NULL;
168 struct v3_chkpt * chkpt = NULL;
169 void * store_data = NULL;
// Find the interface registered under this store name.
171 iface = (void *)v3_htable_search(store_table, (addr_t)store);
174 V3_Print(vm, VCORE_NONE, "Error: Could not locate Checkpoint interface for store (%s)\n", store);
178 store_data = iface->open_chkpt(url, mode);
180 if (store_data == NULL) {
181 PrintError(vm, VCORE_NONE, "Could not open url (%s) for backing store (%s)\n", url, store);
186 chkpt = V3_Malloc(sizeof(struct v3_chkpt));
189 PrintError(vm, VCORE_NONE, "Could not allocate checkpoint state, closing checkpoint\n");
// Undo the store-level open, since there is no handle to return.
190 iface->close_chkpt(store_data);
194 memset(chkpt,0,sizeof(struct v3_chkpt));
196 chkpt->interface = iface;
198 chkpt->store_data = store_data;
// No context is open on a freshly opened checkpoint.
199 chkpt->current_ctx = NULL;
// Open the context called name on a checkpoint.
// Only one context may be open per checkpoint at a time, so the request is
// rejected while chkpt->current_ctx is set. On success the new, zero-filled
// context holds the store's context handle and becomes chkpt->current_ctx.
204 struct v3_chkpt_ctx * v3_chkpt_open_ctx(struct v3_chkpt * chkpt, char * name) {
205 struct v3_chkpt_ctx * ctx;
207 if (chkpt->current_ctx) {
208 PrintError(VM_NONE, VCORE_NONE, "Attempt to open context %s before old context has been closed\n", name);
212 ctx = V3_Malloc(sizeof(struct v3_chkpt_ctx));
215 PrintError(VM_NONE, VCORE_NONE, "Unable to allocate context\n");
219 memset(ctx, 0, sizeof(struct v3_chkpt_ctx));
222 ctx->store_ctx = chkpt->interface->open_ctx(chkpt->store_data, name);
// NOTE(review): ctx was allocated above; confirm it is released on this failure path.
224 if (!(ctx->store_ctx)) {
225 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to open context %s\n",name);
230 chkpt->current_ctx = ctx;
// Close a context obtained from v3_chkpt_open_ctx.
// The context must be the current context of its checkpoint; otherwise the
// call is rejected with an error message. A failure reported by the store's
// close_ctx is logged, and chkpt->current_ctx is then cleared.
// NOTE(review): ctx is dereferenced on the first line, so callers must not pass
// NULL. load_header passes the result of v3_chkpt_open_ctx without checking it.
235 int v3_chkpt_close_ctx(struct v3_chkpt_ctx * ctx) {
236 struct v3_chkpt * chkpt = ctx->chkpt;
239 if (chkpt->current_ctx != ctx) {
240 PrintError(VM_NONE, VCORE_NONE, "Attempt to close a context that is not the current context on the store\n");
244 ret = chkpt->interface->close_ctx(chkpt->store_data, ctx->store_ctx);
247 PrintError(VM_NONE, VCORE_NONE, "Failed to close context on store, closing device-independent context anyway - bad\n");
251 chkpt->current_ctx=NULL;
262 int v3_chkpt_save(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
263 struct v3_chkpt * chkpt = ctx->chkpt;
267 PrintError(VM_NONE, VCORE_NONE, "Attempt to save tag %s on null context\n",tag);
271 if (chkpt->current_ctx != ctx) {
272 PrintError(VM_NONE, VCORE_NONE, "Attempt to save on context that is not the current context for the store\n");
276 rc = chkpt->interface->save(chkpt->store_data, ctx->store_ctx, tag , len, buf);
279 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to save tag %s on valid context\n",tag);
287 int v3_chkpt_load(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
288 struct v3_chkpt * chkpt = ctx->chkpt;
292 PrintError(VM_NONE, VCORE_NONE, "Attempt to load tag %s from null context\n",tag);
296 if (chkpt->current_ctx != ctx) {
297 PrintError(VM_NONE, VCORE_NONE, "Attempt to load from context that is not the current context for the store\n");
301 rc = chkpt->interface->load(chkpt->store_data, ctx->store_ctx, tag, len, buf);
304 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to load tag %s from valid context\n",tag);
// Restore guest memory from the "memory_img" context.
// For every base region in vm->mem_map, loads v3_mem_block_size bytes under
// the tag "memory_img" into the host virtual address of that region
// (V3_VAddr of the region's host_addr). The context is closed on both the
// error and the normal path.
313 static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
315 void * guest_mem_base = NULL;
319 extern uint64_t v3_mem_block_size;
321 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
324 PrintError(vm, VCORE_NONE, "Unable to open context for memory load\n");
329 for (i=0;i<vm->mem_map.num_base_regions;i++) {
330 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr);
331 if (v3_chkpt_load(ctx, "memory_img", v3_mem_block_size, guest_mem_base)) {
// NOTE(review): the message reports vm->mem_size as "requested", but the load
// above asks for v3_mem_block_size bytes. ret does not receive the result of
// v3_chkpt_load here -- confirm it holds a meaningful value when printed.
332 PrintError(vm, VCORE_NONE, "Unable to load all of memory (region %d) (requested=%llu bytes, result=%llu bytes\n",i,(uint64_t)(vm->mem_size),ret);
333 v3_chkpt_close_ctx(ctx);
338 v3_chkpt_close_ctx(ctx);
// Write guest memory into the "memory_img" context.
// For every base region in vm->mem_map, saves v3_mem_block_size bytes under
// the tag "memory_img" from the host virtual address of that region
// (V3_VAddr of the region's host_addr). The context is closed on both the
// error and the normal path.
344 static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
345 void * guest_mem_base = NULL;
348 extern uint64_t v3_mem_block_size;
352 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
355 PrintError(vm, VCORE_NONE, "Unable to open context to save memory\n");
359 for (i=0;i<vm->mem_map.num_base_regions;i++) {
360 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_regions[i].host_addr);
361 if (v3_chkpt_save(ctx, "memory_img", v3_mem_block_size, guest_mem_base)) {
// NOTE(review): the message reports vm->mem_size as "requested", but the save
// above writes v3_mem_block_size bytes. ret does not receive the result of
// v3_chkpt_save here -- confirm it holds a meaningful value when printed.
362 PrintError(vm, VCORE_NONE, "Unable to save all of memory (region %d) (requested=%llu, received=%llu)\n",i,(uint64_t)(vm->mem_size),ret);
363 v3_chkpt_close_ctx(ctx);
368 v3_chkpt_close_ctx(ctx);
373 #ifdef V3_CONFIG_LIVE_MIGRATION
// State kept while guest writes are being tracked during live migration.
375 struct mem_migration_state {
// VM whose writes are tracked; used to unregister the callback in stop_page_tracking.
376 struct v3_vm_info *vm;
// One bit per 4KB guest physical page (bit index = gpa >> 12); set by
// paging_callback when a write fault to that page is observed.
377 struct v3_bitmap modified_pages;
// Shadow paging event callback registered by start_page_tracking.
// priv_data is the struct mem_migration_state passed at registration.
// Only events that are a SHADOW_PAGEFAULT, with order SHADOW_PREIMPL and the
// write bit set in the error code, are acted on. For those, the faulting guest
// virtual address is translated to a guest physical address and, when the
// translation succeeds (returns 0), the corresponding page bit is set.
380 static int paging_callback(struct guest_info *core,
381 struct v3_shdw_pg_event *event,
384 struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
386 if (event->event_type==SHADOW_PAGEFAULT &&
387 event->event_order==SHADOW_PREIMPL &&
388 event->error_code.write) {
390 if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
391 // write to this page
392 v3_bitmap_set(&(m->modified_pages),gpa>>12);
394 // no worries, this isn't physical memory
397 // we don't care about other events
// Begin tracking guest writes for live migration.
// Allocates the tracking state, initializes its bitmap with one bit per 4KB
// page (vm->mem_size >> 12 bits), registers paging_callback with the state as
// its private data, and invalidates the shadow page tables of every core.
// v3_chkpt_send_vm treats a NULL return value as failure.
405 static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
407 struct mem_migration_state *m;
410 m = (struct mem_migration_state *)V3_Malloc(sizeof(struct mem_migration_state));
413 PrintError(vm, VCORE_NONE, "Cannot allocate\n");
419 if (v3_bitmap_init(&(m->modified_pages),vm->mem_size >> 12) == -1) {
420 PrintError(vm, VCORE_NONE, "Failed to initialize modified_pages bit vector");
424 v3_register_shadow_paging_event_callback(vm,paging_callback,m);
// Presumably forces subsequent guest accesses to fault again so that writes
// reach the callback -- see the comment after the loop.
426 for (i=0;i<vm->num_cores;i++) {
427 v3_invalidate_shadow_pts(&(vm->cores[i]));
430 // and now we should get callbacks as writes happen
// Stop tracking guest writes: unregisters paging_callback for m->vm and
// releases the modified-pages bitmap held in m.
435 static void stop_page_tracking(struct mem_migration_state *m)
437 v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
439 v3_bitmap_deinit(&(m->modified_pages));
// Send one round of modified memory pages.
//   vm              - VM whose memory is sent
//   mod_pgs_to_send - bitmap with one bit per 4KB page; set bits are sent
//   chkpt           - checkpoint to write to
// The bitmap is written first in a "memory_bitmap_bits" context, then every
// page whose bit is set is written, in bitmap order, each in its own
// "memory_page" context.
452 // zero: done with this round
453 static int save_inc_memory(struct v3_vm_info * vm,
454 struct v3_bitmap * mod_pgs_to_send,
455 struct v3_chkpt * chkpt) {
456 int page_size_bytes = 1 << 12; // assuming 4k pages right now
// Bitmap size in bytes: num_bits / 8, rounded up.
459 int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8)
460 + ((mod_pgs_to_send->num_bits % 8) > 0);
463 PrintDebug(vm, VCORE_NONE, "Saving incremental memory.\n");
465 ctx = v3_chkpt_open_ctx(chkpt,"memory_bitmap_bits");
468 PrintError(vm, VCORE_NONE, "Cannot open context for dirty memory bitmap\n");
473 if (v3_chkpt_save(ctx,
474 "memory_bitmap_bits",
476 mod_pgs_to_send->bits)) {
477 PrintError(vm, VCORE_NONE, "Unable to write all of the dirty memory bitmap\n");
478 v3_chkpt_close_ctx(ctx);
482 v3_chkpt_close_ctx(ctx);
484 PrintDebug(vm, VCORE_NONE, "Sent bitmap bits.\n");
486 // Dirty memory pages are sent in bitmap order
487 for (i = 0; i < mod_pgs_to_send->num_bits; i++) {
488 if (v3_bitmap_check(mod_pgs_to_send, i)) {
// NOTE(review): page_size_bytes is an int; if i is also an int (it is printed
// with %d below), page_size_bytes * i overflows for page indexes at or above
// 2^19, i.e. guest addresses of 2GB and up -- confirm.
489 struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i);
491 PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i);
494 // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n",i);
495 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
497 PrintError(vm, VCORE_NONE, "Unable to open context to send memory page\n");
// Buffer address = region host address + offset of the page within the region.
// NOTE(review): save_memory wraps host_addr in V3_VAddr before using it as a
// buffer; here the address is derived from host_addr directly -- confirm which
// is intended.
500 if (v3_chkpt_save(ctx,
503 (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) {
504 PrintError(vm, VCORE_NONE, "Unable to send a memory page\n");
505 v3_chkpt_close_ctx(ctx);
509 v3_chkpt_close_ctx(ctx);
// Receive one round of modified memory pages (counterpart of save_inc_memory).
//   vm      - VM whose memory is updated
//   mod_pgs - bitmap buffer; presumably filled by the "memory_bitmap_bits" load
//             (its argument lines are not shown) -- TODO confirm
//   chkpt   - checkpoint to read from
// Reads the bitmap from a "memory_bitmap_bits" context, then loads every page
// whose bit is set, in bitmap order, each from its own "memory_page" context.
// empty_bitmap stays true only when no bit was set; presumably that is what
// produces the positive "done" return value described below -- TODO confirm.
520 // zero: ok, but not done
521 // positive: ok, and also done
522 static int load_inc_memory(struct v3_vm_info * vm,
523 struct v3_bitmap * mod_pgs,
524 struct v3_chkpt * chkpt) {
525 int page_size_bytes = 1 << 12; // assuming 4k pages right now
528 bool empty_bitmap = true;
// Bitmap size in bytes: num_bits / 8, rounded up.
529 int bitmap_num_bytes = (mod_pgs->num_bits / 8)
530 + ((mod_pgs->num_bits % 8) > 0);
533 ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");
536 PrintError(vm, VCORE_NONE, "Cannot open context to receive memory bitmap\n");
540 if (v3_chkpt_load(ctx,
541 "memory_bitmap_bits",
544 PrintError(vm, VCORE_NONE, "Did not receive all of memory bitmap\n");
545 v3_chkpt_close_ctx(ctx);
549 v3_chkpt_close_ctx(ctx);
551 // Receive also follows bitmap order
552 for (i = 0; i < mod_pgs->num_bits; i ++) {
553 if (v3_bitmap_check(mod_pgs, i)) {
// NOTE(review): page_size_bytes is an int; if i is also an int (it is printed
// with %d below), page_size_bytes * i overflows for page indexes at or above
// 2^19, i.e. guest addresses of 2GB and up -- confirm.
554 struct v3_mem_region *region = v3_get_base_region(vm,page_size_bytes * i);
556 PrintError(vm, VCORE_NONE, "Failed to find base region for page %d\n",i);
559 //PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i);
560 empty_bitmap = false;
561 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
563 PrintError(vm, VCORE_NONE, "Cannot open context to receive memory page\n");
// Buffer address = region host address + offset of the page within the region.
// NOTE(review): load_memory wraps host_addr in V3_VAddr before using it as a
// buffer; here the address is derived from host_addr directly -- confirm which
// is intended.
567 if (v3_chkpt_load(ctx,
570 (void*)(region->host_addr + page_size_bytes * i - region->guest_start))) {
571 PrintError(vm, VCORE_NONE, "Did not receive all of memory page\n");
572 v3_chkpt_close_ctx(ctx);
575 v3_chkpt_close_ctx(ctx);
580 // signal end of receiving pages
581 PrintDebug(vm, VCORE_NONE, "Finished receiving pages.\n");
// Write the checkpoint header into the "header" context.
// The string written depends on v3_mach_type: svm_chkpt_header for SVM,
// vmx_chkpt_header for VMX. Any other architecture is rejected with an error.
// strlen() bytes are written, so the terminating NUL is not stored.
592 int save_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
593 extern v3_cpu_arch_t v3_mach_type;
596 ctx = v3_chkpt_open_ctx(chkpt, "header");
598 PrintError(vm, VCORE_NONE, "Cannot open context to save header\n");
602 switch (v3_mach_type) {
604 case V3_SVM_REV3_CPU: {
605 if (v3_chkpt_save(ctx, "header", strlen(svm_chkpt_header), svm_chkpt_header)) {
606 PrintError(vm, VCORE_NONE, "Could not save all of SVM header\n");
607 v3_chkpt_close_ctx(ctx);
614 case V3_VMX_EPT_UG_CPU: {
615 if (v3_chkpt_save(ctx, "header", strlen(vmx_chkpt_header), vmx_chkpt_header)) {
616 PrintError(vm, VCORE_NONE, "Could not save all of VMX header\n");
617 v3_chkpt_close_ctx(ctx);
623 PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
624 v3_chkpt_close_ctx(ctx);
628 v3_chkpt_close_ctx(ctx);
// Read the checkpoint header from the "header" context.
// Depending on v3_mach_type, strlen() bytes of the SVM or VMX header are
// loaded into a local buffer sized strlen()+1, which is then NUL-terminated.
// Any other architecture is rejected with an error.
// NOTE(review): no comparison of the loaded bytes with the expected header
// string is visible here -- confirm whether the header is validated.
633 static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
634 extern v3_cpu_arch_t v3_mach_type;
// NOTE(review): unlike save_header, the result of v3_chkpt_open_ctx does not
// appear to be checked before it is passed to v3_chkpt_load and
// v3_chkpt_close_ctx; a failed open would hand them a NULL context -- confirm.
637 ctx = v3_chkpt_open_ctx(chkpt, "header");
639 switch (v3_mach_type) {
641 case V3_SVM_REV3_CPU: {
642 char header[strlen(svm_chkpt_header) + 1];
644 if (v3_chkpt_load(ctx, "header", strlen(svm_chkpt_header), header)) {
645 PrintError(vm, VCORE_NONE, "Could not load all of SVM header\n");
646 v3_chkpt_close_ctx(ctx);
// The stored header has no terminator, so add one.
650 header[strlen(svm_chkpt_header)] = 0;
656 case V3_VMX_EPT_UG_CPU: {
657 char header[strlen(vmx_chkpt_header) + 1];
659 if (v3_chkpt_load(ctx, "header", strlen(vmx_chkpt_header), header)) {
660 PrintError(vm, VCORE_NONE, "Could not load all of VMX header\n");
661 v3_chkpt_close_ctx(ctx);
// The stored header has no terminator, so add one.
665 header[strlen(vmx_chkpt_header)] = 0;
670 PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
671 v3_chkpt_close_ctx(ctx);
675 v3_chkpt_close_ctx(ctx);
// Restore one virtual core from a checkpoint.
//   info  - core to restore
//   chkpt - open checkpoint to read from
//   opts  - checkpoint option flags; V3_CHKPT_OPT_SKIP_ARCHDEP is tested here
// Architecture-independent state is read from the context
// "guest_info<vcpu_id>". The CPU and memory modes are then recomputed and,
// under shadow paging, the shadow or passthrough page tables are activated.
// Architecture-specific state is read from "vmcb_data<vcpu_id>" (SVM) or
// "vmcs_data<vcpu_id>" (VMX).
// Each V3_CHKPT_LOAD is given the label loadfailout, presumably its jump target
// on failure; the code at the end logs the failure and closes ctx if it is
// still open.
681 static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt_options_t opts) {
682 extern v3_cpu_arch_t v3_mach_type;
687 PrintDebug(info->vm_info, info, "Loading core\n");
689 memset(key_name, 0, 16);
691 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
693 ctx = v3_chkpt_open_ctx(chkpt, key_name);
696 PrintError(info->vm_info, info, "Could not open context to load core\n");
// Instruction pointer
700 V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
// General purpose registers
703 V3_CHKPT_LOAD(ctx,"RDI",info->vm_regs.rdi, loadfailout);
704 V3_CHKPT_LOAD(ctx,"RSI",info->vm_regs.rsi, loadfailout);
705 V3_CHKPT_LOAD(ctx,"RBP",info->vm_regs.rbp, loadfailout);
706 V3_CHKPT_LOAD(ctx,"RSP",info->vm_regs.rsp, loadfailout);
707 V3_CHKPT_LOAD(ctx,"RBX",info->vm_regs.rbx, loadfailout);
708 V3_CHKPT_LOAD(ctx,"RDX",info->vm_regs.rdx, loadfailout);
709 V3_CHKPT_LOAD(ctx,"RCX",info->vm_regs.rcx, loadfailout);
710 V3_CHKPT_LOAD(ctx,"RAX",info->vm_regs.rax, loadfailout);
711 V3_CHKPT_LOAD(ctx,"R8",info->vm_regs.r8, loadfailout);
712 V3_CHKPT_LOAD(ctx,"R9",info->vm_regs.r9, loadfailout);
713 V3_CHKPT_LOAD(ctx,"R10",info->vm_regs.r10, loadfailout);
714 V3_CHKPT_LOAD(ctx,"R11",info->vm_regs.r11, loadfailout);
715 V3_CHKPT_LOAD(ctx,"R12",info->vm_regs.r12, loadfailout);
716 V3_CHKPT_LOAD(ctx,"R13",info->vm_regs.r13, loadfailout);
717 V3_CHKPT_LOAD(ctx,"R14",info->vm_regs.r14, loadfailout);
718 V3_CHKPT_LOAD(ctx,"R15",info->vm_regs.r15, loadfailout);
// Control registers
721 V3_CHKPT_LOAD(ctx, "CR0", info->ctrl_regs.cr0, loadfailout);
723 V3_CHKPT_LOAD(ctx, "CR2", info->ctrl_regs.cr2, loadfailout);
724 V3_CHKPT_LOAD(ctx, "CR3", info->ctrl_regs.cr3, loadfailout);
725 V3_CHKPT_LOAD(ctx, "CR4", info->ctrl_regs.cr4, loadfailout);
726 // There are no CR5,6,7
727 // CR8 is derived from apic_tpr
// NOTE(review): tempreg is computed and then handed to the "CR8" load, which
// presumably overwrites it; no later use of the loaded value is visible. The
// load mirrors the "CR8" item written by save_core -- confirm.
728 tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
729 V3_CHKPT_LOAD(ctx, "CR8", tempreg, loadfailout);
730 V3_CHKPT_LOAD(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, loadfailout);
731 V3_CHKPT_LOAD(ctx, "RFLAGS", info->ctrl_regs.rflags, loadfailout);
732 V3_CHKPT_LOAD(ctx, "EFER", info->ctrl_regs.efer, loadfailout);
// Debug registers
735 V3_CHKPT_LOAD(ctx, "DR0", info->dbg_regs.dr0, loadfailout);
736 V3_CHKPT_LOAD(ctx, "DR1", info->dbg_regs.dr1, loadfailout);
737 V3_CHKPT_LOAD(ctx, "DR2", info->dbg_regs.dr2, loadfailout);
738 V3_CHKPT_LOAD(ctx, "DR3", info->dbg_regs.dr3, loadfailout);
739 // there is no DR4 or DR5
740 V3_CHKPT_LOAD(ctx, "DR6", info->dbg_regs.dr6, loadfailout);
741 V3_CHKPT_LOAD(ctx, "DR7", info->dbg_regs.dr7, loadfailout);
// Segment and descriptor-table registers
744 V3_CHKPT_LOAD(ctx, "CS", info->segments.cs, loadfailout);
745 V3_CHKPT_LOAD(ctx, "DS", info->segments.ds, loadfailout);
746 V3_CHKPT_LOAD(ctx, "ES", info->segments.es, loadfailout);
747 V3_CHKPT_LOAD(ctx, "FS", info->segments.fs, loadfailout);
748 V3_CHKPT_LOAD(ctx, "GS", info->segments.gs, loadfailout);
749 V3_CHKPT_LOAD(ctx, "SS", info->segments.ss, loadfailout);
750 V3_CHKPT_LOAD(ctx, "LDTR", info->segments.ldtr, loadfailout);
751 V3_CHKPT_LOAD(ctx, "GDTR", info->segments.gdtr, loadfailout);
752 V3_CHKPT_LOAD(ctx, "IDTR", info->segments.idtr, loadfailout);
753 V3_CHKPT_LOAD(ctx, "TR", info->segments.tr, loadfailout);
// Model-specific registers
756 V3_CHKPT_LOAD(ctx, "STAR", info->msrs.star, loadfailout);
757 V3_CHKPT_LOAD(ctx, "LSTAR", info->msrs.lstar, loadfailout);
758 V3_CHKPT_LOAD(ctx, "SFMASK", info->msrs.sfmask, loadfailout);
759 V3_CHKPT_LOAD(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, loadfailout);
761 // Some components of guest state captured in the shadow pager
762 V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout);
763 V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
764 V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
// ctx is zeroed after closing so the failure path does not close it twice.
766 v3_chkpt_close_ctx(ctx); ctx=0;
768 PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");
// Recompute the cached modes from the register state just loaded.
770 info->cpu_mode = v3_get_vm_cpu_mode(info);
771 info->mem_mode = v3_get_vm_mem_mode(info);
773 if (info->shdw_pg_mode == SHADOW_PAGING) {
774 if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
775 if (v3_activate_shadow_pt(info) == -1) {
776 PrintError(info->vm_info, info, "Failed to activate shadow page tables\n");
780 if (v3_activate_passthrough_pt(info) == -1) {
781 PrintError(info->vm_info, info, "Failed to activate passthrough page tables\n");
788 if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
// Architecture-specific state
792 switch (v3_mach_type) {
794 case V3_SVM_REV3_CPU: {
797 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
798 ctx = v3_chkpt_open_ctx(chkpt, key_name);
801 PrintError(info->vm_info, info, "Could not open context to load SVM core\n");
805 if (v3_svm_load_core(info, ctx) < 0 ) {
806 PrintError(info->vm_info, info, "Failed to patch core %d\n", info->vcpu_id);
810 v3_chkpt_close_ctx(ctx); ctx=0;
816 case V3_VMX_EPT_UG_CPU: {
819 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
821 ctx = v3_chkpt_open_ctx(chkpt, key_name);
824 PrintError(info->vm_info, info, "Could not open context to load VMX core\n");
828 if (v3_vmx_load_core(info, ctx) < 0) {
829 PrintError(info->vm_info, info, "VMX checkpoint failed\n");
833 v3_chkpt_close_ctx(ctx); ctx=0;
838 PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
844 PrintDebug(info->vm_info, info, "Load of core succeeded\n");
846 v3_print_guest_state(info);
// Failure path: report, and close the context if one is still open.
851 PrintError(info->vm_info, info, "Failed to load core\n");
852 if (ctx) { v3_chkpt_close_ctx(ctx);}
857 // GEM5 - Hypercall for initiating transfer to gem5 (checkpoint)
// Save one virtual core to a checkpoint.
//   info  - core to save
//   chkpt - open checkpoint to write to
//   opts  - checkpoint option flags; V3_CHKPT_OPT_SKIP_ARCHDEP is tested here
// Architecture-independent state is written to the context
// "guest_info<vcpu_id>", in the same item order that load_core reads it.
// Architecture-specific state is written to "vmcb_data<vcpu_id>" (SVM) or
// "vmcs_data<vcpu_id>" (VMX).
// Each V3_CHKPT_SAVE is given the label savefailout, presumably its jump target
// on failure; the code at the end logs the failure and closes ctx if it is
// still open.
859 static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt_options_t opts) {
860 extern v3_cpu_arch_t v3_mach_type;
865 PrintDebug(info->vm_info, info, "Saving core\n");
867 v3_print_guest_state(info);
869 memset(key_name, 0, 16);
871 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
873 ctx = v3_chkpt_open_ctx(chkpt, key_name);
876 PrintError(info->vm_info, info, "Unable to open context to save core\n");
// Instruction pointer
881 V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
// General purpose registers
884 V3_CHKPT_SAVE(ctx,"RDI",info->vm_regs.rdi, savefailout);
885 V3_CHKPT_SAVE(ctx,"RSI",info->vm_regs.rsi, savefailout);
886 V3_CHKPT_SAVE(ctx,"RBP",info->vm_regs.rbp, savefailout);
887 V3_CHKPT_SAVE(ctx,"RSP",info->vm_regs.rsp, savefailout);
888 V3_CHKPT_SAVE(ctx,"RBX",info->vm_regs.rbx, savefailout);
889 V3_CHKPT_SAVE(ctx,"RDX",info->vm_regs.rdx, savefailout);
890 V3_CHKPT_SAVE(ctx,"RCX",info->vm_regs.rcx, savefailout);
891 V3_CHKPT_SAVE(ctx,"RAX",info->vm_regs.rax, savefailout);
892 V3_CHKPT_SAVE(ctx,"R8",info->vm_regs.r8, savefailout);
893 V3_CHKPT_SAVE(ctx,"R9",info->vm_regs.r9, savefailout);
894 V3_CHKPT_SAVE(ctx,"R10",info->vm_regs.r10, savefailout);
895 V3_CHKPT_SAVE(ctx,"R11",info->vm_regs.r11, savefailout);
896 V3_CHKPT_SAVE(ctx,"R12",info->vm_regs.r12, savefailout);
897 V3_CHKPT_SAVE(ctx,"R13",info->vm_regs.r13, savefailout);
898 V3_CHKPT_SAVE(ctx,"R14",info->vm_regs.r14, savefailout);
899 V3_CHKPT_SAVE(ctx,"R15",info->vm_regs.r15, savefailout);
// Control registers
902 V3_CHKPT_SAVE(ctx, "CR0", info->ctrl_regs.cr0, savefailout);
904 V3_CHKPT_SAVE(ctx, "CR2", info->ctrl_regs.cr2, savefailout);
905 V3_CHKPT_SAVE(ctx, "CR3", info->ctrl_regs.cr3, savefailout);
906 V3_CHKPT_SAVE(ctx, "CR4", info->ctrl_regs.cr4, savefailout);
907 // There are no CR5,6,7
908 // CR8 is derived from apic_tpr
// Bits 7:4 of apic_tpr are stored as the CR8 value.
909 tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
910 V3_CHKPT_SAVE(ctx, "CR8", tempreg, savefailout);
911 V3_CHKPT_SAVE(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, savefailout);
912 V3_CHKPT_SAVE(ctx, "RFLAGS", info->ctrl_regs.rflags, savefailout);
913 V3_CHKPT_SAVE(ctx, "EFER", info->ctrl_regs.efer, savefailout);
// Debug registers
916 V3_CHKPT_SAVE(ctx, "DR0", info->dbg_regs.dr0, savefailout);
917 V3_CHKPT_SAVE(ctx, "DR1", info->dbg_regs.dr1, savefailout);
918 V3_CHKPT_SAVE(ctx, "DR2", info->dbg_regs.dr2, savefailout);
919 V3_CHKPT_SAVE(ctx, "DR3", info->dbg_regs.dr3, savefailout);
920 // there is no DR4 or DR5
921 V3_CHKPT_SAVE(ctx, "DR6", info->dbg_regs.dr6, savefailout);
922 V3_CHKPT_SAVE(ctx, "DR7", info->dbg_regs.dr7, savefailout);
// Segment and descriptor-table registers
925 V3_CHKPT_SAVE(ctx, "CS", info->segments.cs, savefailout);
926 V3_CHKPT_SAVE(ctx, "DS", info->segments.ds, savefailout);
927 V3_CHKPT_SAVE(ctx, "ES", info->segments.es, savefailout);
928 V3_CHKPT_SAVE(ctx, "FS", info->segments.fs, savefailout);
929 V3_CHKPT_SAVE(ctx, "GS", info->segments.gs, savefailout);
930 V3_CHKPT_SAVE(ctx, "SS", info->segments.ss, savefailout);
931 V3_CHKPT_SAVE(ctx, "LDTR", info->segments.ldtr, savefailout);
932 V3_CHKPT_SAVE(ctx, "GDTR", info->segments.gdtr, savefailout);
933 V3_CHKPT_SAVE(ctx, "IDTR", info->segments.idtr, savefailout);
934 V3_CHKPT_SAVE(ctx, "TR", info->segments.tr, savefailout);
// Model-specific registers
937 V3_CHKPT_SAVE(ctx, "STAR", info->msrs.star, savefailout);
938 V3_CHKPT_SAVE(ctx, "LSTAR", info->msrs.lstar, savefailout);
939 V3_CHKPT_SAVE(ctx, "SFMASK", info->msrs.sfmask, savefailout);
940 V3_CHKPT_SAVE(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, savefailout);
942 // Some components of guest state captured in the shadow pager
943 V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout);
944 V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
945 V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
// ctx is zeroed after closing so the failure path does not close it twice.
947 v3_chkpt_close_ctx(ctx); ctx=0;
949 if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
953 // Architecture-specific code
954 switch (v3_mach_type) {
956 case V3_SVM_REV3_CPU: {
959 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
961 ctx = v3_chkpt_open_ctx(chkpt, key_name);
964 PrintError(info->vm_info, info, "Could not open context to store SVM core\n");
968 if (v3_svm_save_core(info, ctx) < 0) {
969 PrintError(info->vm_info, info, "VMCB Unable to be written\n");
973 v3_chkpt_close_ctx(ctx); ctx=0;;
978 case V3_VMX_EPT_UG_CPU: {
981 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
983 ctx = v3_chkpt_open_ctx(chkpt, key_name);
986 PrintError(info->vm_info, info, "Could not open context to store VMX core\n");
990 if (v3_vmx_save_core(info, ctx) == -1) {
991 PrintError(info->vm_info, info, "VMX checkpoint failed\n");
995 v3_chkpt_close_ctx(ctx); ctx=0;
1000 PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
// Failure path: report, and close the context if one is still open.
1010 PrintError(info->vm_info, info, "Failed to save core\n");
1011 if (ctx) { v3_chkpt_close_ctx(ctx); }
1017 // GEM5 - Madhav has debug code here for printing instructions
// Save a VM to a checkpoint.
//   vm    - VM to checkpoint
//   store - name of the backing store to use
//   url   - store-specific destination
//   opts  - V3_CHKPT_OPT_SKIP_MEM / _SKIP_DEVS / _SKIP_CORES leave out the
//           corresponding part; opts is also passed on to save_core
// A running VM is held at a barrier while its state is written. State is
// written in this order: memory, devices, header, cores.
1020 int v3_chkpt_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1021 struct v3_chkpt * chkpt = NULL;
1026 chkpt = chkpt_open(vm, store, url, SAVE);
1028 if (chkpt == NULL) {
1029 PrintError(vm, VCORE_NONE, "Error creating checkpoint store for url %s\n",url);
1033 /* If this guest is running we need to block it while the checkpoint occurs */
1034 if (vm->run_state == VM_RUNNING) {
// Retry until the barrier is raised.
1035 while (v3_raise_barrier(vm, NULL) == -1);
1038 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1039 if ((ret = save_memory(vm, chkpt)) == -1) {
1040 PrintError(vm, VCORE_NONE, "Unable to save memory\n");
1046 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1047 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
1048 PrintError(vm, VCORE_NONE, "Unable to save devices\n");
1053 if ((ret = save_header(vm, chkpt)) == -1) {
1054 PrintError(vm, VCORE_NONE, "Unable to save header\n");
1058 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1059 for (i = 0; i < vm->num_cores; i++){
1060 if ((ret = save_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1061 PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
1069 /* Resume the guest if it was running */
1070 if (vm->run_state == VM_RUNNING) {
1071 v3_lower_barrier(vm);
// Load a VM from a checkpoint.
//   vm    - VM to restore into
//   store - name of the backing store to use
//   url   - store-specific source
//   opts  - V3_CHKPT_OPT_SKIP_MEM / _SKIP_DEVS / _SKIP_CORES leave out the
//           corresponding part; opts is also passed on to load_core
// A running VM is held at a barrier while its state is replaced. State is read
// in this order: memory, devices, header, cores. At the end the barrier is
// lowered again; a VM that was running can be marked VM_STOPPED first (see the
// existing comments near the end).
1080 int v3_chkpt_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1081 struct v3_chkpt * chkpt = NULL;
1085 chkpt = chkpt_open(vm, store, url, LOAD);
1087 if (chkpt == NULL) {
1088 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1092 /* If this guest is running we need to block it while the checkpoint occurs */
1093 if (vm->run_state == VM_RUNNING) {
// Retry until the barrier is raised.
1094 while (v3_raise_barrier(vm, NULL) == -1);
1097 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1098 if ((ret = load_memory(vm, chkpt)) == -1) {
1099 PrintError(vm, VCORE_NONE, "Unable to load memory\n");
1104 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1105 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
1106 PrintError(vm, VCORE_NONE, "Unable to load devies\n");
1112 if ((ret = load_header(vm, chkpt)) == -1) {
1113 PrintError(vm, VCORE_NONE, "Unable to load header\n");
1118 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1119 for (i = 0; i < vm->num_cores; i++) {
1120 if ((ret = load_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1121 PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
1129 /* Resume the guest if it was running and we didn't just trash the state*/
1130 if (vm->run_state == VM_RUNNING) {
1133 vm->run_state = VM_STOPPED;
1136 /* We check the run state of the VM after every barrier
1137 So this will immediately halt the VM
1139 v3_lower_barrier(vm);
1149 #ifdef V3_CONFIG_LIVE_MIGRATION
// Termination limits for the iterative page-sending loop in v3_chkpt_send_vm:
// the loop makes its last pass when a round has fewer than MOD_THRESHOLD
// modified pages, or when the iteration count exceeds ITER_THRESHOLD.
1151 #define MOD_THRESHOLD 200 // modified-page count below which the current round is the last
1152 #define ITER_THRESHOLD 32 // iteration count above which the current round is the last
// Send a VM to a checkpoint store incrementally (live migration, send side).
//   vm    - VM to send
//   store - name of the backing store to use
//   url   - store-specific destination
//   opts  - V3_CHKPT_OPT_SKIP_MEM / _SKIP_DEVS / _SKIP_CORES leave out the
//           corresponding part; opts is also passed on to save_core
// Memory goes first, in rounds. The first round sends every page. Each later
// round sends the pages written since the previous one, as recorded by page
// tracking. The rounds end when few pages were modified or the iteration limit
// is exceeded; an all-zero bitmap then marks the end of the memory stream.
// Devices, header and cores follow. Unless memory is skipped, every core must
// use shadow paging.
1156 int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1157 struct v3_chkpt * chkpt = NULL;
1160 bool last_modpage_iteration=false;
1161 struct v3_bitmap modified_pages_to_send;
1162 uint64_t start_time;
1164 int num_mod_pages=0;
1165 struct mem_migration_state *mm_state;
1168 // Currently will work only for shadow paging
1169 for (i=0;i<vm->num_cores;i++) {
1170 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1171 PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
1177 chkpt = chkpt_open(vm, store, url, SAVE);
1179 if (chkpt == NULL) {
1180 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1185 if (opts & V3_CHKPT_OPT_SKIP_MEM) {
1189 // In a send, the memory is copied incrementally first,
1190 // followed by the remainder of the state
1192 if (v3_bitmap_init(&modified_pages_to_send,
1193 vm->mem_size>>12 // number of pages in main region
1195 PrintError(vm, VCORE_NONE, "Could not intialize bitmap.\n");
1199 // 0. Initialize bitmap to all 1s
1200 for (i=0; i < modified_pages_to_send.num_bits; i++) {
1201 v3_bitmap_set(&modified_pages_to_send,i);
1205 while (!last_modpage_iteration) {
// NOTE(review): when V3_CONFIG_DEBUG_CHECKPOINT is not defined, PrintDebug is
// defined near the top of this file to expand to nothing, so this i++ is then
// not evaluated. The loop limit below uses iter, not i.
1206 PrintDebug(vm, VCORE_NONE, "Modified memory page iteration %d\n",i++);
1208 start_time = v3_get_host_time(&(vm->cores[0].time_state));
1210 // We will pause the VM for a short while
1211 // so that we can collect the set of changed pages
1212 if (v3_pause_vm(vm) == -1) {
1213 PrintError(vm, VCORE_NONE, "Could not pause VM\n");
1219 // special case, we already have the pages to send (all of them)
1220 // they are already in modified_pages_to_send
1222 // normally, we are in the middle of a round
1223 // We need to copy from the current tracking bitmap
1224 // to our send bitmap
1225 v3_bitmap_copy(&modified_pages_to_send,&(mm_state->modified_pages));
1226 // and now we need to remove our tracking
1227 stop_page_tracking(mm_state);
1230 // are we done? (note that we are still paused)
1231 num_mod_pages = v3_bitmap_count(&modified_pages_to_send);
1232 if (num_mod_pages<MOD_THRESHOLD || iter>ITER_THRESHOLD) {
1233 // we are done, so we will not restart page tracking
1234 // the vm is paused, and so we should be able
1235 // to just send the data
1236 PrintDebug(vm, VCORE_NONE, "Last modified memory page iteration.\n");
1237 last_modpage_iteration = true;
1239 // we are not done, so we will restart page tracking
1240 // to prepare for a second round of pages
1241 // we will resume the VM as this happens
1242 if (!(mm_state=start_page_tracking(vm))) {
1243 PrintError(vm, VCORE_NONE, "Error enabling page tracking.\n");
1247 if (v3_continue_vm(vm) == -1) {
1248 PrintError(vm, VCORE_NONE, "Error resuming the VM\n");
1249 stop_page_tracking(mm_state);
1254 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1255 PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1259 // At this point, we are either paused and about to copy
1260 // the last chunk, or we are running, and will copy the last
1261 // round in parallel with current execution
1262 if (num_mod_pages>0) {
1263 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1264 PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
1268 } // we don't want to copy an empty bitmap here
// Clear every bit so that the bitmap sent next is the all-zero end marker.
1273 if (v3_bitmap_reset(&modified_pages_to_send) == -1) {
1274 PrintError(vm, VCORE_NONE, "Error reseting bitmap.\n");
1279 // send bitmap of 0s to signal end of modpages
1280 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1281 PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
1287 // save the non-memory state
1288 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1289 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
1290 PrintError(vm, VCORE_NONE, "Unable to save devices\n");
1295 if ((ret = save_header(vm, chkpt)) == -1) {
1296 PrintError(vm, VCORE_NONE, "Unable to save header\n");
1300 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1301 for (i = 0; i < vm->num_cores; i++){
1302 if ((ret = save_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1303 PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
1309 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1310 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1311 PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1312 PrintDebug(vm, VCORE_NONE, "Done sending VM!\n");
// The send bitmap is only initialized when memory is not skipped.
1314 v3_bitmap_deinit(&modified_pages_to_send);
// Receive a VM from a checkpoint store incrementally (live migration, receive
// side; counterpart of v3_chkpt_send_vm).
//   vm    - VM to restore into
//   store - name of the backing store to use
//   url   - store-specific source
//   opts  - V3_CHKPT_OPT_SKIP_MEM / _SKIP_DEVS / _SKIP_CORES leave out the
//           corresponding part; opts is also passed on to load_core
// Unless memory is skipped, every core must use shadow paging. Rounds of
// modified pages are read with load_inc_memory; a return of -1 from it is
// reported as an error. Devices, header and cores are loaded afterwards, and
// the barrier raised for a running VM is lowered at the end.
1323 int v3_chkpt_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1324 struct v3_chkpt * chkpt = NULL;
1327 struct v3_bitmap mod_pgs;
1329 // Currently will work only for shadow paging
1330 for (i=0;i<vm->num_cores;i++) {
1331 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1332 PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
1337 chkpt = chkpt_open(vm, store, url, LOAD);
1339 if (chkpt == NULL) {
1340 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1346 if (opts & V3_CHKPT_OPT_SKIP_MEM) {
// One bit per 4KB page of guest memory.
1350 if (v3_bitmap_init(&mod_pgs,vm->mem_size>>12) == -1) {
1352 PrintError(vm, VCORE_NONE, "Could not intialize bitmap.\n");
1356 /* If this guest is running we need to block it while the checkpoint occurs */
1357 if (vm->run_state == VM_RUNNING) {
// Retry until the barrier is raised.
1358 while (v3_raise_barrier(vm, NULL) == -1);
1363 // 1. Receive copy of bitmap
// NOTE(review): when V3_CONFIG_DEBUG_CHECKPOINT is not defined, PrintDebug is
// defined near the top of this file to expand to nothing, so this i++ is then
// not evaluated.
1365 PrintDebug(vm, VCORE_NONE, "Memory page iteration %d\n",i++);
1366 int retval = load_inc_memory(vm, &mod_pgs, chkpt);
1368 // end of receiving memory pages
1370 } else if (retval == -1) {
1371 PrintError(vm, VCORE_NONE, "Error receiving incremental memory.\n");
1379 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1380 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
1381 PrintError(vm, VCORE_NONE, "Unable to load devices\n");
1387 if ((ret = load_header(vm, chkpt)) == -1) {
1388 PrintError(vm, VCORE_NONE, "Unable to load header\n");
1394 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1395 for (i = 0; i < vm->num_cores; i++) {
1396 if ((ret = load_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1397 PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
1405 PrintError(vm, VCORE_NONE, "Unable to receive VM\n");
1407 PrintDebug(vm, VCORE_NONE, "Done receving the VM\n");
1411 /* Resume the guest if it was running and we didn't just trash the state*/
1412 if (vm->run_state == VM_RUNNING) {
1414 PrintError(vm, VCORE_NONE, "VM was previously running. It is now borked. Pausing it. \n");
1415 vm->run_state = VM_STOPPED;
1418 /* We check the run state of the VM after every barrier
1419 So this will immediately halt the VM
1421 v3_lower_barrier(vm);
1425 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1426 v3_bitmap_deinit(&mod_pgs);