2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2011, Madhav Suresh <madhav@u.northwestern.edu>
11 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Madhav Suresh <madhav@u.northwestern.edu>
15 * Arefin Huq <fig@arefin.net>
16 * Peter Dinda <pdinda@northwestern.edu> (store interface changes)
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_sprintf.h>
25 #include <palacios/vm_guest.h>
26 #include <palacios/svm.h>
27 #include <palacios/vmx.h>
28 #include <palacios/vmm_checkpoint.h>
29 #include <palacios/vmm_hashtable.h>
30 #include <palacios/vmm_direct_paging.h>
31 #include <palacios/vmm_debug.h>
33 #include <palacios/vmm_dev_mgr.h>
35 #ifdef V3_CONFIG_LIVE_MIGRATION
36 #include <palacios/vmm_time.h>
37 #include <palacios/vm_guest_mem.h>
38 #include <palacios/vmm_shadow_paging.h>
41 #ifndef V3_CONFIG_DEBUG_CHECKPOINT
43 #define PrintDebug(fmt, args...)
// Table of registered checkpoint backing stores. V3_init_checkpoint() creates
// it and inserts each store keyed by its name string; chkpt_open() searches it.
47 static struct hashtable * store_table = NULL;
// Direction a checkpoint is opened in; handed to a store's open_chkpt().
51 typedef enum {SAVE, LOAD} chkpt_mode_t;
/*
 * Operations implemented by a checkpoint backing store. The code in this
 * file reaches a store only through these function pointers.
 */
53 struct chkpt_interface {
55 // Opening a checkpoint should return a pointer to the internal representation
56 // of the checkpoint in the store. This will be passed back
57 // as "store_data". Return NULL if the context cannot be opened
58 void * (*open_chkpt)(char * url, chkpt_mode_t mode);
59 // Closing the checkpoint should return -1 on failure, 0 on success
60 int (*close_chkpt)(void * store_data);
62 // Opening a context on the checkpoint with a given name should return
63 // a pointer to an internal representation of the context. This pointer
64 // is then passed back as "ctx".
65 // We will open only a single context at a time.
66 void * (*open_ctx)(void * store_data, char *name);
67 // Closing the context should return -1 on failure, 0 on success
68 int (*close_ctx)(void * store_data, void * ctx);
70 // Save and load include a tagged data buffer. These are
71 // "all or nothing" writes and reads.
72 // return -1 on failure, and 0 on success
// "tag" names the item and "len" is the length of "buf"; callers in this file
// pass byte counts (e.g. strlen() of a header, the guest memory size).
74 int (*save)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
75 int (*load)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
// Fields of the per-checkpoint state (struct v3_chkpt) that chkpt_open()
// allocates and fills in.
80 struct v3_vm_info * vm;
// The single context currently open on this checkpoint, or NULL when none is
// open; v3_chkpt_open_ctx() sets it and v3_chkpt_close_ctx() clears it.
82 struct v3_chkpt_ctx *current_ctx;
// Operations of the backing store that holds this checkpoint.
84 struct chkpt_interface * interface;
/*
 * Hash callback given to v3_create_htable() for store_table. The key is
 * treated as a NUL-terminated string and its characters (terminator
 * excluded) are hashed with v3_hash_buffer().
 */
92 static uint_t store_hash_fn(addr_t key) {
93 char * name = (char *)key;
94 return v3_hash_buffer((uint8_t *)name, strlen(name));
/*
 * Key-equality callback given to v3_create_htable() for store_table. Both
 * keys are treated as NUL-terminated strings; the result is 1 when strcmp()
 * finds them equal and 0 otherwise.
 */
97 static int store_eq_fn(addr_t key1, addr_t key2) {
98 char * name1 = (char *)key1;
99 char * name2 = (char *)key2;
101 return (strcmp(name1, name2) == 0);
106 #include "vmm_chkpt_stores.h"
/*
 * Create store_table and register every checkpoint backing store found
 * between the __start__v3_chkpt_stores and __stop__v3_chkpt_stores symbols.
 * Each store is keyed by its name string. A name that is already present, or
 * an insert that fails, is reported with PrintError.
 * NOTE(review): what is returned after those errors is not visible here.
 */
109 int V3_init_checkpoint() {
110 extern struct chkpt_interface * __start__v3_chkpt_stores[];
111 extern struct chkpt_interface * __stop__v3_chkpt_stores[];
112 struct chkpt_interface ** tmp_store = __start__v3_chkpt_stores;
115 store_table = v3_create_htable(0, store_hash_fn, store_eq_fn);
// walk the array of store pointers until the stop symbol is reached
117 while (tmp_store != __stop__v3_chkpt_stores) {
118 V3_Print(VM_NONE, VCORE_NONE, "Registering Checkpoint Backing Store (%s)\n", (*tmp_store)->name);
// a non-zero search result means a store with this name is already registered
120 if (v3_htable_search(store_table, (addr_t)((*tmp_store)->name))) {
121 PrintError(VM_NONE, VCORE_NONE, "Multiple instances of Checkpoint backing Store (%s)\n", (*tmp_store)->name);
// v3_htable_insert() returning 0 is treated as failure
125 if (v3_htable_insert(store_table, (addr_t)((*tmp_store)->name), (addr_t)(*tmp_store)) == 0) {
126 PrintError(VM_NONE, VCORE_NONE, "Could not register Checkpoint backing store (%s)\n", (*tmp_store)->name);
// step to the next entry of the store array
130 tmp_store = &(__start__v3_chkpt_stores[++i]);
/*
 * Tear down the table of backing stores built by V3_init_checkpoint().
 * NOTE(review): the two 0 arguments presumably ask v3_free_htable() not to
 * free the stored values or keys (they point at the stores themselves) -
 * confirm against the hashtable API.
 */
136 int V3_deinit_checkpoint() {
137 v3_free_htable(store_table, 0, 0);
// Header strings written by save_header(); load_header() reads back
// strlen() bytes of the matching one. v3_mach_type selects SVM or VMX.
142 static char svm_chkpt_header[] = "v3vee palacios checkpoint version: x.x, SVM x.x";
143 static char vmx_chkpt_header[] = "v3vee palacios checkpoint version: x.x, VMX x.x";
/*
 * Close a checkpoint through its backing store's close_chkpt() operation.
 * Two failures are reported with PrintError: the store failing to close a
 * valid checkpoint, and being handed a null checkpoint.
 * NOTE(review): the return statements are not visible here; presumably -1 on
 * failure and 0 on success - confirm.
 */
145 static int chkpt_close(struct v3_chkpt * chkpt) {
// give the store back its own handle (store_data) so it can close the checkpoint
149 rc = chkpt->interface->close_chkpt(chkpt->store_data);
154 PrintError(VM_NONE, VCORE_NONE, "Internal store failed to close valid checkpoint\n");
160 PrintError(VM_NONE, VCORE_NONE, "Attempt to close null checkpoint\n");
/*
 * Open a checkpoint on a named backing store.
 *
 * vm    - VM the checkpoint belongs to (used here for logging)
 * store - name of the backing store, looked up in store_table
 * url   - location string handed to the store's open_chkpt()
 * mode  - SAVE or LOAD
 *
 * On success the new struct v3_chkpt records the store interface and the
 * store's handle, with no context open. Errors are logged when the store is
 * unknown, the store cannot open the url, or the allocation fails.
 * NOTE(review): the return statements are not visible here; callers in this
 * file test the result against NULL.
 */
166 static struct v3_chkpt * chkpt_open(struct v3_vm_info * vm, char * store, char * url, chkpt_mode_t mode) {
167 struct chkpt_interface * iface = NULL;
168 struct v3_chkpt * chkpt = NULL;
169 void * store_data = NULL;
// find the store's operation table by name
171 iface = (void *)v3_htable_search(store_table, (addr_t)store);
174 V3_Print(vm, VCORE_NONE, "Error: Could not locate Checkpoint interface for store (%s)\n", store);
// let the store open its own representation of the checkpoint
178 store_data = iface->open_chkpt(url, mode);
180 if (store_data == NULL) {
181 PrintError(vm, VCORE_NONE, "Could not open url (%s) for backing store (%s)\n", url, store);
186 chkpt = V3_Malloc(sizeof(struct v3_chkpt));
189 PrintError(vm, VCORE_NONE, "Could not allocate checkpoint state, closing checkpoint\n");
// undo the store-side open so the store's handle is not leaked
190 iface->close_chkpt(store_data);
194 memset(chkpt,0,sizeof(struct v3_chkpt));
196 chkpt->interface = iface;
198 chkpt->store_data = store_data;
199 chkpt->current_ctx = NULL;
/*
 * Open a named context on a checkpoint.
 *
 * Only one context may be open on a checkpoint at a time: if
 * chkpt->current_ctx is already set, an error is logged. Otherwise a
 * struct v3_chkpt_ctx is allocated and zeroed, the backing store opens its
 * side of the context, and the new context becomes chkpt->current_ctx.
 * NOTE(review): the failure returns, and any cleanup of ctx when the store
 * fails to open the context, are not visible here; callers in this file
 * test the result for NULL.
 */
204 struct v3_chkpt_ctx * v3_chkpt_open_ctx(struct v3_chkpt * chkpt, char * name) {
205 struct v3_chkpt_ctx * ctx;
207 if (chkpt->current_ctx) {
208 PrintError(VM_NONE, VCORE_NONE, "Attempt to open context %s before old context has been closed\n", name);
212 ctx = V3_Malloc(sizeof(struct v3_chkpt_ctx));
215 PrintError(VM_NONE, VCORE_NONE, "Unable to allocate context\n");
219 memset(ctx, 0, sizeof(struct v3_chkpt_ctx));
// the store's handle for this context; it is passed back on save/load/close
222 ctx->store_ctx = chkpt->interface->open_ctx(chkpt->store_data, name);
224 if (!(ctx->store_ctx)) {
225 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to open context %s\n",name);
230 chkpt->current_ctx = ctx;
/*
 * Close a context opened with v3_chkpt_open_ctx().
 *
 * The context must be the current context of its checkpoint; otherwise an
 * error is logged. The backing store is asked to close its side, and a
 * failure there is logged. The checkpoint's current_ctx is then cleared.
 * NOTE(review): ctx is dereferenced on entry and no NULL check is visible -
 * callers must not pass NULL. Return statements are not visible here.
 */
235 int v3_chkpt_close_ctx(struct v3_chkpt_ctx * ctx) {
236 struct v3_chkpt * chkpt = ctx->chkpt;
239 if (chkpt->current_ctx != ctx) {
240 PrintError(VM_NONE, VCORE_NONE, "Attempt to close a context that is not the current context on the store\n");
244 ret = chkpt->interface->close_ctx(chkpt->store_data, ctx->store_ctx);
247 PrintError(VM_NONE, VCORE_NONE, "Failed to close context on store, closing device-independent context anyway - bad\n");
// the checkpoint has no open context from here on
251 chkpt->current_ctx=NULL;
262 int v3_chkpt_save(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
263 struct v3_chkpt * chkpt = ctx->chkpt;
267 PrintError(VM_NONE, VCORE_NONE, "Attempt to save tag %s on null context\n",tag);
271 if (chkpt->current_ctx != ctx) {
272 PrintError(VM_NONE, VCORE_NONE, "Attempt to save on context that is not the current context for the store\n");
276 rc = chkpt->interface->save(chkpt->store_data, ctx->store_ctx, tag , len, buf);
279 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to save tag %s on valid context\n",tag);
287 int v3_chkpt_load(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
288 struct v3_chkpt * chkpt = ctx->chkpt;
292 PrintError(VM_NONE, VCORE_NONE, "Attempt to load tag %s from null context\n",tag);
296 if (chkpt->current_ctx != ctx) {
297 PrintError(VM_NONE, VCORE_NONE, "Attempt to load from context that is not the current context for the store\n");
301 rc = chkpt->interface->load(chkpt->store_data, ctx->store_ctx, tag, len, buf);
304 PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to load tag %s from valid context\n",tag);
/*
 * Load the whole guest memory image from the checkpoint.
 *
 * Opens the "memory_img" context and loads vm->mem_size bytes, tagged
 * "memory_img", into the host virtual address of the VM's base memory
 * region. The context is closed on both the failure and the normal path.
 * NOTE(review): the return statements are not visible here; callers compare
 * the result with -1.
 */
313 static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
315 void * guest_mem_base = NULL;
// host virtual address at which the guest's base region is mapped
319 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
321 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
324 PrintError(vm, VCORE_NONE, "Unable to open context for memory load\n");
328 if (v3_chkpt_load(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
// NOTE(review): "ret" is printed as the result byte count but is not assigned
// in any visible line - confirm it carries a meaningful value.
329 PrintError(vm, VCORE_NONE, "Unable to load all of memory (requested=%llu bytes, result=%llu bytes\n",(uint64_t)(vm->mem_size),ret);
330 v3_chkpt_close_ctx(ctx);
334 v3_chkpt_close_ctx(ctx);
/*
 * Save the whole guest memory image to the checkpoint.
 *
 * Opens the "memory_img" context and saves vm->mem_size bytes, tagged
 * "memory_img", from the host virtual address of the VM's base memory
 * region. The context is closed on both the failure and the normal path.
 * NOTE(review): the return statements are not visible here; callers compare
 * the result with -1.
 */
340 static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
341 void * guest_mem_base = NULL;
// host virtual address at which the guest's base region is mapped
345 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
347 ctx = v3_chkpt_open_ctx(chkpt, "memory_img");
350 PrintError(vm, VCORE_NONE, "Unable to open context to save memory\n");
354 if (v3_chkpt_save(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
// NOTE(review): "ret" is printed as the received count but is not assigned
// in any visible line - confirm it carries a meaningful value.
355 PrintError(vm, VCORE_NONE, "Unable to save all of memory (requested=%llu, received=%llu)\n",(uint64_t)(vm->mem_size),ret);
356 v3_chkpt_close_ctx(ctx);
360 v3_chkpt_close_ctx(ctx);
365 #ifdef V3_CONFIG_LIVE_MIGRATION
// State for one round of dirty-page tracking during live migration.
367 struct mem_migration_state {
// VM whose writes are tracked; stop_page_tracking() uses it to unregister
368 struct v3_vm_info *vm;
// one bit per 4 KB guest physical page (index = gpa >> 12), set by
// paging_callback() when a write fault hits that page
369 struct v3_bitmap modified_pages;
/*
 * Shadow-paging event callback used for dirty-page tracking.
 *
 * priv_data is the struct mem_migration_state registered by
 * start_page_tracking(). For a write page fault seen in the SHADOW_PREIMPL
 * phase, the faulting guest virtual address is translated to a guest
 * physical address and the bit for that page is set in modified_pages.
 * Addresses that do not translate, and all other events, are ignored.
 */
372 static int paging_callback(struct guest_info *core,
373 struct v3_shdw_pg_event *event,
376 struct mem_migration_state *m = (struct mem_migration_state *)priv_data;
378 if (event->event_type==SHADOW_PAGEFAULT &&
379 event->event_order==SHADOW_PREIMPL &&
380 event->error_code.write) {
382 if (!v3_gva_to_gpa(core,event->gva,&gpa)) {
383 // write to this page
384 v3_bitmap_set(&(m->modified_pages),gpa>>12);
386 // no worries, this isn't physical memory
389 // we don't care about other events
/*
 * Begin tracking guest pages modified from now on.
 *
 * Allocates a mem_migration_state with one bitmap bit per 4 KB page of guest
 * memory, registers paging_callback() for the VM's shadow paging events, and
 * invalidates every core's shadow page tables.
 * Allocation and bitmap-initialisation failures are logged.
 * NOTE(review): the return statements are not visible here; the caller
 * treats a NULL result as failure.
 */
397 static struct mem_migration_state *start_page_tracking(struct v3_vm_info *vm)
399 struct mem_migration_state *m;
402 m = (struct mem_migration_state *)V3_Malloc(sizeof(struct mem_migration_state));
405 PrintError(vm, VCORE_NONE, "Cannot allocate\n");
// mem_size >> 12 is the number of 4 KB pages in guest memory
411 if (v3_bitmap_init(&(m->modified_pages),vm->mem_size >> 12) == -1) {
412 PrintError(vm, VCORE_NONE, "Failed to initialize modified_pages bit vector");
416 v3_register_shadow_paging_event_callback(vm,paging_callback,m);
// presumably forces later guest writes to fault so the callback sees them - confirm
418 for (i=0;i<vm->num_cores;i++) {
419 v3_invalidate_shadow_pts(&(vm->cores[i]));
422 // and now we should get callbacks as writes happen
/*
 * End a tracking round started by start_page_tracking(): unregister the
 * paging callback and release the modified-pages bitmap. Any bits the caller
 * still needs must be copied out before this is called.
 */
427 static void stop_page_tracking(struct mem_migration_state *m)
429 v3_unregister_shadow_paging_event_callback(m->vm,paging_callback,m);
431 v3_bitmap_deinit(&(m->modified_pages));
444 // zero: done with this round
/*
 * Send one incremental round of guest memory.
 *
 * First the raw bits of mod_pgs_to_send are saved in a "memory_bitmap_bits"
 * context. Then, for every bit set in the bitmap, a "memory_page" context is
 * opened and the matching 4 KB guest page is saved, in bitmap order.
 * Failures to open a context or to save are logged.
 * NOTE(review): the return statements are not visible here; callers compare
 * the result with -1.
 */
445 static int save_inc_memory(struct v3_vm_info * vm,
446 struct v3_bitmap * mod_pgs_to_send,
447 struct v3_chkpt * chkpt) {
448 int page_size_bytes = 1 << 12; // assuming 4k pages right now
451 void * guest_mem_base = NULL;
// bytes needed to hold num_bits bits, rounded up
452 int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8)
453 + ((mod_pgs_to_send->num_bits % 8) > 0);
456 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
458 PrintDebug(vm, VCORE_NONE, "Saving incremental memory.\n");
460 ctx = v3_chkpt_open_ctx(chkpt,"memory_bitmap_bits");
463 PrintError(vm, VCORE_NONE, "Cannot open context for dirty memory bitmap\n");
468 if (v3_chkpt_save(ctx,
469 "memory_bitmap_bits",
471 mod_pgs_to_send->bits)) {
472 PrintError(vm, VCORE_NONE, "Unable to write all of the dirty memory bitmap\n");
473 v3_chkpt_close_ctx(ctx);
477 v3_chkpt_close_ctx(ctx);
479 PrintDebug(vm, VCORE_NONE, "Sent bitmap bits.\n");
481 // Dirty memory pages are sent in bitmap order
482 for (i = 0; i < mod_pgs_to_send->num_bits; i++) {
483 if (v3_bitmap_check(mod_pgs_to_send, i)) {
484 // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n",i);
485 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
487 PrintError(vm, VCORE_NONE, "Unable to open context to send memory page\n");
// NOTE(review): page_size_bytes is an int; if i is also an int (its
// declaration is not visible) the product below overflows for page indexes
// at or above 2^19, i.e. guests with 2 GB or more of memory - confirm.
490 if (v3_chkpt_save(ctx,
493 guest_mem_base + (page_size_bytes * i))) {
494 PrintError(vm, VCORE_NONE, "Unable to send a memory page\n");
495 v3_chkpt_close_ctx(ctx);
499 v3_chkpt_close_ctx(ctx);
510 // zero: ok, but not done
511 // positive: ok, and also done
/*
 * Receive one incremental round of guest memory.
 *
 * First a bitmap is loaded from a "memory_bitmap_bits" context. Then, for
 * every bit set in mod_pgs, a "memory_page" context is opened and the
 * matching 4 KB guest page is loaded, in bitmap order. empty_bitmap stays
 * true only when no bit was set.
 * NOTE(review): the return statements and the destination of the bitmap
 * load are not visible here. The comment kept just above this function says
 * zero means "ok, but not done" and positive means "ok, and also done".
 */
512 static int load_inc_memory(struct v3_vm_info * vm,
513 struct v3_bitmap * mod_pgs,
514 struct v3_chkpt * chkpt) {
515 int page_size_bytes = 1 << 12; // assuming 4k pages right now
518 void * guest_mem_base = NULL;
519 bool empty_bitmap = true;
// bytes needed to hold num_bits bits, rounded up
520 int bitmap_num_bytes = (mod_pgs->num_bits / 8)
521 + ((mod_pgs->num_bits % 8) > 0);
524 guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);
526 ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");
529 PrintError(vm, VCORE_NONE, "Cannot open context to receive memory bitmap\n");
533 if (v3_chkpt_load(ctx,
534 "memory_bitmap_bits",
537 PrintError(vm, VCORE_NONE, "Did not receive all of memory bitmap\n");
538 v3_chkpt_close_ctx(ctx);
542 v3_chkpt_close_ctx(ctx);
544 // Receive also follows bitmap order
545 for (i = 0; i < mod_pgs->num_bits; i ++) {
546 if (v3_bitmap_check(mod_pgs, i)) {
547 PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i);
548 empty_bitmap = false;
549 ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
551 PrintError(vm, VCORE_NONE, "Cannot open context to receive memory page\n");
// NOTE(review): page_size_bytes is an int; if i is also an int (its
// declaration is not visible) the product below overflows for page indexes
// at or above 2^19, i.e. guests with 2 GB or more of memory - confirm.
555 if (v3_chkpt_load(ctx,
558 guest_mem_base + (page_size_bytes * i))) {
559 PrintError(vm, VCORE_NONE, "Did not receive all of memory page\n");
560 v3_chkpt_close_ctx(ctx);
563 v3_chkpt_close_ctx(ctx);
568 // signal end of receiving pages
569 PrintDebug(vm, VCORE_NONE, "Finished receiving pages.\n");
/*
 * Write the checkpoint header.
 *
 * Opens the "header" context and, depending on v3_mach_type, saves either
 * svm_chkpt_header or vmx_chkpt_header (strlen() bytes, so without the
 * terminating NUL) under the tag "header". Unsupported machine types are
 * logged. The context is closed on every visible path.
 * NOTE(review): the return statements are not visible here; callers compare
 * the result with -1.
 */
580 int save_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
581 extern v3_cpu_arch_t v3_mach_type;
584 ctx = v3_chkpt_open_ctx(chkpt, "header");
586 PrintError(vm, VCORE_NONE, "Cannot open context to save header\n");
590 switch (v3_mach_type) {
592 case V3_SVM_REV3_CPU: {
593 if (v3_chkpt_save(ctx, "header", strlen(svm_chkpt_header), svm_chkpt_header)) {
594 PrintError(vm, VCORE_NONE, "Could not save all of SVM header\n");
595 v3_chkpt_close_ctx(ctx);
602 case V3_VMX_EPT_UG_CPU: {
603 if (v3_chkpt_save(ctx, "header", strlen(vmx_chkpt_header), vmx_chkpt_header)) {
604 PrintError(vm, VCORE_NONE, "Could not save all of VMX header\n");
605 v3_chkpt_close_ctx(ctx);
611 PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
612 v3_chkpt_close_ctx(ctx);
616 v3_chkpt_close_ctx(ctx);
/*
 * Read the checkpoint header.
 *
 * Opens the "header" context and, depending on v3_mach_type, loads
 * strlen(svm_chkpt_header) or strlen(vmx_chkpt_header) bytes tagged "header"
 * into a local buffer, which is then NUL-terminated. No visible line
 * compares the loaded text with the expected header string.
 * NOTE(review): unlike save_header(), no NULL check on ctx is visible after
 * v3_chkpt_open_ctx(); a failed open would reach v3_chkpt_load() with a
 * NULL context - confirm.
 * NOTE(review): the return statements are not visible here; callers compare
 * the result with -1.
 */
621 static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
622 extern v3_cpu_arch_t v3_mach_type;
625 ctx = v3_chkpt_open_ctx(chkpt, "header");
627 switch (v3_mach_type) {
629 case V3_SVM_REV3_CPU: {
// one extra byte for the terminator added after the load
630 char header[strlen(svm_chkpt_header) + 1];
632 if (v3_chkpt_load(ctx, "header", strlen(svm_chkpt_header), header)) {
633 PrintError(vm, VCORE_NONE, "Could not load all of SVM header\n");
634 v3_chkpt_close_ctx(ctx);
638 header[strlen(svm_chkpt_header)] = 0;
644 case V3_VMX_EPT_UG_CPU: {
// one extra byte for the terminator added after the load
645 char header[strlen(vmx_chkpt_header) + 1];
647 if (v3_chkpt_load(ctx, "header", strlen(vmx_chkpt_header), header)) {
648 PrintError(vm, VCORE_NONE, "Could not load all of VMX header\n");
649 v3_chkpt_close_ctx(ctx);
653 header[strlen(vmx_chkpt_header)] = 0;
658 PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
659 v3_chkpt_close_ctx(ctx);
663 v3_chkpt_close_ctx(ctx);
/*
 * Restore one virtual core from the checkpoint.
 *
 * Phase 1: open the context "guest_info<vcpu_id>" and load RIP, the general
 * purpose registers, control, debug and segment registers, a set of MSRs,
 * and the guest CR3/CR0/EFER copies kept in the shadow pager state.
 * Phase 2: recompute cpu_mode and mem_mode; when the core uses shadow paging,
 * activate shadow or passthrough page tables.
 * Phase 3: unless opts has V3_CHKPT_OPT_SKIP_ARCHDEP, open "vmcb_data<id>"
 * (SVM) or "vmcs_data<id>" (VMX) and hand it to the matching loader.
 *
 * NOTE(review): the last argument of V3_CHKPT_LOAD is presumably a label
 * jumped to on failure; the visible failure path logs "Failed to load core"
 * and closes ctx if one is still open. Return statements are not visible
 * here; callers compare the result with -1.
 */
669 static int load_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt_options_t opts) {
670 extern v3_cpu_arch_t v3_mach_type;
675 PrintDebug(info->vm_info, info, "Loading core\n");
// key_name is used as a 16-byte buffer for the per-core context names
677 memset(key_name, 0, 16);
679 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
681 ctx = v3_chkpt_open_ctx(chkpt, key_name);
684 PrintError(info->vm_info, info, "Could not open context to load core\n");
// instruction pointer
688 V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);
// general purpose registers
691 V3_CHKPT_LOAD(ctx,"RDI",info->vm_regs.rdi, loadfailout);
692 V3_CHKPT_LOAD(ctx,"RSI",info->vm_regs.rsi, loadfailout);
693 V3_CHKPT_LOAD(ctx,"RBP",info->vm_regs.rbp, loadfailout);
694 V3_CHKPT_LOAD(ctx,"RSP",info->vm_regs.rsp, loadfailout);
695 V3_CHKPT_LOAD(ctx,"RBX",info->vm_regs.rbx, loadfailout);
696 V3_CHKPT_LOAD(ctx,"RDX",info->vm_regs.rdx, loadfailout);
697 V3_CHKPT_LOAD(ctx,"RCX",info->vm_regs.rcx, loadfailout);
698 V3_CHKPT_LOAD(ctx,"RAX",info->vm_regs.rax, loadfailout);
699 V3_CHKPT_LOAD(ctx,"R8",info->vm_regs.r8, loadfailout);
700 V3_CHKPT_LOAD(ctx,"R9",info->vm_regs.r9, loadfailout);
701 V3_CHKPT_LOAD(ctx,"R10",info->vm_regs.r10, loadfailout);
702 V3_CHKPT_LOAD(ctx,"R11",info->vm_regs.r11, loadfailout);
703 V3_CHKPT_LOAD(ctx,"R12",info->vm_regs.r12, loadfailout);
704 V3_CHKPT_LOAD(ctx,"R13",info->vm_regs.r13, loadfailout);
705 V3_CHKPT_LOAD(ctx,"R14",info->vm_regs.r14, loadfailout);
706 V3_CHKPT_LOAD(ctx,"R15",info->vm_regs.r15, loadfailout);
// control registers
709 V3_CHKPT_LOAD(ctx, "CR0", info->ctrl_regs.cr0, loadfailout);
711 V3_CHKPT_LOAD(ctx, "CR2", info->ctrl_regs.cr2, loadfailout);
712 V3_CHKPT_LOAD(ctx, "CR3", info->ctrl_regs.cr3, loadfailout);
713 V3_CHKPT_LOAD(ctx, "CR4", info->ctrl_regs.cr4, loadfailout);
714 // There are no CR5,6,7
715 // CR8 is derived from apic_tpr
// the "CR8" entry is read into the scratch variable tempreg; no visible line
// uses that value afterwards, and APIC_TPR is loaded separately just below
716 tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
717 V3_CHKPT_LOAD(ctx, "CR8", tempreg, loadfailout);
718 V3_CHKPT_LOAD(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, loadfailout);
719 V3_CHKPT_LOAD(ctx, "RFLAGS", info->ctrl_regs.rflags, loadfailout);
720 V3_CHKPT_LOAD(ctx, "EFER", info->ctrl_regs.efer, loadfailout);
// debug registers
723 V3_CHKPT_LOAD(ctx, "DR0", info->dbg_regs.dr0, loadfailout);
724 V3_CHKPT_LOAD(ctx, "DR1", info->dbg_regs.dr1, loadfailout);
725 V3_CHKPT_LOAD(ctx, "DR2", info->dbg_regs.dr2, loadfailout);
726 V3_CHKPT_LOAD(ctx, "DR3", info->dbg_regs.dr3, loadfailout);
727 // there is no DR4 or DR5
728 V3_CHKPT_LOAD(ctx, "DR6", info->dbg_regs.dr6, loadfailout);
729 V3_CHKPT_LOAD(ctx, "DR7", info->dbg_regs.dr7, loadfailout);
// segment and descriptor-table registers
732 V3_CHKPT_LOAD(ctx, "CS", info->segments.cs, loadfailout);
733 V3_CHKPT_LOAD(ctx, "DS", info->segments.ds, loadfailout);
734 V3_CHKPT_LOAD(ctx, "ES", info->segments.es, loadfailout);
735 V3_CHKPT_LOAD(ctx, "FS", info->segments.fs, loadfailout);
736 V3_CHKPT_LOAD(ctx, "GS", info->segments.gs, loadfailout);
737 V3_CHKPT_LOAD(ctx, "SS", info->segments.ss, loadfailout);
738 V3_CHKPT_LOAD(ctx, "LDTR", info->segments.ldtr, loadfailout);
739 V3_CHKPT_LOAD(ctx, "GDTR", info->segments.gdtr, loadfailout);
740 V3_CHKPT_LOAD(ctx, "IDTR", info->segments.idtr, loadfailout);
741 V3_CHKPT_LOAD(ctx, "TR", info->segments.tr, loadfailout);
// MSRs kept in info->msrs
744 V3_CHKPT_LOAD(ctx, "STAR", info->msrs.star, loadfailout);
745 V3_CHKPT_LOAD(ctx, "LSTAR", info->msrs.lstar, loadfailout);
746 V3_CHKPT_LOAD(ctx, "SFMASK", info->msrs.sfmask, loadfailout);
747 V3_CHKPT_LOAD(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, loadfailout);
749 // Some components of guest state captured in the shadow pager
750 V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout);
751 V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
752 V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);
// ctx is zeroed after closing so the failure path does not close it twice
754 v3_chkpt_close_ctx(ctx); ctx=0;
756 PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");
// recompute the modes from the register state that was just loaded
758 info->cpu_mode = v3_get_vm_cpu_mode(info);
759 info->mem_mode = v3_get_vm_mem_mode(info);
761 if (info->shdw_pg_mode == SHADOW_PAGING) {
762 if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
763 if (v3_activate_shadow_pt(info) == -1) {
764 PrintError(info->vm_info, info, "Failed to activate shadow page tables\n");
768 if (v3_activate_passthrough_pt(info) == -1) {
769 PrintError(info->vm_info, info, "Failed to activate passthrough page tables\n");
776 if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
// architecture-dependent state (SVM VMCB or VMX VMCS)
780 switch (v3_mach_type) {
782 case V3_SVM_REV3_CPU: {
785 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
786 ctx = v3_chkpt_open_ctx(chkpt, key_name);
789 PrintError(info->vm_info, info, "Could not open context to load SVM core\n");
793 if (v3_svm_load_core(info, ctx) < 0 ) {
794 PrintError(info->vm_info, info, "Failed to patch core %d\n", info->vcpu_id);
798 v3_chkpt_close_ctx(ctx); ctx=0;
804 case V3_VMX_EPT_UG_CPU: {
807 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
809 ctx = v3_chkpt_open_ctx(chkpt, key_name);
812 PrintError(info->vm_info, info, "Could not open context to load VMX core\n");
816 if (v3_vmx_load_core(info, ctx) < 0) {
817 PrintError(info->vm_info, info, "VMX checkpoint failed\n");
821 v3_chkpt_close_ctx(ctx); ctx=0;
826 PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
832 PrintDebug(info->vm_info, info, "Load of core succeeded\n");
834 v3_print_guest_state(info);
// failure path: close whichever context is still open
839 PrintError(info->vm_info, info, "Failed to load core\n");
840 if (ctx) { v3_chkpt_close_ctx(ctx);}
845 // GEM5 - Hypercall for initiating transfer to gem5 (checkpoint)
/*
 * Save one virtual core to the checkpoint.
 *
 * Phase 1: open the context "guest_info<vcpu_id>" and save RIP, the general
 * purpose registers, control, debug and segment registers, a set of MSRs,
 * and the guest CR3/CR0/EFER copies kept in the shadow pager state. The
 * tags and their order mirror load_core().
 * Phase 2: unless opts has V3_CHKPT_OPT_SKIP_ARCHDEP, open "vmcb_data<id>"
 * (SVM) or "vmcs_data<id>" (VMX) and hand it to the matching saver.
 *
 * NOTE(review): the last argument of V3_CHKPT_SAVE is presumably a label
 * jumped to on failure; the visible failure path logs "Failed to save core"
 * and closes ctx if one is still open. Return statements are not visible
 * here; callers compare the result with -1.
 */
847 static int save_core(struct guest_info * info, struct v3_chkpt * chkpt, v3_chkpt_options_t opts) {
848 extern v3_cpu_arch_t v3_mach_type;
853 PrintDebug(info->vm_info, info, "Saving core\n");
855 v3_print_guest_state(info);
// key_name is used as a 16-byte buffer for the per-core context names
857 memset(key_name, 0, 16);
859 snprintf(key_name, 16, "guest_info%d", info->vcpu_id);
861 ctx = v3_chkpt_open_ctx(chkpt, key_name);
864 PrintError(info->vm_info, info, "Unable to open context to save core\n");
// instruction pointer
869 V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);
// general purpose registers
872 V3_CHKPT_SAVE(ctx,"RDI",info->vm_regs.rdi, savefailout);
873 V3_CHKPT_SAVE(ctx,"RSI",info->vm_regs.rsi, savefailout);
874 V3_CHKPT_SAVE(ctx,"RBP",info->vm_regs.rbp, savefailout);
875 V3_CHKPT_SAVE(ctx,"RSP",info->vm_regs.rsp, savefailout);
876 V3_CHKPT_SAVE(ctx,"RBX",info->vm_regs.rbx, savefailout);
877 V3_CHKPT_SAVE(ctx,"RDX",info->vm_regs.rdx, savefailout);
878 V3_CHKPT_SAVE(ctx,"RCX",info->vm_regs.rcx, savefailout);
879 V3_CHKPT_SAVE(ctx,"RAX",info->vm_regs.rax, savefailout);
880 V3_CHKPT_SAVE(ctx,"R8",info->vm_regs.r8, savefailout);
881 V3_CHKPT_SAVE(ctx,"R9",info->vm_regs.r9, savefailout);
882 V3_CHKPT_SAVE(ctx,"R10",info->vm_regs.r10, savefailout);
883 V3_CHKPT_SAVE(ctx,"R11",info->vm_regs.r11, savefailout);
884 V3_CHKPT_SAVE(ctx,"R12",info->vm_regs.r12, savefailout);
885 V3_CHKPT_SAVE(ctx,"R13",info->vm_regs.r13, savefailout);
886 V3_CHKPT_SAVE(ctx,"R14",info->vm_regs.r14, savefailout);
887 V3_CHKPT_SAVE(ctx,"R15",info->vm_regs.r15, savefailout);
// control registers
890 V3_CHKPT_SAVE(ctx, "CR0", info->ctrl_regs.cr0, savefailout);
892 V3_CHKPT_SAVE(ctx, "CR2", info->ctrl_regs.cr2, savefailout);
893 V3_CHKPT_SAVE(ctx, "CR3", info->ctrl_regs.cr3, savefailout);
894 V3_CHKPT_SAVE(ctx, "CR4", info->ctrl_regs.cr4, savefailout);
895 // There are no CR5,6,7
896 // CR8 is derived from apic_tpr
// bits 7:4 of apic_tpr are written out under the "CR8" tag
897 tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
898 V3_CHKPT_SAVE(ctx, "CR8", tempreg, savefailout);
899 V3_CHKPT_SAVE(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, savefailout);
900 V3_CHKPT_SAVE(ctx, "RFLAGS", info->ctrl_regs.rflags, savefailout);
901 V3_CHKPT_SAVE(ctx, "EFER", info->ctrl_regs.efer, savefailout);
// debug registers
904 V3_CHKPT_SAVE(ctx, "DR0", info->dbg_regs.dr0, savefailout);
905 V3_CHKPT_SAVE(ctx, "DR1", info->dbg_regs.dr1, savefailout);
906 V3_CHKPT_SAVE(ctx, "DR2", info->dbg_regs.dr2, savefailout);
907 V3_CHKPT_SAVE(ctx, "DR3", info->dbg_regs.dr3, savefailout);
908 // there is no DR4 or DR5
909 V3_CHKPT_SAVE(ctx, "DR6", info->dbg_regs.dr6, savefailout);
910 V3_CHKPT_SAVE(ctx, "DR7", info->dbg_regs.dr7, savefailout);
// segment and descriptor-table registers
913 V3_CHKPT_SAVE(ctx, "CS", info->segments.cs, savefailout);
914 V3_CHKPT_SAVE(ctx, "DS", info->segments.ds, savefailout);
915 V3_CHKPT_SAVE(ctx, "ES", info->segments.es, savefailout);
916 V3_CHKPT_SAVE(ctx, "FS", info->segments.fs, savefailout);
917 V3_CHKPT_SAVE(ctx, "GS", info->segments.gs, savefailout);
918 V3_CHKPT_SAVE(ctx, "SS", info->segments.ss, savefailout);
919 V3_CHKPT_SAVE(ctx, "LDTR", info->segments.ldtr, savefailout);
920 V3_CHKPT_SAVE(ctx, "GDTR", info->segments.gdtr, savefailout);
921 V3_CHKPT_SAVE(ctx, "IDTR", info->segments.idtr, savefailout);
922 V3_CHKPT_SAVE(ctx, "TR", info->segments.tr, savefailout);
// MSRs kept in info->msrs
925 V3_CHKPT_SAVE(ctx, "STAR", info->msrs.star, savefailout);
926 V3_CHKPT_SAVE(ctx, "LSTAR", info->msrs.lstar, savefailout);
927 V3_CHKPT_SAVE(ctx, "SFMASK", info->msrs.sfmask, savefailout);
928 V3_CHKPT_SAVE(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, savefailout);
930 // Some components of guest state captured in the shadow pager
931 V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout);
932 V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
933 V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);
// ctx is zeroed after closing so the failure path does not close it twice
935 v3_chkpt_close_ctx(ctx); ctx=0;
937 if (opts & V3_CHKPT_OPT_SKIP_ARCHDEP) {
941 //Architecture specific code
942 switch (v3_mach_type) {
944 case V3_SVM_REV3_CPU: {
947 snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);
949 ctx = v3_chkpt_open_ctx(chkpt, key_name);
952 PrintError(info->vm_info, info, "Could not open context to store SVM core\n");
956 if (v3_svm_save_core(info, ctx) < 0) {
957 PrintError(info->vm_info, info, "VMCB Unable to be written\n");
961 v3_chkpt_close_ctx(ctx); ctx=0;;
966 case V3_VMX_EPT_UG_CPU: {
969 snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);
971 ctx = v3_chkpt_open_ctx(chkpt, key_name);
974 PrintError(info->vm_info, info, "Could not open context to store VMX core\n");
978 if (v3_vmx_save_core(info, ctx) == -1) {
979 PrintError(info->vm_info, info, "VMX checkpoint failed\n");
983 v3_chkpt_close_ctx(ctx); ctx=0;
988 PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
// failure path: close whichever context is still open
998 PrintError(info->vm_info, info, "Failed to save core\n");
999 if (ctx) { v3_chkpt_close_ctx(ctx); }
1005 // GEM5 - Madhav has debug code here for printing instructions
/*
 * Checkpoint a whole VM to a backing store.
 *
 * vm    - VM to save
 * store - name of a registered backing store
 * url   - location string passed to the store
 * opts  - V3_CHKPT_OPT_SKIP_* flags selecting parts to leave out
 *
 * Opens the checkpoint in SAVE mode, raises a barrier if the VM is running,
 * then saves in this order: memory (unless SKIP_MEM), devices (unless
 * SKIP_DEVS), the header (always), and each core (unless SKIP_CORES).
 * v3_chkpt_load_vm() reads the parts back in the same order. The barrier is
 * lowered again if the VM was running.
 * NOTE(review): the return statements and the closing of the checkpoint are
 * not visible here.
 */
1008 int v3_chkpt_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1009 struct v3_chkpt * chkpt = NULL;
1014 chkpt = chkpt_open(vm, store, url, SAVE);
1016 if (chkpt == NULL) {
1017 PrintError(vm, VCORE_NONE, "Error creating checkpoint store for url %s\n",url);
1021 /* If this guest is running we need to block it while the checkpoint occurs */
1022 if (vm->run_state == VM_RUNNING) {
// retry until the barrier is successfully raised
1023 while (v3_raise_barrier(vm, NULL) == -1);
1026 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1027 if ((ret = save_memory(vm, chkpt)) == -1) {
1028 PrintError(vm, VCORE_NONE, "Unable to save memory\n");
1034 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1035 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
1036 PrintError(vm, VCORE_NONE, "Unable to save devices\n");
1041 if ((ret = save_header(vm, chkpt)) == -1) {
1042 PrintError(vm, VCORE_NONE, "Unable to save header\n");
1046 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1047 for (i = 0; i < vm->num_cores; i++){
1048 if ((ret = save_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1049 PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
1057 /* Resume the guest if it was running */
1058 if (vm->run_state == VM_RUNNING) {
1059 v3_lower_barrier(vm);
/*
 * Restore a whole VM from a backing store.
 *
 * vm    - VM to restore into
 * store - name of a registered backing store
 * url   - location string passed to the store
 * opts  - V3_CHKPT_OPT_SKIP_* flags selecting parts to leave out
 *
 * Opens the checkpoint in LOAD mode, raises a barrier if the VM is running,
 * then loads in this order: memory (unless SKIP_MEM), devices (unless
 * SKIP_DEVS), the header (always), and each core (unless SKIP_CORES).
 * Finally, if the VM was running, the visible code can set run_state to
 * VM_STOPPED and lowers the barrier.
 * NOTE(review): the condition guarding the VM_STOPPED assignment, the return
 * statements and the closing of the checkpoint are not visible here.
 */
1068 int v3_chkpt_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1069 struct v3_chkpt * chkpt = NULL;
1073 chkpt = chkpt_open(vm, store, url, LOAD);
1075 if (chkpt == NULL) {
1076 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1080 /* If this guest is running we need to block it while the checkpoint occurs */
1081 if (vm->run_state == VM_RUNNING) {
// retry until the barrier is successfully raised
1082 while (v3_raise_barrier(vm, NULL) == -1);
1085 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1086 if ((ret = load_memory(vm, chkpt)) == -1) {
1087 PrintError(vm, VCORE_NONE, "Unable to load memory\n");
1092 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1093 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
1094 PrintError(vm, VCORE_NONE, "Unable to load devies\n");
1100 if ((ret = load_header(vm, chkpt)) == -1) {
1101 PrintError(vm, VCORE_NONE, "Unable to load header\n");
1106 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1107 for (i = 0; i < vm->num_cores; i++) {
1108 if ((ret = load_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1109 PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
1117 /* Resume the guest if it was running and we didn't just trash the state*/
1118 if (vm->run_state == VM_RUNNING) {
1121 vm->run_state = VM_STOPPED;
1124 /* We check the run state of the VM after every barrier
1125 So this will immediately halt the VM
1127 v3_lower_barrier(vm);
1137 #ifdef V3_CONFIG_LIVE_MIGRATION
1139 #define MOD_THRESHOLD 200 // stop iterating once fewer than this many pages are modified
1140 #define ITER_THRESHOLD 32 // stop iterating once the iteration count exceeds this
/*
 * Live-migrate (send) a VM to a backing store.
 *
 * vm    - VM to send
 * store - name of a registered backing store
 * url   - location string passed to the store
 * opts  - V3_CHKPT_OPT_SKIP_* flags selecting parts to leave out
 *
 * Unless SKIP_MEM is set, every core must use shadow paging. Memory is sent
 * in rounds. The send bitmap starts with every page marked. Each round
 * pauses the VM and, after the first round, replaces the send bitmap with
 * the pages dirtied since tracking was last started. It then either declares
 * the last round (fewer than MOD_THRESHOLD modified pages, or more than
 * ITER_THRESHOLD iterations) or restarts tracking and resumes the VM. The
 * marked pages are then sent with save_inc_memory(). An all-zero bitmap is
 * sent last to mark the end of the memory stream.
 * After memory: devices (unless SKIP_DEVS), the header (always), and each
 * core (unless SKIP_CORES).
 * NOTE(review): the return statements, the closing of the checkpoint and the
 * updates of "iter" are not visible here.
 */
1144 int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1145 struct v3_chkpt * chkpt = NULL;
1148 bool last_modpage_iteration=false;
1149 struct v3_bitmap modified_pages_to_send;
1150 uint64_t start_time;
1152 int num_mod_pages=0;
1153 struct mem_migration_state *mm_state;
1156 // Currently will work only for shadow paging
1157 for (i=0;i<vm->num_cores;i++) {
1158 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1159 PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
1165 chkpt = chkpt_open(vm, store, url, SAVE);
1167 if (chkpt == NULL) {
1168 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1173 if (opts & V3_CHKPT_OPT_SKIP_MEM) {
1177 // In a send, the memory is copied incrementally first,
1178 // followed by the remainder of the state
1180 if (v3_bitmap_init(&modified_pages_to_send,
1181 vm->mem_size>>12 // number of pages in main region
1183 PrintError(vm, VCORE_NONE, "Could not intialize bitmap.\n");
1187 // 0. Initialize bitmap to all 1s
1188 for (i=0; i < modified_pages_to_send.num_bits; i++) {
1189 v3_bitmap_set(&modified_pages_to_send,i);
1193 while (!last_modpage_iteration) {
// NOTE(review): this prints i, last used as the bitmap-initialisation loop
// index above, while the termination test below uses iter - confirm intent.
1194 PrintDebug(vm, VCORE_NONE, "Modified memory page iteration %d\n",i++);
1196 start_time = v3_get_host_time(&(vm->cores[0].time_state));
1198 // We will pause the VM for a short while
1199 // so that we can collect the set of changed pages
1200 if (v3_pause_vm(vm) == -1) {
1201 PrintError(vm, VCORE_NONE, "Could not pause VM\n");
1207 // special case, we already have the pages to send (all of them)
1208 // they are already in modified_pages_to_send
1210 // normally, we are in the middle of a round
1211 // We need to copy from the current tracking bitmap
1212 // to our send bitmap
1213 v3_bitmap_copy(&modified_pages_to_send,&(mm_state->modified_pages));
1214 // and now we need to remove our tracking
1215 stop_page_tracking(mm_state);
1218 // are we done? (note that we are still paused)
1219 num_mod_pages = v3_bitmap_count(&modified_pages_to_send);
1220 if (num_mod_pages<MOD_THRESHOLD || iter>ITER_THRESHOLD) {
1221 // we are done, so we will not restart page tracking
1222 // the vm is paused, and so we should be able
1223 // to just send the data
1224 PrintDebug(vm, VCORE_NONE, "Last modified memory page iteration.\n");
1225 last_modpage_iteration = true;
1227 // we are not done, so we will restart page tracking
1228 // to prepare for a second round of pages
1229 // we will resume the VM as this happens
1230 if (!(mm_state=start_page_tracking(vm))) {
1231 PrintError(vm, VCORE_NONE, "Error enabling page tracking.\n");
1235 if (v3_continue_vm(vm) == -1) {
1236 PrintError(vm, VCORE_NONE, "Error resuming the VM\n");
1237 stop_page_tracking(mm_state);
1242 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1243 PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1247 // At this point, we are either paused and about to copy
1248 // the last chunk, or we are running, and will copy the last
1249 // round in parallel with current execution
1250 if (num_mod_pages>0) {
1251 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1252 PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
1256 } // we don't want to copy an empty bitmap here
1261 if (v3_bitmap_reset(&modified_pages_to_send) == -1) {
1262 PrintError(vm, VCORE_NONE, "Error reseting bitmap.\n");
1267 // send bitmap of 0s to signal end of modpages
1268 if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
1269 PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
1275 // save the non-memory state
1276 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1277 if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
1278 PrintError(vm, VCORE_NONE, "Unable to save devices\n");
1283 if ((ret = save_header(vm, chkpt)) == -1) {
1284 PrintError(vm, VCORE_NONE, "Unable to save header\n");
1288 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1289 for (i = 0; i < vm->num_cores; i++){
1290 if ((ret = save_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1291 PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
// the bitmap and timing values exist only when memory was sent
1297 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1298 stop_time = v3_get_host_time(&(vm->cores[0].time_state));
1299 PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n",num_mod_pages,stop_time-start_time);
1300 PrintDebug(vm, VCORE_NONE, "Done sending VM!\n");
1302 v3_bitmap_deinit(&modified_pages_to_send);
/*
 * Receive a live-migrated VM from a backing store.
 *
 * vm    - VM to restore into
 * store - name of a registered backing store
 * url   - location string passed to the store
 * opts  - V3_CHKPT_OPT_SKIP_* flags selecting parts to leave out
 *
 * Unless SKIP_MEM is set, every core must use shadow paging. Opens the
 * checkpoint in LOAD mode, sets up a bitmap with one bit per 4 KB guest
 * page, raises a barrier if the VM is running, and calls load_inc_memory()
 * to receive memory rounds. Then it loads devices (unless SKIP_DEVS), the
 * header (always), and each core (unless SKIP_CORES). If the VM was running,
 * the visible code can set run_state to VM_STOPPED and lowers the barrier.
 * The bitmap is released when memory was not skipped.
 * NOTE(review): the loop around load_inc_memory(), the test that ends it,
 * the return statements and the closing of the checkpoint are not visible
 * here.
 */
1311 int v3_chkpt_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
1312 struct v3_chkpt * chkpt = NULL;
1315 struct v3_bitmap mod_pgs;
1317 // Currently will work only for shadow paging
1318 for (i=0;i<vm->num_cores;i++) {
1319 if (vm->cores[i].shdw_pg_mode!=SHADOW_PAGING && !(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1320 PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
1325 chkpt = chkpt_open(vm, store, url, LOAD);
1327 if (chkpt == NULL) {
1328 PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
1334 if (opts & V3_CHKPT_OPT_SKIP_MEM) {
// mem_size >> 12 is the number of 4 KB pages in guest memory
1338 if (v3_bitmap_init(&mod_pgs,vm->mem_size>>12) == -1) {
1340 PrintError(vm, VCORE_NONE, "Could not intialize bitmap.\n");
1344 /* If this guest is running we need to block it while the checkpoint occurs */
1345 if (vm->run_state == VM_RUNNING) {
// retry until the barrier is successfully raised
1346 while (v3_raise_barrier(vm, NULL) == -1);
1351 // 1. Receive copy of bitmap
1353 PrintDebug(vm, VCORE_NONE, "Memory page iteration %d\n",i++);
1354 int retval = load_inc_memory(vm, &mod_pgs, chkpt);
1356 // end of receiving memory pages
1358 } else if (retval == -1) {
1359 PrintError(vm, VCORE_NONE, "Error receiving incremental memory.\n");
1367 if (!(opts & V3_CHKPT_OPT_SKIP_DEVS)) {
1368 if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
1369 PrintError(vm, VCORE_NONE, "Unable to load devices\n");
1375 if ((ret = load_header(vm, chkpt)) == -1) {
1376 PrintError(vm, VCORE_NONE, "Unable to load header\n");
1382 if (!(opts & V3_CHKPT_OPT_SKIP_CORES)) {
1383 for (i = 0; i < vm->num_cores; i++) {
1384 if ((ret = load_core(&(vm->cores[i]), chkpt, opts)) == -1) {
1385 PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
1393 PrintError(vm, VCORE_NONE, "Unable to receive VM\n");
1395 PrintDebug(vm, VCORE_NONE, "Done receving the VM\n");
1399 /* Resume the guest if it was running and we didn't just trash the state*/
1400 if (vm->run_state == VM_RUNNING) {
1402 PrintError(vm, VCORE_NONE, "VM was previously running. It is now borked. Pausing it. \n");
1403 vm->run_state = VM_STOPPED;
1406 /* We check the run state of the VM after every barrier
1407 So this will immediately halt the VM
1409 v3_lower_barrier(vm);
1413 if (!(opts & V3_CHKPT_OPT_SKIP_MEM)) {
1414 v3_bitmap_deinit(&mod_pgs);