/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2011, Madhav Suresh <madhav@u.northwestern.edu>
 * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Madhav Suresh <madhav@u.northwestern.edu>
 *         Arefin Huq <fig@arefin.net>
 *         Peter Dinda <pdinda@northwestern.edu> (store interface changes)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vm_guest.h>
#include <palacios/svm.h>
#include <palacios/vmx.h>
#include <palacios/vmm_checkpoint.h>
#include <palacios/vmm_hashtable.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/vmm_debug.h>

#include <palacios/vmm_dev_mgr.h>
#ifdef V3_CONFIG_LIVE_MIGRATION
#include <palacios/vmm_time.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_shadow_paging.h>
#endif

#ifndef V3_CONFIG_DEBUG_CHECKPOINT
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

static struct hashtable * store_table = NULL;

typedef enum {SAVE, LOAD} chkpt_mode_t;
struct chkpt_interface {
    // Name under which this backing store registers itself
    char name[128];

    // Opening a checkpoint should return a pointer to the internal representation
    // of the checkpoint in the store.  This will be passed back
    // as "store_data".  Return NULL if the checkpoint cannot be opened.
    void * (*open_chkpt)(char * url, chkpt_mode_t mode);
    // Closing the checkpoint should return -1 on failure, 0 on success
    int (*close_chkpt)(void * store_data);

    // Opening a context on the checkpoint with a given name should return
    // a pointer to an internal representation of the context.  This pointer
    // is then passed back as "ctx".
    // We will open only a single context at a time.
    void * (*open_ctx)(void * store_data, char * name);
    // Closing the context should return -1 on failure, 0 on success
    int (*close_ctx)(void * store_data, void * ctx);

    // Save and load operate on a tagged data buffer.  These are
    // "all or nothing" writes and reads.
    // Return -1 on failure, 0 on success.
    int (*save)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
    int (*load)(void * store_data, void * ctx, char * tag, uint64_t len, void * buf);
};
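
/* As an illustration only (excluded from the build, and not an actual
 * Palacios store): a minimal RAM-backed store implementing this interface
 * might look like the sketch below.  It ignores tags and treats the
 * checkpoint as one sequential byte stream.  All "demo_*" names and
 * DEMO_STORE_SIZE are hypothetical; only the chkpt_interface fields
 * themselves come from this file.
 */
#if 0
#define DEMO_STORE_SIZE (16 * 1024 * 1024)

struct demo_store {
    chkpt_mode_t mode;                 // SAVE or LOAD
    uint8_t      buf[DEMO_STORE_SIZE]; // hypothetical fixed-size backing buffer
    uint64_t     off;                  // current read/write offset
};

static void * demo_open_chkpt(char * url, chkpt_mode_t mode) {
    struct demo_store * s = V3_Malloc(sizeof(struct demo_store));
    if (!s) { return NULL; }
    memset(s, 0, sizeof(*s));
    s->mode = mode;
    return s;
}

static int demo_close_chkpt(void * store_data) {
    V3_Free(store_data);
    return 0;
}

static void * demo_open_ctx(void * store_data, char * name) {
    return store_data;  // contexts carry no extra state in this sketch
}

static int demo_close_ctx(void * store_data, void * ctx) {
    return 0;
}

static int demo_save(void * store_data, void * ctx, char * tag, uint64_t len, void * buf) {
    struct demo_store * s = (struct demo_store *)store_data;
    if (s->off + len > DEMO_STORE_SIZE) { return -1; }  // all or nothing
    memcpy(s->buf + s->off, buf, len);
    s->off += len;
    return 0;
}

static int demo_load(void * store_data, void * ctx, char * tag, uint64_t len, void * buf) {
    struct demo_store * s = (struct demo_store *)store_data;
    if (s->off + len > DEMO_STORE_SIZE) { return -1; }
    memcpy(buf, s->buf + s->off, len);
    s->off += len;
    return 0;
}

static struct chkpt_interface demo_store_iface = {
    .name        = "DEMO",
    .open_chkpt  = demo_open_chkpt,
    .close_chkpt = demo_close_chkpt,
    .open_ctx    = demo_open_ctx,
    .close_ctx   = demo_close_ctx,
    .save        = demo_save,
    .load        = demo_load,
};
#endif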
struct v3_chkpt {
    struct v3_vm_info * vm;

    struct v3_chkpt_ctx * current_ctx;

    struct chkpt_interface * interface;

    void * store_data;
};
static uint_t store_hash_fn(addr_t key) {
    char * name = (char *)key;
    return v3_hash_buffer((uint8_t *)name, strlen(name));
}
static int store_eq_fn(addr_t key1, addr_t key2) {
    char * name1 = (char *)key1;
    char * name2 = (char *)key2;

    return (strcmp(name1, name2) == 0);
}

#include "vmm_chkpt_stores.h"
int V3_init_checkpoint() {
    extern struct chkpt_interface * __start__v3_chkpt_stores[];
    extern struct chkpt_interface * __stop__v3_chkpt_stores[];
    struct chkpt_interface ** tmp_store = __start__v3_chkpt_stores;
    int i = 0;

    store_table = v3_create_htable(0, store_hash_fn, store_eq_fn);

    while (tmp_store != __stop__v3_chkpt_stores) {
        V3_Print(VM_NONE, VCORE_NONE, "Registering Checkpoint Backing Store (%s)\n", (*tmp_store)->name);

        if (v3_htable_search(store_table, (addr_t)((*tmp_store)->name))) {
            PrintError(VM_NONE, VCORE_NONE, "Multiple instances of Checkpoint backing Store (%s)\n", (*tmp_store)->name);
            return -1;
        }

        if (v3_htable_insert(store_table, (addr_t)((*tmp_store)->name), (addr_t)(*tmp_store)) == 0) {
            PrintError(VM_NONE, VCORE_NONE, "Could not register Checkpoint backing store (%s)\n", (*tmp_store)->name);
            return -1;
        }

        tmp_store = &(__start__v3_chkpt_stores[++i]);
    }

    return 0;
}
int V3_deinit_checkpoint() {
    v3_free_htable(store_table, 0, 0);
    return 0;
}
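
/* Backing stores are discovered via a linker set: each store places a
 * pointer to its chkpt_interface into the "_v3_chkpt_stores" ELF section,
 * and V3_init_checkpoint() above walks the section between the generated
 * __start__/__stop__ symbols.  A store would typically register itself
 * with something like the following (sketch only; the real macro lives in
 * vmm_chkpt_stores.h and may differ in name and details):
 */
#if 0
#define register_chkpt_store(store)                          \
    static struct chkpt_interface * _v3_chkpt_store_##store  \
    __attribute__((used))                                    \
    __attribute__((section("_v3_chkpt_stores"))) = &store;

register_chkpt_store(demo_store_iface);
#endif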

static char svm_chkpt_header[] = "v3vee palacios checkpoint version: x.x, SVM x.x";
static char vmx_chkpt_header[] = "v3vee palacios checkpoint version: x.x, VMX x.x";
static int chkpt_close(struct v3_chkpt * chkpt) {
    if (chkpt) {
        int rc;

        rc = chkpt->interface->close_chkpt(chkpt->store_data);

        V3_Free(chkpt);

        if (rc != 0) {
            PrintError(VM_NONE, VCORE_NONE, "Internal store failed to close valid checkpoint\n");
            return -1;
        } else {
            return 0;
        }
    } else {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to close null checkpoint\n");
        return -1;
    }
}
static struct v3_chkpt * chkpt_open(struct v3_vm_info * vm, char * store, char * url, chkpt_mode_t mode) {
    struct chkpt_interface * iface = NULL;
    struct v3_chkpt * chkpt = NULL;
    void * store_data = NULL;

    iface = (void *)v3_htable_search(store_table, (addr_t)store);

    if (iface == NULL) {
        V3_Print(vm, VCORE_NONE, "Error: Could not locate Checkpoint interface for store (%s)\n", store);
        return NULL;
    }

    store_data = iface->open_chkpt(url, mode);

    if (store_data == NULL) {
        PrintError(vm, VCORE_NONE, "Could not open url (%s) for backing store (%s)\n", url, store);
        return NULL;
    }

    chkpt = V3_Malloc(sizeof(struct v3_chkpt));

    if (!chkpt) {
        PrintError(vm, VCORE_NONE, "Could not allocate checkpoint state, closing checkpoint\n");
        iface->close_chkpt(store_data);
        return NULL;
    }

    memset(chkpt, 0, sizeof(struct v3_chkpt));

    chkpt->interface = iface;
    chkpt->vm = vm;
    chkpt->store_data = store_data;
    chkpt->current_ctx = NULL;

    return chkpt;
}
struct v3_chkpt_ctx * v3_chkpt_open_ctx(struct v3_chkpt * chkpt, char * name) {
    struct v3_chkpt_ctx * ctx;

    if (chkpt->current_ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to open context %s before old context has been closed\n", name);
        return NULL;
    }

    ctx = V3_Malloc(sizeof(struct v3_chkpt_ctx));

    if (!ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Unable to allocate context\n");
        return NULL;
    }

    memset(ctx, 0, sizeof(struct v3_chkpt_ctx));

    ctx->chkpt = chkpt;
    ctx->store_ctx = chkpt->interface->open_ctx(chkpt->store_data, name);

    if (!(ctx->store_ctx)) {
        PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to open context %s\n", name);
        V3_Free(ctx);
        return NULL;
    }

    chkpt->current_ctx = ctx;

    return ctx;
}
int v3_chkpt_close_ctx(struct v3_chkpt_ctx * ctx) {
    struct v3_chkpt * chkpt = ctx->chkpt;
    int ret = 0;

    if (chkpt->current_ctx != ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to close a context that is not the current context on the store\n");
        return -1;
    }

    ret = chkpt->interface->close_ctx(chkpt->store_data, ctx->store_ctx);

    if (ret) {
        PrintError(VM_NONE, VCORE_NONE, "Failed to close context on store, closing device-independent context anyway - bad\n");
        ret = -1;
    }

    chkpt->current_ctx = NULL;

    V3_Free(ctx);

    return ret;
}
int v3_chkpt_save(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
    struct v3_chkpt * chkpt;
    int rc;

    if (!ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to save tag %s on null context\n", tag);
        return -1;
    }

    chkpt = ctx->chkpt;

    if (chkpt->current_ctx != ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to save on context that is not the current context for the store\n");
        return -1;
    }

    rc = chkpt->interface->save(chkpt->store_data, ctx->store_ctx, tag, len, buf);

    if (rc) {
        PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to save tag %s on valid context\n", tag);
        return -1;
    }

    return 0;
}
int v3_chkpt_load(struct v3_chkpt_ctx * ctx, char * tag, uint64_t len, void * buf) {
    struct v3_chkpt * chkpt;
    int rc;

    if (!ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to load tag %s from null context\n", tag);
        return -1;
    }

    chkpt = ctx->chkpt;

    if (chkpt->current_ctx != ctx) {
        PrintError(VM_NONE, VCORE_NONE, "Attempt to load from context that is not the current context for the store\n");
        return -1;
    }

    rc = chkpt->interface->load(chkpt->store_data, ctx->store_ctx, tag, len, buf);

    if (rc) {
        PrintError(VM_NONE, VCORE_NONE, "Underlying store failed to load tag %s from valid context\n", tag);
        return -1;
    }

    return 0;
}
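
/* Typical use of the context API, as seen throughout this file: open a
 * named context, perform one or more tagged all-or-nothing saves or loads,
 * and close the context before opening the next one (only a single context
 * may be open at a time).  Sketch only; "my_ctx" and "my_tag" are made-up
 * names:
 */
#if 0
static void save_example(struct v3_chkpt * chkpt) {
    struct v3_chkpt_ctx * ctx = v3_chkpt_open_ctx(chkpt, "my_ctx");
    uint64_t value = 42;

    if (!ctx) { return; }

    if (v3_chkpt_save(ctx, "my_tag", sizeof(value), &value)) {
        PrintError(VM_NONE, VCORE_NONE, "save failed\n");
    }

    v3_chkpt_close_ctx(ctx);
}
#endif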
static int load_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
    void * guest_mem_base = NULL;
    struct v3_chkpt_ctx * ctx = NULL;

    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);

    ctx = v3_chkpt_open_ctx(chkpt, "memory_img");

    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Unable to open context for memory load\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
        PrintError(vm, VCORE_NONE, "Unable to load all of memory (requested=%llu bytes)\n", (uint64_t)(vm->mem_size));
        v3_chkpt_close_ctx(ctx);
        return -1;
    }

    v3_chkpt_close_ctx(ctx);

    return 0;
}

static int save_memory(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
    void * guest_mem_base = NULL;
    struct v3_chkpt_ctx * ctx = NULL;

    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);

    ctx = v3_chkpt_open_ctx(chkpt, "memory_img");

    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Unable to open context to save memory\n");
        return -1;
    }

    if (v3_chkpt_save(ctx, "memory_img", vm->mem_size, guest_mem_base)) {
        PrintError(vm, VCORE_NONE, "Unable to save all of memory (requested=%llu bytes)\n", (uint64_t)(vm->mem_size));
        v3_chkpt_close_ctx(ctx);
        return -1;
    }

    v3_chkpt_close_ctx(ctx);

    return 0;
}

#ifdef V3_CONFIG_LIVE_MIGRATION

struct mem_migration_state {
    struct v3_vm_info * vm;
    struct v3_bitmap    modified_pages;
};
static int paging_callback(struct guest_info * core,
                           struct v3_shdw_pg_event * event,
                           void * priv_data)
{
    struct mem_migration_state * m = (struct mem_migration_state *)priv_data;

    if (event->event_type == SHADOW_PAGEFAULT &&
        event->event_order == SHADOW_PREIMPL &&
        event->error_code.write) {
        addr_t gpa;

        if (!v3_gva_to_gpa(core, event->gva, &gpa)) {
            // write to this page
            v3_bitmap_set(&(m->modified_pages), gpa >> 12);
        }
        // otherwise no worries, this isn't physical memory
    }
    // we don't care about other events

    return 0;
}
static struct mem_migration_state * start_page_tracking(struct v3_vm_info * vm)
{
    struct mem_migration_state * m;
    int i;

    m = (struct mem_migration_state *)V3_Malloc(sizeof(struct mem_migration_state));

    if (!m) {
        PrintError(vm, VCORE_NONE, "Cannot allocate\n");
        return NULL;
    }

    m->vm = vm;

    if (v3_bitmap_init(&(m->modified_pages), vm->mem_size >> 12) == -1) {
        PrintError(vm, VCORE_NONE, "Failed to initialize modified_pages bit vector\n");
        V3_Free(m);
        return NULL;
    }

    v3_register_shadow_paging_event_callback(vm, paging_callback, m);

    for (i = 0; i < vm->num_cores; i++) {
        v3_invalidate_shadow_pts(&(vm->cores[i]));
    }

    // and now we should get callbacks as writes happen

    return m;
}
static void stop_page_tracking(struct mem_migration_state * m)
{
    v3_unregister_shadow_paging_event_callback(m->vm, paging_callback, m);

    v3_bitmap_deinit(&(m->modified_pages));

    V3_Free(m);
}
// Returns:
//  negative: error
//  zero: done with this round
static int save_inc_memory(struct v3_vm_info * vm,
                           struct v3_bitmap * mod_pgs_to_send,
                           struct v3_chkpt * chkpt) {
    int page_size_bytes = 1 << 12; // assuming 4k pages right now
    struct v3_chkpt_ctx * ctx = NULL;
    int i = 0;
    void * guest_mem_base = NULL;
    int bitmap_num_bytes = (mod_pgs_to_send->num_bits / 8)
                           + ((mod_pgs_to_send->num_bits % 8) > 0);

    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);

    PrintDebug(vm, VCORE_NONE, "Saving incremental memory.\n");

    ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");

    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Cannot open context for dirty memory bitmap\n");
        return -1;
    }

    if (v3_chkpt_save(ctx,
                      "memory_bitmap_bits",
                      bitmap_num_bytes,
                      mod_pgs_to_send->bits)) {
        PrintError(vm, VCORE_NONE, "Unable to write all of the dirty memory bitmap\n");
        v3_chkpt_close_ctx(ctx);
        return -1;
    }

    v3_chkpt_close_ctx(ctx);

    PrintDebug(vm, VCORE_NONE, "Sent bitmap bits.\n");

    // Dirty memory pages are sent in bitmap order
    for (i = 0; i < mod_pgs_to_send->num_bits; i++) {
        if (v3_bitmap_check(mod_pgs_to_send, i)) {
            // PrintDebug(vm, VCORE_NONE, "Sending memory page %d.\n", i);
            ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
            if (!ctx) {
                PrintError(vm, VCORE_NONE, "Unable to open context to send memory page\n");
                return -1;
            }
            if (v3_chkpt_save(ctx,
                              "memory_page",
                              page_size_bytes,
                              guest_mem_base + (page_size_bytes * i))) {
                PrintError(vm, VCORE_NONE, "Unable to send a memory page\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }

            v3_chkpt_close_ctx(ctx);
        }
    }

    return 0;
}
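
/* On the wire, one round of incremental memory is therefore a
 * "memory_bitmap_bits" context carrying the dirty bitmap, followed by one
 * "memory_page" context per set bit, in bitmap order.  The receiver
 * (load_inc_memory below) walks the same bitmap to know how many pages to
 * expect; an all-zero bitmap signals the end of the page stream.
 */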
// Returns:
//  negative: error
//  zero: ok, but not done
//  positive: ok, and also done
static int load_inc_memory(struct v3_vm_info * vm,
                           struct v3_bitmap * mod_pgs,
                           struct v3_chkpt * chkpt) {
    int page_size_bytes = 1 << 12; // assuming 4k pages right now
    struct v3_chkpt_ctx * ctx = NULL;
    int i = 0;
    void * guest_mem_base = NULL;
    bool empty_bitmap = true;
    int bitmap_num_bytes = (mod_pgs->num_bits / 8)
                           + ((mod_pgs->num_bits % 8) > 0);

    guest_mem_base = V3_VAddr((void *)vm->mem_map.base_region.host_addr);

    ctx = v3_chkpt_open_ctx(chkpt, "memory_bitmap_bits");

    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Cannot open context to receive memory bitmap\n");
        return -1;
    }

    if (v3_chkpt_load(ctx,
                      "memory_bitmap_bits",
                      bitmap_num_bytes,
                      mod_pgs->bits)) {
        PrintError(vm, VCORE_NONE, "Did not receive all of memory bitmap\n");
        v3_chkpt_close_ctx(ctx);
        return -1;
    }

    v3_chkpt_close_ctx(ctx);

    // Receive also follows bitmap order
    for (i = 0; i < mod_pgs->num_bits; i++) {
        if (v3_bitmap_check(mod_pgs, i)) {
            PrintDebug(vm, VCORE_NONE, "Loading page %d\n", i);
            empty_bitmap = false;
            ctx = v3_chkpt_open_ctx(chkpt, "memory_page");
            if (!ctx) {
                PrintError(vm, VCORE_NONE, "Cannot open context to receive memory page\n");
                return -1;
            }

            if (v3_chkpt_load(ctx,
                              "memory_page",
                              page_size_bytes,
                              guest_mem_base + (page_size_bytes * i))) {
                PrintError(vm, VCORE_NONE, "Did not receive all of memory page\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }
            v3_chkpt_close_ctx(ctx);
        }
    }

    if (empty_bitmap) {
        // signal end of receiving pages
        PrintDebug(vm, VCORE_NONE, "Finished receiving pages.\n");
        return 1;
    }

    // not done yet
    return 0;
}

#endif

int save_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
    extern v3_cpu_arch_t v3_mach_type;
    struct v3_chkpt_ctx * ctx = NULL;

    ctx = v3_chkpt_open_ctx(chkpt, "header");
    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Cannot open context to save header\n");
        return -1;
    }

    switch (v3_mach_type) {
        case V3_SVM_REV3_CPU: {
            if (v3_chkpt_save(ctx, "header", strlen(svm_chkpt_header), svm_chkpt_header)) {
                PrintError(vm, VCORE_NONE, "Could not save all of SVM header\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }
            break;
        }
        case V3_VMX_EPT_UG_CPU: {
            if (v3_chkpt_save(ctx, "header", strlen(vmx_chkpt_header), vmx_chkpt_header)) {
                PrintError(vm, VCORE_NONE, "Could not save all of VMX header\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }
            break;
        }
        default:
            PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
            v3_chkpt_close_ctx(ctx);
            return -1;
    }

    v3_chkpt_close_ctx(ctx);

    return 0;
}
static int load_header(struct v3_vm_info * vm, struct v3_chkpt * chkpt) {
    extern v3_cpu_arch_t v3_mach_type;
    struct v3_chkpt_ctx * ctx = NULL;

    ctx = v3_chkpt_open_ctx(chkpt, "header");

    if (!ctx) {
        PrintError(vm, VCORE_NONE, "Cannot open context to load header\n");
        return -1;
    }

    switch (v3_mach_type) {
        case V3_SVM_REV3_CPU: {
            char header[strlen(svm_chkpt_header) + 1];

            if (v3_chkpt_load(ctx, "header", strlen(svm_chkpt_header), header)) {
                PrintError(vm, VCORE_NONE, "Could not load all of SVM header\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }

            header[strlen(svm_chkpt_header)] = 0;

            break;
        }
        case V3_VMX_EPT_UG_CPU: {
            char header[strlen(vmx_chkpt_header) + 1];

            if (v3_chkpt_load(ctx, "header", strlen(vmx_chkpt_header), header)) {
                PrintError(vm, VCORE_NONE, "Could not load all of VMX header\n");
                v3_chkpt_close_ctx(ctx);
                return -1;
            }

            header[strlen(vmx_chkpt_header)] = 0;

            break;
        }
        default:
            PrintError(vm, VCORE_NONE, "checkpoint not supported on this architecture\n");
            v3_chkpt_close_ctx(ctx);
            return -1;
    }

    v3_chkpt_close_ctx(ctx);

    return 0;
}

static int load_core(struct guest_info * info, struct v3_chkpt * chkpt) {
    extern v3_cpu_arch_t v3_mach_type;
    struct v3_chkpt_ctx * ctx = NULL;
    char key_name[16];
    v3_reg_t tempreg;

    PrintDebug(info->vm_info, info, "Loading core\n");

    memset(key_name, 0, 16);

    snprintf(key_name, 16, "guest_info%d", info->vcpu_id);

    ctx = v3_chkpt_open_ctx(chkpt, key_name);

    if (!ctx) {
        PrintError(info->vm_info, info, "Could not open context to load core\n");
        goto loadfailout;
    }
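
    /* V3_CHKPT_LOAD and V3_CHKPT_SAVE are convenience macros from
     * vmm_checkpoint.h; as used here they evidently wrap v3_chkpt_load()
     * and v3_chkpt_save() with sizeof() of the given lvalue and jump to
     * the supplied failure label (loadfailout/savefailout) on error.
     * See the header for the exact definition.
     */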
    V3_CHKPT_LOAD(ctx, "RIP", info->rip, loadfailout);

    // GPRs
    V3_CHKPT_LOAD(ctx, "RDI", info->vm_regs.rdi, loadfailout);
    V3_CHKPT_LOAD(ctx, "RSI", info->vm_regs.rsi, loadfailout);
    V3_CHKPT_LOAD(ctx, "RBP", info->vm_regs.rbp, loadfailout);
    V3_CHKPT_LOAD(ctx, "RSP", info->vm_regs.rsp, loadfailout);
    V3_CHKPT_LOAD(ctx, "RBX", info->vm_regs.rbx, loadfailout);
    V3_CHKPT_LOAD(ctx, "RDX", info->vm_regs.rdx, loadfailout);
    V3_CHKPT_LOAD(ctx, "RCX", info->vm_regs.rcx, loadfailout);
    V3_CHKPT_LOAD(ctx, "RAX", info->vm_regs.rax, loadfailout);
    V3_CHKPT_LOAD(ctx, "R8",  info->vm_regs.r8,  loadfailout);
    V3_CHKPT_LOAD(ctx, "R9",  info->vm_regs.r9,  loadfailout);
    V3_CHKPT_LOAD(ctx, "R10", info->vm_regs.r10, loadfailout);
    V3_CHKPT_LOAD(ctx, "R11", info->vm_regs.r11, loadfailout);
    V3_CHKPT_LOAD(ctx, "R12", info->vm_regs.r12, loadfailout);
    V3_CHKPT_LOAD(ctx, "R13", info->vm_regs.r13, loadfailout);
    V3_CHKPT_LOAD(ctx, "R14", info->vm_regs.r14, loadfailout);
    V3_CHKPT_LOAD(ctx, "R15", info->vm_regs.r15, loadfailout);

    // Control registers
    V3_CHKPT_LOAD(ctx, "CR0", info->ctrl_regs.cr0, loadfailout);
    // there is no CR1
    V3_CHKPT_LOAD(ctx, "CR2", info->ctrl_regs.cr2, loadfailout);
    V3_CHKPT_LOAD(ctx, "CR3", info->ctrl_regs.cr3, loadfailout);
    V3_CHKPT_LOAD(ctx, "CR4", info->ctrl_regs.cr4, loadfailout);
    // There are no CR5, CR6, CR7
    // CR8 is derived from apic_tpr
    tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
    V3_CHKPT_LOAD(ctx, "CR8", tempreg, loadfailout);
    V3_CHKPT_LOAD(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, loadfailout);
    V3_CHKPT_LOAD(ctx, "RFLAGS", info->ctrl_regs.rflags, loadfailout);
    V3_CHKPT_LOAD(ctx, "EFER", info->ctrl_regs.efer, loadfailout);

    // Debug registers
    V3_CHKPT_LOAD(ctx, "DR0", info->dbg_regs.dr0, loadfailout);
    V3_CHKPT_LOAD(ctx, "DR1", info->dbg_regs.dr1, loadfailout);
    V3_CHKPT_LOAD(ctx, "DR2", info->dbg_regs.dr2, loadfailout);
    V3_CHKPT_LOAD(ctx, "DR3", info->dbg_regs.dr3, loadfailout);
    // there is no DR4 or DR5
    V3_CHKPT_LOAD(ctx, "DR6", info->dbg_regs.dr6, loadfailout);
    V3_CHKPT_LOAD(ctx, "DR7", info->dbg_regs.dr7, loadfailout);

    // Segment registers
    V3_CHKPT_LOAD(ctx, "CS", info->segments.cs, loadfailout);
    V3_CHKPT_LOAD(ctx, "DS", info->segments.ds, loadfailout);
    V3_CHKPT_LOAD(ctx, "ES", info->segments.es, loadfailout);
    V3_CHKPT_LOAD(ctx, "FS", info->segments.fs, loadfailout);
    V3_CHKPT_LOAD(ctx, "GS", info->segments.gs, loadfailout);
    V3_CHKPT_LOAD(ctx, "SS", info->segments.ss, loadfailout);
    V3_CHKPT_LOAD(ctx, "LDTR", info->segments.ldtr, loadfailout);
    V3_CHKPT_LOAD(ctx, "GDTR", info->segments.gdtr, loadfailout);
    V3_CHKPT_LOAD(ctx, "IDTR", info->segments.idtr, loadfailout);
    V3_CHKPT_LOAD(ctx, "TR", info->segments.tr, loadfailout);

    // MSRs
    V3_CHKPT_LOAD(ctx, "STAR", info->msrs.star, loadfailout);
    V3_CHKPT_LOAD(ctx, "LSTAR", info->msrs.lstar, loadfailout);
    V3_CHKPT_LOAD(ctx, "SFMASK", info->msrs.sfmask, loadfailout);
    V3_CHKPT_LOAD(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, loadfailout);

    // Some components of guest state are captured in the shadow pager
    V3_CHKPT_LOAD(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, loadfailout);
    V3_CHKPT_LOAD(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, loadfailout);
    V3_CHKPT_LOAD(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, loadfailout);

    v3_chkpt_close_ctx(ctx); ctx = NULL;

    PrintDebug(info->vm_info, info, "Finished reading guest_info information\n");

    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);

    if (info->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(info) == -1) {
                PrintError(info->vm_info, info, "Failed to activate shadow page tables\n");
                goto loadfailout;
            }
        } else {
            if (v3_activate_passthrough_pt(info) == -1) {
                PrintError(info->vm_info, info, "Failed to activate passthrough page tables\n");
                goto loadfailout;
            }
        }
    }

    // Architecture-specific state
    switch (v3_mach_type) {
        case V3_SVM_REV3_CPU: {
            snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);

            ctx = v3_chkpt_open_ctx(chkpt, key_name);

            if (!ctx) {
                PrintError(info->vm_info, info, "Could not open context to load SVM core\n");
                goto loadfailout;
            }

            if (v3_svm_load_core(info, ctx) < 0) {
                PrintError(info->vm_info, info, "Failed to patch core %d\n", info->vcpu_id);
                goto loadfailout;
            }

            v3_chkpt_close_ctx(ctx); ctx = NULL;

            break;
        }
        case V3_VMX_EPT_UG_CPU: {
            snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);

            ctx = v3_chkpt_open_ctx(chkpt, key_name);

            if (!ctx) {
                PrintError(info->vm_info, info, "Could not open context to load VMX core\n");
                goto loadfailout;
            }

            if (v3_vmx_load_core(info, ctx) < 0) {
                PrintError(info->vm_info, info, "VMX checkpoint failed\n");
                goto loadfailout;
            }

            v3_chkpt_close_ctx(ctx); ctx = NULL;

            break;
        }
        default:
            PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
            goto loadfailout;
    }

    PrintDebug(info->vm_info, info, "Load of core succeeded\n");

    v3_print_guest_state(info);

    return 0;

 loadfailout:
    PrintError(info->vm_info, info, "Failed to load core\n");
    if (ctx) { v3_chkpt_close_ctx(ctx); }
    return -1;
}

// GEM5 - Hypercall for initiating transfer to gem5 (checkpoint)

static int save_core(struct guest_info * info, struct v3_chkpt * chkpt) {
    extern v3_cpu_arch_t v3_mach_type;
    struct v3_chkpt_ctx * ctx = NULL;
    char key_name[16];
    v3_reg_t tempreg;

    PrintDebug(info->vm_info, info, "Saving core\n");

    v3_print_guest_state(info);

    memset(key_name, 0, 16);

    snprintf(key_name, 16, "guest_info%d", info->vcpu_id);

    ctx = v3_chkpt_open_ctx(chkpt, key_name);

    if (!ctx) {
        PrintError(info->vm_info, info, "Unable to open context to save core\n");
        goto savefailout;
    }

    V3_CHKPT_SAVE(ctx, "RIP", info->rip, savefailout);

    // GPRs
    V3_CHKPT_SAVE(ctx, "RDI", info->vm_regs.rdi, savefailout);
    V3_CHKPT_SAVE(ctx, "RSI", info->vm_regs.rsi, savefailout);
    V3_CHKPT_SAVE(ctx, "RBP", info->vm_regs.rbp, savefailout);
    V3_CHKPT_SAVE(ctx, "RSP", info->vm_regs.rsp, savefailout);
    V3_CHKPT_SAVE(ctx, "RBX", info->vm_regs.rbx, savefailout);
    V3_CHKPT_SAVE(ctx, "RDX", info->vm_regs.rdx, savefailout);
    V3_CHKPT_SAVE(ctx, "RCX", info->vm_regs.rcx, savefailout);
    V3_CHKPT_SAVE(ctx, "RAX", info->vm_regs.rax, savefailout);
    V3_CHKPT_SAVE(ctx, "R8",  info->vm_regs.r8,  savefailout);
    V3_CHKPT_SAVE(ctx, "R9",  info->vm_regs.r9,  savefailout);
    V3_CHKPT_SAVE(ctx, "R10", info->vm_regs.r10, savefailout);
    V3_CHKPT_SAVE(ctx, "R11", info->vm_regs.r11, savefailout);
    V3_CHKPT_SAVE(ctx, "R12", info->vm_regs.r12, savefailout);
    V3_CHKPT_SAVE(ctx, "R13", info->vm_regs.r13, savefailout);
    V3_CHKPT_SAVE(ctx, "R14", info->vm_regs.r14, savefailout);
    V3_CHKPT_SAVE(ctx, "R15", info->vm_regs.r15, savefailout);

    // Control registers
    V3_CHKPT_SAVE(ctx, "CR0", info->ctrl_regs.cr0, savefailout);
    // there is no CR1
    V3_CHKPT_SAVE(ctx, "CR2", info->ctrl_regs.cr2, savefailout);
    V3_CHKPT_SAVE(ctx, "CR3", info->ctrl_regs.cr3, savefailout);
    V3_CHKPT_SAVE(ctx, "CR4", info->ctrl_regs.cr4, savefailout);
    // There are no CR5, CR6, CR7
    // CR8 is derived from apic_tpr
    tempreg = (info->ctrl_regs.apic_tpr >> 4) & 0xf;
    V3_CHKPT_SAVE(ctx, "CR8", tempreg, savefailout);
    V3_CHKPT_SAVE(ctx, "APIC_TPR", info->ctrl_regs.apic_tpr, savefailout);
    V3_CHKPT_SAVE(ctx, "RFLAGS", info->ctrl_regs.rflags, savefailout);
    V3_CHKPT_SAVE(ctx, "EFER", info->ctrl_regs.efer, savefailout);

    // Debug registers
    V3_CHKPT_SAVE(ctx, "DR0", info->dbg_regs.dr0, savefailout);
    V3_CHKPT_SAVE(ctx, "DR1", info->dbg_regs.dr1, savefailout);
    V3_CHKPT_SAVE(ctx, "DR2", info->dbg_regs.dr2, savefailout);
    V3_CHKPT_SAVE(ctx, "DR3", info->dbg_regs.dr3, savefailout);
    // there is no DR4 or DR5
    V3_CHKPT_SAVE(ctx, "DR6", info->dbg_regs.dr6, savefailout);
    V3_CHKPT_SAVE(ctx, "DR7", info->dbg_regs.dr7, savefailout);

    // Segment registers
    V3_CHKPT_SAVE(ctx, "CS", info->segments.cs, savefailout);
    V3_CHKPT_SAVE(ctx, "DS", info->segments.ds, savefailout);
    V3_CHKPT_SAVE(ctx, "ES", info->segments.es, savefailout);
    V3_CHKPT_SAVE(ctx, "FS", info->segments.fs, savefailout);
    V3_CHKPT_SAVE(ctx, "GS", info->segments.gs, savefailout);
    V3_CHKPT_SAVE(ctx, "SS", info->segments.ss, savefailout);
    V3_CHKPT_SAVE(ctx, "LDTR", info->segments.ldtr, savefailout);
    V3_CHKPT_SAVE(ctx, "GDTR", info->segments.gdtr, savefailout);
    V3_CHKPT_SAVE(ctx, "IDTR", info->segments.idtr, savefailout);
    V3_CHKPT_SAVE(ctx, "TR", info->segments.tr, savefailout);

    // MSRs
    V3_CHKPT_SAVE(ctx, "STAR", info->msrs.star, savefailout);
    V3_CHKPT_SAVE(ctx, "LSTAR", info->msrs.lstar, savefailout);
    V3_CHKPT_SAVE(ctx, "SFMASK", info->msrs.sfmask, savefailout);
    V3_CHKPT_SAVE(ctx, "KERN_GS_BASE", info->msrs.kern_gs_base, savefailout);

    // Some components of guest state are captured in the shadow pager
    V3_CHKPT_SAVE(ctx, "GUEST_CR3", info->shdw_pg_state.guest_cr3, savefailout);
    V3_CHKPT_SAVE(ctx, "GUEST_CR0", info->shdw_pg_state.guest_cr0, savefailout);
    V3_CHKPT_SAVE(ctx, "GUEST_EFER", info->shdw_pg_state.guest_efer, savefailout);

    v3_chkpt_close_ctx(ctx); ctx = NULL;

    // Architecture-specific state
    switch (v3_mach_type) {
        case V3_SVM_REV3_CPU: {
            snprintf(key_name, 16, "vmcb_data%d", info->vcpu_id);

            ctx = v3_chkpt_open_ctx(chkpt, key_name);

            if (!ctx) {
                PrintError(info->vm_info, info, "Could not open context to store SVM core\n");
                goto savefailout;
            }

            if (v3_svm_save_core(info, ctx) < 0) {
                PrintError(info->vm_info, info, "Unable to write VMCB\n");
                goto savefailout;
            }

            v3_chkpt_close_ctx(ctx); ctx = NULL;

            break;
        }
        case V3_VMX_EPT_UG_CPU: {
            snprintf(key_name, 16, "vmcs_data%d", info->vcpu_id);

            ctx = v3_chkpt_open_ctx(chkpt, key_name);

            if (!ctx) {
                PrintError(info->vm_info, info, "Could not open context to store VMX core\n");
                goto savefailout;
            }

            if (v3_vmx_save_core(info, ctx) == -1) {
                PrintError(info->vm_info, info, "VMX checkpoint failed\n");
                goto savefailout;
            }

            v3_chkpt_close_ctx(ctx); ctx = NULL;

            break;
        }
        default:
            PrintError(info->vm_info, info, "Invalid CPU Type (%d)\n", v3_mach_type);
            goto savefailout;
    }

    return 0;

 savefailout:
    PrintError(info->vm_info, info, "Failed to save core\n");
    if (ctx) { v3_chkpt_close_ctx(ctx); }
    return -1;
}

// GEM5 - Madhav has debug code here for printing instructions

int v3_chkpt_save_vm(struct v3_vm_info * vm, char * store, char * url) {
    struct v3_chkpt * chkpt = NULL;
    int ret = 0;
    int i = 0;

    chkpt = chkpt_open(vm, store, url, SAVE);

    if (chkpt == NULL) {
        PrintError(vm, VCORE_NONE, "Error creating checkpoint store for url %s\n", url);
        return -1;
    }

    /* If this guest is running we need to block it while the checkpoint occurs */
    if (vm->run_state == VM_RUNNING) {
        while (v3_raise_barrier(vm, NULL) == -1);
    }

    if ((ret = save_memory(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to save memory\n");
        goto out;
    }

    if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to save devices\n");
        goto out;
    }

    if ((ret = save_header(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to save header\n");
        goto out;
    }

    for (i = 0; i < vm->num_cores; i++) {
        if ((ret = save_core(&(vm->cores[i]), chkpt)) == -1) {
            PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
            goto out;
        }
    }

 out:

    /* Resume the guest if it was running */
    if (vm->run_state == VM_RUNNING) {
        v3_lower_barrier(vm);
    }

    chkpt_close(chkpt);

    return ret;
}

int v3_chkpt_load_vm(struct v3_vm_info * vm, char * store, char * url) {
    struct v3_chkpt * chkpt = NULL;
    int ret = 0;
    int i = 0;

    chkpt = chkpt_open(vm, store, url, LOAD);

    if (chkpt == NULL) {
        PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
        return -1;
    }

    /* If this guest is running we need to block it while the checkpoint occurs */
    if (vm->run_state == VM_RUNNING) {
        while (v3_raise_barrier(vm, NULL) == -1);
    }

    if ((ret = load_memory(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to load memory\n");
        goto out;
    }

    if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to load devices\n");
        goto out;
    }

    if ((ret = load_header(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to load header\n");
        goto out;
    }

    for (i = 0; i < vm->num_cores; i++) {
        if ((ret = load_core(&(vm->cores[i]), chkpt)) == -1) {
            PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
            goto out;
        }
    }

 out:

    /* Resume the guest if it was running and we didn't just trash the state */
    if (vm->run_state == VM_RUNNING) {
        if (ret == -1) {
            vm->run_state = VM_STOPPED;
        }

        /* We check the run state of the VM after every barrier,
           so this will immediately halt the VM.
        */
        v3_lower_barrier(vm);
    }

    chkpt_close(chkpt);

    return ret;
}

#ifdef V3_CONFIG_LIVE_MIGRATION

#define MOD_THRESHOLD   200  // pages below which we declare victory
#define ITER_THRESHOLD  32   // iterations above which we give up and declare victory
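
/* Termination heuristic for the pre-copy loop in v3_chkpt_send_vm below:
 * a round becomes the last one either when fewer than MOD_THRESHOLD pages
 * were dirtied during the round (the working set has converged), or after
 * ITER_THRESHOLD rounds (bounding total migration time for write-heavy
 * guests).
 */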

int v3_chkpt_send_vm(struct v3_vm_info * vm, char * store, char * url) {
    struct v3_chkpt * chkpt = NULL;
    int ret = 0;
    int iter = 0;
    bool last_modpage_iteration = false;
    struct v3_bitmap modified_pages_to_send;
    uint64_t start_time = 0;
    uint64_t stop_time = 0;
    int num_mod_pages = 0;
    struct mem_migration_state * mm_state = NULL;
    int i;

    // Currently will work only for shadow paging
    for (i = 0; i < vm->num_cores; i++) {
        if (vm->cores[i].shdw_pg_mode != SHADOW_PAGING) {
            PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
            return -1;
        }
    }

    chkpt = chkpt_open(vm, store, url, SAVE);

    if (chkpt == NULL) {
        PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
        return -1;
    }

    // In a send, the memory is copied incrementally first,
    // followed by the remainder of the state

    if (v3_bitmap_init(&modified_pages_to_send,
                       vm->mem_size >> 12 // number of pages in main region
                       ) == -1) {
        PrintError(vm, VCORE_NONE, "Could not initialize bitmap.\n");
        chkpt_close(chkpt);
        return -1;
    }

    // 0. Initialize bitmap to all 1s so the first round sends every page
    for (i = 0; i < modified_pages_to_send.num_bits; i++) {
        v3_bitmap_set(&modified_pages_to_send, i);
    }

    while (!last_modpage_iteration) {
        PrintDebug(vm, VCORE_NONE, "Modified memory page iteration %d\n", iter++);

        start_time = v3_get_host_time(&(vm->cores[0].time_state));

        // We will pause the VM for a short while
        // so that we can collect the set of changed pages
        if (v3_pause_vm(vm) == -1) {
            PrintError(vm, VCORE_NONE, "Could not pause VM\n");
            ret = -1;
            goto out;
        }

        if (iter == 1) {
            // special case: we already have the pages to send (all of them);
            // they are already in modified_pages_to_send
        } else {
            // normally, we are in the middle of a round,
            // and need to copy from the current tracking bitmap
            // to our send bitmap
            v3_bitmap_copy(&modified_pages_to_send, &(mm_state->modified_pages));
            // and now we need to remove our tracking
            stop_page_tracking(mm_state);
        }

        // are we done? (note that we are still paused)
        num_mod_pages = v3_bitmap_count(&modified_pages_to_send);
        if (num_mod_pages < MOD_THRESHOLD || iter > ITER_THRESHOLD) {
            // we are done, so we will not restart page tracking;
            // the vm is paused, and so we should be able
            // to just send the data
            PrintDebug(vm, VCORE_NONE, "Last modified memory page iteration.\n");
            last_modpage_iteration = true;
        } else {
            // we are not done, so we will restart page tracking
            // to prepare for another round of pages;
            // we will resume the VM as this happens
            if (!(mm_state = start_page_tracking(vm))) {
                PrintError(vm, VCORE_NONE, "Error enabling page tracking.\n");
                ret = -1;
                goto out;
            }
            if (v3_continue_vm(vm) == -1) {
                PrintError(vm, VCORE_NONE, "Error resuming the VM\n");
                stop_page_tracking(mm_state);
                ret = -1;
                goto out;
            }

            stop_time = v3_get_host_time(&(vm->cores[0].time_state));
            PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n", num_mod_pages, stop_time - start_time);
        }
    }

    // At this point, we are either paused and about to copy
    // the last chunk, or we are running, and will copy the last
    // round in parallel with current execution
    if (num_mod_pages > 0) {
        if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
            PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
            ret = -1;
            goto out;
        }
    } // we don't want to copy an empty bitmap here

    if (v3_bitmap_reset(&modified_pages_to_send) == -1) {
        PrintError(vm, VCORE_NONE, "Error resetting bitmap.\n");
        ret = -1;
        goto out;
    }

    // send bitmap of 0s to signal end of modpages
    if (save_inc_memory(vm, &modified_pages_to_send, chkpt) == -1) {
        PrintError(vm, VCORE_NONE, "Error sending incremental memory.\n");
        ret = -1;
        goto out;
    }

    // save the non-memory state
    if ((ret = v3_save_vm_devices(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to save devices\n");
        goto out;
    }

    if ((ret = save_header(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to save header\n");
        goto out;
    }

    for (i = 0; i < vm->num_cores; i++) {
        if ((ret = save_core(&(vm->cores[i]), chkpt)) == -1) {
            PrintError(vm, VCORE_NONE, "chkpt of core %d failed\n", i);
            goto out;
        }
    }

    stop_time = v3_get_host_time(&(vm->cores[0].time_state));
    PrintDebug(vm, VCORE_NONE, "num_mod_pages=%d\ndowntime=%llu\n", num_mod_pages, stop_time - start_time);
    PrintDebug(vm, VCORE_NONE, "Done sending VM!\n");

 out:
    v3_bitmap_deinit(&modified_pages_to_send);
    chkpt_close(chkpt);

    return ret;
}

int v3_chkpt_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
    struct v3_chkpt * chkpt = NULL;
    int i = 0;
    int ret = 0;
    struct v3_bitmap mod_pgs;

    // Currently will work only for shadow paging
    for (i = 0; i < vm->num_cores; i++) {
        if (vm->cores[i].shdw_pg_mode != SHADOW_PAGING) {
            PrintError(vm, VCORE_NONE, "Cannot currently handle nested paging\n");
            return -1;
        }
    }

    chkpt = chkpt_open(vm, store, url, LOAD);

    if (chkpt == NULL) {
        PrintError(vm, VCORE_NONE, "Error creating checkpoint store\n");
        return -1;
    }

    if (v3_bitmap_init(&mod_pgs, vm->mem_size >> 12) == -1) {
        PrintError(vm, VCORE_NONE, "Could not initialize bitmap.\n");
        chkpt_close(chkpt);
        return -1;
    }

    /* If this guest is running we need to block it while the checkpoint occurs */
    if (vm->run_state == VM_RUNNING) {
        while (v3_raise_barrier(vm, NULL) == -1);
    }

    i = 0;
    while (true) {
        // Receive a copy of the dirty-page bitmap, then the pages it names
        PrintDebug(vm, VCORE_NONE, "Memory page iteration %d\n", i++);
        int retval = load_inc_memory(vm, &mod_pgs, chkpt);
        if (retval == 1) {
            // end of receiving memory pages
            break;
        } else if (retval == -1) {
            PrintError(vm, VCORE_NONE, "Error receiving incremental memory.\n");
            ret = -1;
            goto out;
        }
    }

    if ((ret = v3_load_vm_devices(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to load devices\n");
        goto out;
    }

    if ((ret = load_header(vm, chkpt)) == -1) {
        PrintError(vm, VCORE_NONE, "Unable to load header\n");
        goto out;
    }

    for (i = 0; i < vm->num_cores; i++) {
        if ((ret = load_core(&(vm->cores[i]), chkpt)) == -1) {
            PrintError(vm, VCORE_NONE, "Error loading core state (core=%d)\n", i);
            goto out;
        }
    }

 out:
    if (ret == -1) {
        PrintError(vm, VCORE_NONE, "Unable to receive VM\n");
    } else {
        PrintDebug(vm, VCORE_NONE, "Done receiving the VM\n");
    }

    /* Resume the guest if it was running and we didn't just trash the state */
    if (vm->run_state == VM_RUNNING) {
        if (ret == -1) {
            PrintError(vm, VCORE_NONE, "VM was previously running. It is now borked. Pausing it.\n");
            vm->run_state = VM_STOPPED;
        }

        /* We check the run state of the VM after every barrier,
           so this will immediately halt the VM.
        */
        v3_lower_barrier(vm);
    }

    v3_bitmap_deinit(&mod_pgs);
    chkpt_close(chkpt);

    return ret;
}

#endif