Device manager updated with extended save/load model to facilitate devices that need...

[palacios.releases.git] / palacios / src / palacios / vmm.c
diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c

index 34b6c2c..de77277 100644 (file)
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -25,6 +25,8 @@
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vmm_sprintf.h>
 #include <palacios/vmm_extensions.h>
+#include <palacios/vmm_timeout.h>
+
 
 #ifdef V3_CONFIG_SVM
 #include <palacios/svm.h>
@@ -33,8 +35,14 @@
 #include <palacios/vmx.h>
 #endif
 
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>
+#endif
+
 
 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
+v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
+
 struct v3_os_hooks * os_hooks = NULL;
 int v3_dbg_enable = 0;
 
@@ -92,15 +100,19 @@ static void deinit_cpu(void * arg) {
 }
 
 
-
-void Init_V3(struct v3_os_hooks * hooks, int num_cpus) {
-    int i;
+void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) {
+    int i = 0;
+    int minor = 0;
+    int major = 0;
 
     V3_Print("V3 Print statement to fix a Kitten page fault bug\n");
 
     // Set global variables. 
     os_hooks = hooks;
 
+    // Determine the global machine type
+    v3_mach_type = V3_INVALID_CPU;
+
     for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
        v3_cpu_types[i] = V3_INVALID_CPU;
     }
@@ -119,24 +131,30 @@ void Init_V3(struct v3_os_hooks * hooks, int num_cpus) {
     V3_init_symmod();
 #endif
 
+#ifdef V3_CONFIG_CHECKPOINT
+    V3_init_checkpoint();
+#endif
 
-
-#ifdef V3_CONFIG_MULTITHREAD_OS
     if ((hooks) && (hooks->call_on_cpu)) {
 
-       for (i = 0; i < num_cpus; i++) {
+        for (i = 0; i < num_cpus; i++) {
+            major = i / 8;
+            minor = i % 8;
 
-           V3_Print("Initializing VMM extensions on cpu %d\n", i);
-           hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
-       }
-    }
-#else 
-    init_cpu(0);
-#endif
+            if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
+                V3_Print("Initializing VMM extensions on cpu %d\n", i);
+                hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
 
+               if (v3_mach_type == V3_INVALID_CPU) {
+                   v3_mach_type = v3_cpu_types[i];
+               }   
+            }
+        }
+    }
 }
 
 
+
 void Shutdown_V3() {
     int i;
 
@@ -149,8 +167,11 @@ void Shutdown_V3() {
     V3_deinit_symmod();
 #endif
 
+#ifdef V3_CONFIG_CHECKPOINT
+    V3_deinit_checkpoint();
+#endif
+
 
-#ifdef V3_CONFIG_MULTITHREAD_OS
     if ((os_hooks) && (os_hooks->call_on_cpu)) {
        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
@@ -159,9 +180,6 @@ void Shutdown_V3() {
            }
        }
     }
-#else 
-    deinit_cpu(0);
-#endif
 
 }
 
@@ -204,7 +222,7 @@ static int start_core(void * p)
     PrintDebug("virtual core %u (on logical core %u): in start_core (RIP=%p)\n", 
               core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
 
-    switch (v3_cpu_types[0]) {
+    switch (v3_mach_type) {
 #ifdef V3_CONFIG_SVM
        case V3_SVM_CPU:
        case V3_SVM_REV3_CPU:
@@ -228,11 +246,7 @@ static int start_core(void * p)
 
 
 // For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
-#ifdef V3_CONFIG_MULTITHREAD_OS
 #define MAX_CORES 32
-#else
-#define MAX_CORES 1
-#endif
 
 
 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
@@ -241,6 +255,28 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
     uint32_t avail_cores = 0;
     int vcore_id = 0;
 
+
+    if (vm->run_state != VM_STOPPED) {
+        PrintError("VM has already been launched (state=%d)\n", (int)vm->run_state);
+        return -1;
+    }
+
+    
+    // Do not run if any core is using shadow paging and we are out of 4 GB bounds
+    for (i=0;i<vm->num_cores;i++) { 
+       if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
+           if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
+               PrintError("Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
+               PrintError("Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
+               PrintError("If you would like to proceed anyway, remove this check and recompile Palacios.\n");
+               PrintError("Alternatively, change this VM to use nested paging.\n");
+               return -1;
+           }
+       }
+    }
+
+
+
     /// CHECK IF WE ARE MULTICORE ENABLED....
 
     V3_Print("V3 --  Starting VM (%u cores)\n", vm->num_cores);
@@ -253,7 +289,11 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
        int minor = i % 8;
        
        if (core_mask[major] & (0x1 << minor)) {
-           avail_cores++;
+           if (v3_cpu_types[i] == V3_INVALID_CPU) {
+               core_mask[major] &= ~(0x1 << minor);
+           } else {
+               avail_cores++;
+           }
        }
     }
 
@@ -264,9 +304,11 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
        return -1;
     }
 
-#ifdef V3_CONFIG_MULTITHREAD_OS
-    // spawn off new threads, for other cores
-    for (i = 0, vcore_id = 1; (i < MAX_CORES) && (vcore_id < vm->num_cores); i++) {
+    vm->run_state = VM_RUNNING;
+
+    // Spawn off threads for each core. 
+    // We work backwards, so that core 0 is always started last.
+    for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {
        int major = 0;
        int minor = 0;
        struct guest_info * core = &(vm->cores[vcore_id]);
@@ -282,19 +324,12 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
 
            i--; // We reset the logical core idx. Not strictly necessary I guess... 
        } else {
-
-           if (i == V3_Get_CPU()) {
-               // We skip the local CPU because it is reserved for vcore 0
-               continue;
-           }
-           
            core_idx = i;
        }
 
        major = core_idx / 8;
        minor = core_idx % 8;
 
-
        if ((core_mask[major] & (0x1 << minor)) == 0) {
            PrintError("Logical CPU %d not available for virtual core %d; not started\n",
                       core_idx, vcore_id);
@@ -316,7 +351,7 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
        PrintDebug("run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
                   core_idx, start_core, core, core->exec_name);
 
-       // TODO: actually manage these threads instead of just launching them
+       core->core_run_state = CORE_STOPPED;  // core zero will turn itself on
        core->pcpu_id = core_idx;
        core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);
 
@@ -326,16 +361,11 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
            return -1;
        }
 
-       vcore_id++;
+       vcore_id--;
     }
-#endif
-
-    sprintf(vm->cores[0].exec_name, "%s", vm->name);
 
-    vm->cores[0].pcpu_id = V3_Get_CPU();
-
-    if (start_core(&(vm->cores[0])) != 0) {
-       PrintError("Error starting VM core 0\n");
+    if (vcore_id >= 0) {
+       PrintError("Error starting VM: Not enough available CPU cores\n");
        v3_stop_vm(vm);
        return -1;
     }
@@ -377,24 +407,65 @@ int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
     struct guest_info * core = NULL;
 
-    if(vcore_id < 0 || vcore_id > vm->num_cores) {
+    if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
+       PrintError("Attempted to migrate invalid virtual core (%d)\n", vcore_id);
        return -1;
     }
 
     core = &(vm->cores[vcore_id]);
 
-    if(target_cpu != core->pcpu_id &&
-       core->core_move_state != CORE_MOVE_PENDING){
-       core->core_move_state = CORE_MOVE_PENDING;
-       core->target_pcpu_id = target_cpu;
-       v3_interrupt_cpu(vm, core->pcpu_id, 0);
+    if (target_cpu == core->pcpu_id) {
+       PrintError("Attempted to migrate to local core (%d)\n", target_cpu);
+       // well that was pointless -- the core is already on the requested CPU, so report success and do nothing
+       return 0;
+    }
 
-       while(core->core_move_state != CORE_MOVE_DONE){
-           v3_yield(NULL);
-       }
+    if (core->core_thread == NULL) {
+       PrintError("Attempted to migrate a core without a valid thread context\n");
+       return -1;
     }
 
+    while (v3_raise_barrier(vm, NULL) == -1);
+    // The loop above retries for as long as v3_raise_barrier() returns -1; both exits below call v3_lower_barrier().
+    V3_Print("Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
+
+    // Double check that we weren't preemptively migrated
+    if (target_cpu != core->pcpu_id) {    
+
+       V3_Print("Moving Core\n");
+       // When the core's current physical CPU is a VMX type, v3_flush_vmx_vm_core() is run on that CPU
+       // (via V3_Call_On_CPU) before the thread is moved.
+#ifdef V3_CONFIG_VMX
+       switch (v3_cpu_types[core->pcpu_id]) {
+           case V3_VMX_CPU:
+           case V3_VMX_EPT_CPU:
+           case V3_VMX_EPT_UG_CPU:
+               PrintDebug("Flushing VMX Guest CPU %d\n", core->vcpu_id);
+               V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
+               break;
+           default:
+               break;
+       }
+#endif
+       // A non-zero result from V3_MOVE_THREAD_TO_CPU is treated as failure: lower the barrier and return -1
+       if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
+           PrintError("Failed to move Vcore %d to CPU %d\n", 
+                      core->vcpu_id, target_cpu);
+           v3_lower_barrier(vm);
+           return -1;
+       } 
         
+       /* There will be a benign race window here:
+          core->pcpu_id will be set to the target core before it is fully "migrated".
+          However, the core will NEVER run on the old core again; it is just in flight to the new core.
+       */
+       core->pcpu_id = target_cpu;
+
+       V3_Print("core now at %d\n", core->pcpu_id);    
+    }
+
+    v3_lower_barrier(vm);
+
     return 0;
 }
 
@@ -402,9 +473,20 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
 
 int v3_stop_vm(struct v3_vm_info * vm) {
 
+    if ((vm->run_state != VM_RUNNING) && 
+       (vm->run_state != VM_SIMULATING)) {
+       PrintError("Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
+       return -1;
+    }
+
     vm->run_state = VM_STOPPED;
 
-    // force exit all cores via a cross call/IPI
+    // Sanity check to catch any weird execution states
+    if (v3_wait_for_barrier(vm, NULL) == 0) {
+       v3_lower_barrier(vm);
+    }
+    
+    // XXX force exit all cores via a cross call/IPI XXX
 
     while (1) {
        int i = 0;
@@ -420,7 +502,7 @@ int v3_stop_vm(struct v3_vm_info * vm) {
            break;
        }
 
-       v3_yield(NULL);
+       v3_yield(NULL,-1);
     }
     
     V3_Print("VM stopped. Returning\n");
@@ -451,18 +533,150 @@ int v3_continue_vm(struct v3_vm_info * vm) {
        return -1;
     }
 
+    vm->run_state = VM_RUNNING;
+
     v3_lower_barrier(vm);
 
+    return 0;
+}
+
+// Per-core timeout handler installed by v3_simulate_vm(); private_data is the struct v3_bitmap passed at registration.
+// Sets this core's bit (indexed by vcpu_id) and then yields until v3_bitmap_check() stops returning 1. Always returns 0.
+static int sim_callback(struct guest_info * core, void * private_data) {
+    struct v3_bitmap * timeout_map = private_data;
+    // Signal that this virtual core has reached its timeout
+    v3_bitmap_set(timeout_map, core->vcpu_id);
+    
+    V3_Print("Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
+    // Park here; v3_simulate_vm() later calls v3_bitmap_reset() on the map, which presumably clears this bit
+    while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
+       v3_yield(NULL,-1);
+    }
+
+    return 0;
+}
+
+// Let a paused VM run for msecs milliseconds worth of cycles, then put it back into VM_PAUSED.
+// A timeout of (msecs * V3_CPU_KHZ()) cycles is registered on every core with sim_callback as its handler.
+// Returns 0 on completion, or -1 if the VM is not VM_PAUSED or a core timeout cannot be registered.
+int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
+    struct v3_bitmap timeout_map;
+    int i = 0;
+    int all_blocked = 0;
+    uint64_t cycles = 0;
+    uint64_t cpu_khz = V3_CPU_KHZ();
+
+    if (vm->run_state != VM_PAUSED) {
+       PrintError("VM must be paused before simulation begins\n");
+       return -1;
+    }
+
+    /* AT this point VM is paused */
+    
+    // initialize bitmap (sized by vm->num_cores; sim_callback sets bit vcpu_id when that core's timeout fires)
+    v3_bitmap_init(&timeout_map, vm->num_cores);
+
+    // NOTE(review): timeout_map lives on this stack frame and its address is handed to every core below;
+    // no bitmap teardown call appears in this function -- confirm whether v3_bitmap_init() allocates.
+
+    // calculate cycles from msecs...
+    // IMPORTANT: Floating point not allowed.
+    cycles = (msecs * cpu_khz);
+    // cpu_khz is uint64_t, so the product is 64-bit; kHz equals cycles per msec (assuming V3_CPU_KHZ() is in kHz)
+
+
+    V3_Print("Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
+
+    // set timeout
+    
+    for (i = 0; i < vm->num_cores; i++) {
+       if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
+           PrintError("Could not register simulation timeout for core %d\n", i);
+           return -1;  // NOTE(review): cores 0..i-1 keep timeouts pointing at timeout_map -- confirm cleanup
+       }
+    }
+
+    V3_Print("timeouts set on all cores\n ");
+
+    
+    // Run the simulation
+//    vm->run_state = VM_SIMULATING;
     vm->run_state = VM_RUNNING;
+    v3_lower_barrier(vm);
+
+
+    V3_Print("Barrier lowered: We are now Simulating!!\n");
+
+    // block until simulation is complete, i.e. until every core's bit in timeout_map reads as set
+    while (all_blocked == 0) {
+       all_blocked = 1;
+
+       for (i = 0; i < vm->num_cores; i++) {
+           if (v3_bitmap_check(&timeout_map, i)  == 0) {
+               all_blocked = 0;
+           }
+       }
+
+       if (all_blocked == 1) {
+           break;
+       }
+
+       v3_yield(NULL,-1);
+    }
+
+
+    V3_Print("Simulation is complete\n");
+
+    // Simulation is complete
+    // Reset back to PAUSED state
+    // Sequence: raise the barrier without waiting, reset the bitmap (presumably releasing sim_callback), then wait
+    v3_raise_barrier_nowait(vm, NULL);
+    vm->run_state = VM_PAUSED;
+    
+    v3_bitmap_reset(&timeout_map);
+
+    v3_wait_for_barrier(vm, NULL);
 
     return 0;
+
+}
 
+#ifdef V3_CONFIG_CHECKPOINT
+#include <palacios/vmm_checkpoint.h>  // NOTE(review): also included near the top of this file under the same guard
+// Checkpoint save entry point: forwards vm/store/url unchanged to v3_chkpt_save_vm() and returns its result.
+int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
+    return v3_chkpt_save_vm(vm, store, url);
+}
+
+// Checkpoint load entry point: forwards vm/store/url unchanged to v3_chkpt_load_vm() and returns its result.
+int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
+    return v3_chkpt_load_vm(vm, store, url);
+}
+// The send/receive wrappers below are compiled only when V3_CONFIG_LIVE_MIGRATION is defined as well.
+#ifdef V3_CONFIG_LIVE_MIGRATION
+int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {  // forwards to v3_chkpt_send_vm()
+    return v3_chkpt_send_vm(vm, store, url);
+}
+
+// Forwards vm/store/url unchanged to v3_chkpt_receive_vm() and returns its result.
+int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
+    return v3_chkpt_receive_vm(vm, store, url);
+}
+#endif // V3_CONFIG_LIVE_MIGRATION
+
+#endif // V3_CONFIG_CHECKPOINT
+
 
 int v3_free_vm(struct v3_vm_info * vm) {
     int i = 0;
     // deinitialize guest (free memory, etc...)
 
+    if ((vm->run_state != VM_STOPPED) &&
+       (vm->run_state != VM_ERROR)) {
+       PrintError("Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
+       return -1;
+    }
+
     v3_free_vm_devices(vm);
 
     // free cores
@@ -511,42 +725,46 @@ v3_cpu_mode_t v3_get_host_cpu_mode() {
 #endif 
 
 
-#define V3_Yield(addr)                                 \
-    do {                                               \
-       extern struct v3_os_hooks * os_hooks;           \
-       if ((os_hooks) && (os_hooks)->yield_cpu) {      \
-           (os_hooks)->yield_cpu();                    \
-       }                                               \
-    } while (0)                                                \
 
 
 
-void v3_yield_cond(struct guest_info * info) {
+void v3_yield_cond(struct guest_info * info, int usec) {
     uint64_t cur_cycle;
     cur_cycle = v3_get_host_time(&info->time_state);
 
     if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
+       //PrintDebug("Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", 
+       //           (void *)cur_cycle, (void *)info->yield_start_cycle, 
+       //         (void *)info->yield_cycle_period);
+       // Only reached once more than one yield period has elapsed: usec < 0 yields, otherwise sleeps for usec
+       if (usec < 0) { 
+           V3_Yield();
+       } else {
+           V3_Sleep(usec);
+       }
 
-       /*
-         PrintDebug("Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", 
-         (void *)cur_cycle, (void *)info->yield_start_cycle, (void *)info->yield_cycle_period);
-       */
-       V3_Yield();
-       info->yield_start_cycle = v3_get_host_time(&info->time_state);
+        info->yield_start_cycle +=  info->vm_info->yield_cycle_period;  // advance the window by exactly one period
     }
 }
-
+ 
 
 /* 
  * unconditional cpu yield 
  * if the yielding thread is a guest context, the guest quantum is reset on resumption 
  * Non guest context threads should call this function with a NULL argument
- */
-void v3_yield(struct guest_info * info) {
-    V3_Yield();
+ *
+ * usec <0  => the non-timed yield is used
+ * usec >=0 => the timed yield is used, which also usually implies interruptible
+ */ 
+void v3_yield(struct guest_info * info, int usec) {
+    if (usec < 0) { // negative usec selects the untimed V3_Yield()
+       V3_Yield();
+    } else {
+       V3_Sleep(usec); // timed variant, given usec as passed in
+    }
 
     if (info) {
-       info->yield_start_cycle = v3_get_host_time(&info->time_state);
+        info->yield_start_cycle +=  info->vm_info->yield_cycle_period;  // guest context only: move the yield window forward by one period
     }
 }
 
@@ -567,7 +785,6 @@ void v3_print_cond(const char * fmt, ...) {
 }
 
 
-#ifdef V3_CONFIG_MULTITHREAD_OS
 
 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
     extern struct v3_os_hooks * os_hooks;
@@ -576,12 +793,11 @@ void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
        (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
     }
 }
-#endif
 
 
 
 int v3_vm_enter(struct guest_info * info) {
-    switch (v3_cpu_types[0]) {
+    switch (v3_mach_type) {
 #ifdef V3_CONFIG_SVM
        case V3_SVM_CPU:
        case V3_SVM_REV3_CPU: