2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
32 #include <palacios/svm.h>
35 #include <palacios/vmx.h>
38 #ifdef V3_CONFIG_CHECKPOINT
39 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU detected virtualization architecture; entries are
 * V3_INVALID_CPU until probed by init_cpu(). */
43 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Machine-wide architecture, set in Init_V3() from the first probed CPU. */
44 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host OS callback table supplied by the embedding kernel. */
46 struct v3_os_hooks * os_hooks = NULL;
/* Global debug-print gate consulted by v3_print_cond(). */
47 int v3_dbg_enable = 0;
/* Per-CPU init callback (run via hooks->call_on_cpu): probes this CPU for
 * SVM or VMX support and initializes the matching backend, recording the
 * result in v3_cpu_types[cpu_id].
 * NOTE(review): extraction gaps -- the #ifdef/else scaffolding between the
 * SVM and VMX probes and the closing braces are missing from this view. */
52 static void init_cpu(void * arg) {
53 uint32_t cpu_id = (uint32_t)(addr_t)arg;
56 if (v3_is_svm_capable()) {
57 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
58 v3_init_svm_cpu(cpu_id);
63 if (v3_is_vmx_capable()) {
64 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
65 v3_init_vmx_cpu(cpu_id);
/* Reached when neither virtualization extension is available. */
70 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Per-CPU teardown callback: dispatches on the architecture recorded in
 * v3_cpu_types[cpu_id] and deinitializes the matching backend.
 * NOTE(review): extraction gaps -- the SVM case labels, break statements,
 * and default label are missing from this view. */
75 static void deinit_cpu(void * arg) {
76 uint32_t cpu_id = (uint32_t)(addr_t)arg;
79 switch (v3_cpu_types[cpu_id]) {
83 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
84 v3_deinit_svm_cpu(cpu_id);
90 case V3_VMX_EPT_UG_CPU:
91 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
92 v3_deinit_vmx_cpu(cpu_id);
97 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Global VMM initialization, called once by the embedding host OS.
 * Stores the host hook table, resets the per-CPU architecture table,
 * registers subsystems (shadow paging, extensions, optional symmod /
 * checkpoint), then runs init_cpu() on every CPU selected by cpu_mask
 * (NULL mask => all CPUs).  The first probed CPU's type becomes the
 * global v3_mach_type. */
103 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) {
108 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
110 // Set global variables.
113 // Determine the global machine type
114 v3_mach_type = V3_INVALID_CPU;
116 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
117 v3_cpu_types[i] = V3_INVALID_CPU;
120 // Register all the possible device types
123 // Register all shadow paging handlers
124 V3_init_shdw_paging();
126 // Register all extensions
127 V3_init_extensions();
130 #ifdef V3_CONFIG_SYMMOD
134 #ifdef V3_CONFIG_CHECKPOINT
135 V3_init_checkpoint();
138 if ((hooks) && (hooks->call_on_cpu)) {
140 for (i = 0; i < num_cpus; i++) {
/* cpu_mask is a byte array bitmap: byte = cpu/8, bit = cpu%8. */
144 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
145 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
146 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
148 if (v3_mach_type == V3_INVALID_CPU) {
149 v3_mach_type = v3_cpu_types[i];
/* NOTE(review): interior of the global VMM shutdown routine -- its function
 * header (presumably Shutdown_V3) is not visible in this chunk; confirm
 * against the full file.  Mirrors Init_V3: tears down subsystems, then
 * runs deinit_cpu() on every CPU that was successfully probed. */
162 V3_deinit_shdw_paging();
164 V3_deinit_extensions();
166 #ifdef V3_CONFIG_SYMMOD
170 #ifdef V3_CONFIG_CHECKPOINT
171 V3_deinit_checkpoint();
175 if ((os_hooks) && (os_hooks->call_on_cpu)) {
176 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
177 if (v3_cpu_types[i] != V3_INVALID_CPU) {
178 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
179 //deinit_cpu((void *)(addr_t)i);
187 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
188 return v3_cpu_types[cpu_id];
/* Create and configure a VM from the given config image; returns the new
 * v3_vm_info or (per the visible error path) fails if configuration fails.
 * The VM name is clamped to the 128-byte vm->name buffer.
 * NOTE(review): gaps in this view hide the return statements and the
 * branch that generates a default name when `name` is NULL. */
192 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
193 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
196 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
200 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
204 } else if (strlen(name) >= 128) {
/* NOTE(review): message says 128 chars, but the strncpy below copies at
 * most 127 characters (plus the NUL guaranteed by the memset). */
205 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
/* Zero-fill first so the copied name is always NUL-terminated. */
208 memset(vm->name, 0, 128);
209 strncpy(vm->name, name, 127);
/* Thread entry point for one virtual core: dispatches on the global
 * v3_mach_type to the SVM or VMX run loop and returns its status.
 * NOTE(review): the SVM/VMX case-label lists are partly missing from
 * this view (only one label of each group is visible). */
217 static int start_core(void * p)
219 struct guest_info * core = (struct guest_info *)p;
222 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
223 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
225 switch (v3_mach_type) {
228 case V3_SVM_REV3_CPU:
229 return v3_start_svm_guest(core);
235 case V3_VMX_EPT_UG_CPU:
236 return v3_start_vmx_guest(core);
240 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
248 // For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
/* Launch a stopped VM: validate the run state and shadow-paging/4GB
 * constraint, prune the CPU bitmap to CPUs with virtualization support,
 * then spawn one host thread per virtual core (in reverse order, so
 * vcore 0 starts last).  cpu_mask is treated as a byte-array bitmap. */
252 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
254 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
255 uint32_t avail_cores = 0;
259 if (vm->run_state != VM_STOPPED) {
260 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
265 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
266 for (i=0;i<vm->num_cores;i++) {
267 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
268 if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
269 PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
270 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
271 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
272 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
280 /// CHECK IF WE ARE MULTICORE ENABLED....
282 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
283 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
286 // Check that enough cores are present in the mask to handle vcores
287 for (i = 0; i < MAX_CORES; i++) {
291 if (core_mask[major] & (0x1 << minor)) {
/* Clear mask bits for CPUs lacking virtualization support so they
 * are never chosen below. */
292 if (v3_cpu_types[i] == V3_INVALID_CPU) {
293 core_mask[major] &= ~(0x1 << minor);
301 if (vm->num_cores > avail_cores) {
302 PrintError(vm, VCORE_NONE, "Attempted to start a VM with too many cores (vm->num_cores = %d, avail_cores = %d, MAX=%d)\n",
303 vm->num_cores, avail_cores, MAX_CORES);
307 vm->run_state = VM_RUNNING;
309 // Spawn off threads for each core.
310 // We work backwards, so that core 0 is always started last.
311 for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {
314 struct guest_info * core = &(vm->cores[vcore_id]);
315 char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu");
316 uint32_t core_idx = 0;
318 if (specified_cpu != NULL) {
319 core_idx = atoi(specified_cpu);
/* NOTE(review): core_idx is uint32_t, so (core_idx < 0) is always
 * false; a negative target_cpu string wraps to a huge value and is
 * only caught by the >= MAX_CORES test. */
321 if ((core_idx < 0) || (core_idx >= MAX_CORES)) {
322 PrintError(vm, VCORE_NONE, "Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES);
325 i--; // We reset the logical core idx. Not strictly necessary I guess...
330 major = core_idx / 8;
331 minor = core_idx % 8;
333 if ((core_mask[major] & (0x1 << minor)) == 0) {
334 PrintError(vm, VCORE_NONE, "Logical CPU %d not available for virtual core %d; not started\n",
337 if (specified_cpu != NULL) {
338 PrintError(vm, VCORE_NONE, "CPU was specified explicitly (%d). HARD ERROR\n", core_idx);
346 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
/* NOTE(review): unbounded sprintf into core->exec_name -- safe only
 * because vm->name is capped at 127 chars; confirm exec_name size. */
349 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
351 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
352 core_idx, start_core, core, core->exec_name);
354 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
355 core->pcpu_id = core_idx;
356 core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);
358 if (core->core_thread == NULL) {
359 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
/* Fallthrough error path: loop exhausted without placing every vcore. */
368 PrintError(vm, VCORE_NONE, "Error starting VM: Not enough available CPU cores\n");
/* Reset a single virtual core to the given RIP, dispatching on the
 * architecture of the physical CPU the core currently runs on.
 * NOTE(review): the SVM case labels and default branch structure are
 * partly missing from this view. */
379 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
381 switch (v3_cpu_types[core->pcpu_id]) {
384 case V3_SVM_REV3_CPU:
385 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
386 return v3_reset_svm_vm_core(core, rip);
391 case V3_VMX_EPT_UG_CPU:
392 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
393 return v3_reset_vmx_vm_core(core, rip);
397 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
406 /* move a virtual core to different physical core */
/* Migrate virtual core vcore_id to physical CPU target_cpu.  Validates the
 * vcore id, rejects a no-op migration, raises the VM barrier to quiesce
 * all cores, flushes architecture state (VMX flush visible here), moves
 * the host thread, updates pcpu_id, and lowers the barrier. */
407 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
408 struct guest_info * core = NULL;
410 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
411 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
415 core = &(vm->cores[vcore_id]);
417 if (target_cpu == core->pcpu_id) {
418 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
419 // well that was pointless
423 if (core->core_thread == NULL) {
424 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
/* Spin until the barrier is raised; quiesces all vcores before moving. */
428 while (v3_raise_barrier(vm, NULL) == -1);
430 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
432 // Double check that we weren't preemptively migrated
433 if (target_cpu != core->pcpu_id) {
435 V3_Print(vm, core, "Moving Core\n");
439 switch (v3_cpu_types[core->pcpu_id]) {
442 case V3_VMX_EPT_UG_CPU:
443 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
/* VMCS state is CPU-local; flush it on the old CPU before moving. */
444 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
451 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
452 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
453 core->vcpu_id, target_cpu);
454 v3_lower_barrier(vm);
458 /* There will be a benign race window here:
459 core->pcpu_id will be set to the target core before its fully "migrated"
460 However the core will NEVER run on the old core again, its just in flight to the new core
462 core->pcpu_id = target_cpu;
464 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
467 v3_lower_barrier(vm);
/* Stop a running (or simulating) VM: set run_state to VM_STOPPED, let the
 * barrier settle, then poll until every core reports CORE_STOPPED.
 * NOTE(review): the retry/yield loop around the still_running poll is
 * missing from this view. */
474 int v3_stop_vm(struct v3_vm_info * vm) {
476 if ((vm->run_state != VM_RUNNING) &&
477 (vm->run_state != VM_SIMULATING)) {
478 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
482 vm->run_state = VM_STOPPED;
484 // Sanity check to catch any weird execution states
485 if (v3_wait_for_barrier(vm, NULL) == 0) {
486 v3_lower_barrier(vm);
489 // XXX force exit all cores via a cross call/IPI XXX
493 int still_running = 0;
495 for (i = 0; i < vm->num_cores; i++) {
496 if (vm->cores[i].core_run_state != CORE_STOPPED) {
501 if (still_running == 0) {
508 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/* Pause a running VM: raise the barrier (spinning until it succeeds) so
 * all vcores are quiesced, then mark the VM paused.  The barrier stays
 * raised until v3_continue_vm() lowers it. */
514 int v3_pause_vm(struct v3_vm_info * vm) {
516 if (vm->run_state != VM_RUNNING) {
517 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
521 while (v3_raise_barrier(vm, NULL) == -1);
523 vm->run_state = VM_PAUSED;
/* Resume a paused VM: flip run_state back to VM_RUNNING and lower the
 * barrier raised by v3_pause_vm(), releasing all vcores. */
529 int v3_continue_vm(struct v3_vm_info * vm) {
531 if (vm->run_state != VM_PAUSED) {
532 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
536 vm->run_state = VM_RUNNING;
538 v3_lower_barrier(vm);
/* Core-timeout callback used by v3_simulate_vm(): marks this vcore's bit
 * in the shared timeout bitmap, then blocks while the bit stays set --
 * v3_simulate_vm() clears the map to release the cores.
 * NOTE(review): the body of the wait loop (presumably a yield) is missing
 * from this view. */
545 static int sim_callback(struct guest_info * core, void * private_data) {
546 struct v3_bitmap * timeout_map = private_data;
548 v3_bitmap_set(timeout_map, core->vcpu_id);
550 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
552 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/* Run a paused VM for approximately `msecs` milliseconds of guest time:
 * arm a cycle timeout on every core (msecs * CPU kHz == cycles), release
 * the barrier, spin until every core's bit is set in the timeout bitmap,
 * then re-raise the barrier and return the VM to VM_PAUSED. */
562 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
563 struct v3_bitmap timeout_map;
567 uint64_t cpu_khz = V3_CPU_KHZ();
569 if (vm->run_state != VM_PAUSED) {
570 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
574 /* AT this point VM is paused */
577 v3_bitmap_init(&timeout_map, vm->num_cores);
582 // calculate cycles from msecs...
583 // IMPORTANT: Floating point not allowed.
/* kHz * ms == cycles (1000s cancel), so integer math suffices. */
584 cycles = (msecs * cpu_khz);
588 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
592 for (i = 0; i < vm->num_cores; i++) {
593 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
594 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
599 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
602 // Run the simulation
603 // vm->run_state = VM_SIMULATING;
604 vm->run_state = VM_RUNNING;
605 v3_lower_barrier(vm);
608 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
610 // block until simulation is complete
611 while (all_blocked == 0) {
614 for (i = 0; i < vm->num_cores; i++) {
615 if (v3_bitmap_check(&timeout_map, i) == 0) {
620 if (all_blocked == 1) {
628 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
630 // Simulation is complete
631 // Reset back to PAUSED state
633 v3_raise_barrier_nowait(vm, NULL);
634 vm->run_state = VM_PAUSED;
/* Clearing the bitmap releases the cores spinning in sim_callback(). */
636 v3_bitmap_reset(&timeout_map);
638 v3_wait_for_barrier(vm, NULL);
/* Snapshot VM and per-vcore state into the caller-provided v3_vm_state.
 * Translates internal run-state / CPU-mode / paging-mode / memory-mode
 * enums to their public V3_* counterparts; copies at most the smaller of
 * the caller's s->num_vcores and the VM's actual core count. */
644 int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)
647 uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;
649 switch (vm->run_state) {
650 case VM_INVALID: s->state = V3_VM_INVALID; break;
651 case VM_RUNNING: s->state = V3_VM_RUNNING; break;
652 case VM_STOPPED: s->state = V3_VM_STOPPED; break;
653 case VM_PAUSED: s->state = V3_VM_PAUSED; break;
654 case VM_ERROR: s->state = V3_VM_ERROR; break;
655 case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
656 default: s->state = V3_VM_UNKNOWN; break;
659 s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
660 s->mem_size = vm->mem_size;
662 s->num_vcores = numcores;
664 for (i=0;i<numcores;i++) {
665 switch (vm->cores[i].core_run_state) {
666 case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
667 case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
668 case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
669 default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;
671 switch (vm->cores[i].cpu_mode) {
672 case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
673 case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
674 case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
675 case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
676 case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
677 case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
678 default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
680 switch (vm->cores[i].shdw_pg_mode) {
681 case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
682 case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
683 default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
685 switch (vm->cores[i].mem_mode) {
686 case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
687 case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
688 default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
691 s->vcore[i].pcore=vm->cores[i].pcpu_id;
692 s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
693 s->vcore[i].num_exits=vm->cores[i].num_exits;
700 #ifdef V3_CONFIG_CHECKPOINT
701 #include <palacios/vmm_checkpoint.h>
/*
 * Checkpoint the VM's state to the given store/url.
 * Thin wrapper over the checkpoint subsystem; returns its status code.
 * (Reconstructed: the extraction fused line numbers into the text and
 * dropped the closing brace.)
 */
int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_save_vm(vm, store, url);
}
/*
 * Restore the VM's state from the given store/url.
 * Thin wrapper over the checkpoint subsystem; returns its status code.
 * (Reconstructed: the extraction fused line numbers into the text and
 * dropped the closing brace.)
 */
int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_load_vm(vm, store, url);
}
712 #ifdef V3_CONFIG_LIVE_MIGRATION
/*
 * Live-migration sender: stream the VM's state to the given store/url.
 * Thin wrapper over the checkpoint subsystem; returns its status code.
 * (Reconstructed: the extraction fused line numbers into the text and
 * dropped the closing brace.)
 */
int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_send_vm(vm, store, url);
}
/*
 * Live-migration receiver: accept the VM's state from the given store/url.
 * Thin wrapper over the checkpoint subsystem; returns its status code.
 * (Reconstructed: the extraction fused line numbers into the text and
 * dropped the closing brace.)
 */
int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_receive_vm(vm, store, url);
}
/* Tear down and free a VM.  Only legal once the VM is stopped or in an
 * error state; frees devices, each core's state, then the VM itself.
 * NOTE(review): return statements are missing from this view. */
726 int v3_free_vm(struct v3_vm_info * vm) {
728 // deinitialize guest (free memory, etc...)
730 if ((vm->run_state != VM_STOPPED) &&
731 (vm->run_state != VM_ERROR)) {
732 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
736 v3_free_vm_devices(vm);
739 for (i = 0; i < vm->num_cores; i++) {
740 v3_free_core(&(vm->cores[i]));
744 v3_free_vm_internal(vm);
/* Detect the host CPU's operating mode by inspecting control registers
 * (CR4 read visible here; PAE bit check implies the PROTECTED_PAE return).
 * NOTE(review): this view shows fragments of two #ifdef'd variants of
 * this function (32-bit and 64-bit builds); most of both bodies is
 * missing -- consult the full file before editing. */
756 v3_cpu_mode_t v3_get_host_cpu_mode() {
766 cr4 = (struct cr4_32 *)&(cr4_val);
769 return PROTECTED_PAE;
777 v3_cpu_mode_t v3_get_host_cpu_mode() {
/* Conditionally yield the CPU: if more than one yield_cycle_period of host
 * time has elapsed since yield_start_cycle, yield and advance the start
 * marker.  Keeps a guest core from monopolizing its physical CPU.
 * NOTE(review): the actual yield call inside the if-branch is missing
 * from this view. */
787 void v3_yield_cond(struct guest_info * info, int usec) {
789 cur_cycle = v3_get_host_time(&info->time_state);
791 if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
792 //PrintDebug(info->vm_info, info, "Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n",
793 // (void *)cur_cycle, (void *)info->yield_start_cycle,
794 // (void *)info->yield_cycle_period);
802 info->yield_start_cycle += info->vm_info->yield_cycle_period;
808 * unconditional cpu yield
809 * if the yielding thread is a guest context, the guest quantum is reset on resumption
810 * Non guest context threads should call this function with a NULL argument
812 * usec <0 => the non-timed yield is used
813 * usec >=0 => the timed yield is used, which also usually implies interruptible
/* Unconditional yield (see the comment block above for the usec contract).
 * The visible line resets the guest's yield quantum after resumption;
 * NOTE(review): the yield call itself and the NULL-info guard are missing
 * from this view. */
815 void v3_yield(struct guest_info * info, int usec) {
823 info->yield_start_cycle += info->vm_info->yield_cycle_period;
/* printf-style debug print, emitted only when the global v3_dbg_enable
 * flag is set.  Formats into a bounded 2048-byte stack buffer via
 * vsnprintf, so oversized messages are truncated rather than overflowing.
 * NOTE(review): the buffer declaration and va_start/va_end lines are
 * missing from this view. */
830 void v3_print_cond(const char * fmt, ...) {
831 if (v3_dbg_enable == 1) {
836 vsnprintf(buf, 2048, fmt, ap);
839 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
845 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
846 extern struct v3_os_hooks * os_hooks;
848 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
849 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/* Enter the guest on the current core: dispatch on the global v3_mach_type
 * to the SVM or VMX world-switch routine and return its status.
 * NOTE(review): the SVM/VMX case-label lists and the default branch are
 * partly missing from this view. */
855 int v3_vm_enter(struct guest_info * info) {
856 switch (v3_mach_type) {
859 case V3_SVM_REV3_CPU:
860 return v3_svm_enter(info);
866 case V3_VMX_EPT_UG_CPU:
867 return v3_vmx_enter(info);
871 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
877 void *v3_get_host_vm(struct v3_vm_info *x)
880 return x->host_priv_data;
886 int v3_get_vcore(struct guest_info *x)