2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
32 #include <palacios/svm.h>
35 #include <palacios/vmx.h>
38 #ifdef V3_CONFIG_CHECKPOINT
39 #include <palacios/vmm_checkpoint.h>
// Per-physical-CPU virtualization capability, filled in by init_cpu()
43 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
// Global machine type; set from the first successfully initialized CPU in Init_V3()
44 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
// Host-OS callback table supplied to Init_V3() (printing, threads, cross-CPU calls, ...)
46 struct v3_os_hooks * os_hooks = NULL;
// When set to 1, v3_print_cond() output is enabled
47 int v3_dbg_enable = 0;
// Per-CPU initialization callback, run on each selected physical CPU from Init_V3().
// Probes for SVM then VMX support and initializes the matching backend for this CPU;
// logs an error if the CPU has no virtualization extensions.
// NOTE(review): this view of the file is elided — the branch structure between the
// SVM and VMX probes (else/closing braces) is not visible here.
52 static void init_cpu(void * arg) {
// arg carries the physical CPU id, smuggled through the void* callback argument
53 uint32_t cpu_id = (uint32_t)(addr_t)arg;
56 if (v3_is_svm_capable()) {
57 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
58 v3_init_svm_cpu(cpu_id);
63 if (v3_is_vmx_capable()) {
64 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
65 v3_init_vmx_cpu(cpu_id);
// Fallthrough path: neither SVM nor VMX detected on this CPU
70 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
// Per-CPU teardown callback (mirror of init_cpu), run via V3_Call_On_CPU during
// VMM shutdown. Dispatches on the CPU type recorded at init time.
// NOTE(review): elided view — additional case labels and break statements between
// the visible cases are not shown here.
75 static void deinit_cpu(void * arg) {
// arg carries the physical CPU id
76 uint32_t cpu_id = (uint32_t)(addr_t)arg;
79 switch (v3_cpu_types[cpu_id]) {
83 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
84 v3_deinit_svm_cpu(cpu_id);
90 case V3_VMX_EPT_UG_CPU:
91 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
92 v3_deinit_vmx_cpu(cpu_id);
// Default/invalid case: nothing to deinitialize
97 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
102 /* Options are space-separated values of the form "X=Y", for example
103 * scheduler=EDF CPUs=1,2,3,4
104 * The following code pushes them into a hashtable for ease of access
105 * by other code. Storage is allocated for keys and values as part
106 * of this process. XXX Need a way to deallocate this storage if the
107 * module is removed XXX
// Backing storage for the parsed option string (keys/values point into this buffer);
// allocated in V3_parse_options() and, per the note above, never freed.
109 static char *option_storage;
// Hashtable mapping option name -> option value (both char*), built by V3_parse_options()
110 static struct hashtable *option_table;
// Shared sentinel value for valueless options (e.g. a bare "flag" token)
111 static char *truevalue = "true";
// Hash function for the option table: hashes the option-name string bytes.
113 static uint_t option_hash_fn(addr_t key) {
114 char * name = (char *)key;
115 return v3_hash_buffer((uint8_t *)name, strlen(name));
// Key-equality function for the option table: string comparison of option names.
117 static int option_eq_fn(addr_t key1, addr_t key2) {
118 char * name1 = (char *)key1;
119 char * name2 = (char *)key2;
121 return (strcmp(name1, name2) == 0);
// Parse the space-separated "key=value" option string (see comment above) into
// option_table. The string is copied into option_storage so the table's key/value
// pointers stay valid after the caller's buffer goes away.
// NOTE(review): the tokenizing state machine between the visible lines is elided in
// this view; only the copy, table creation, and the two insert sites are shown.
124 void V3_parse_options(char *options)
126 char *currKey = NULL, *currVal = NULL;
134 len = strlen(options);
// NOTE(review): V3_Malloc return value is not visibly checked before strcpy
135 option_storage = V3_Malloc(len + 1);
136 strcpy(option_storage, options);
139 option_table = v3_create_htable(0, option_hash_fn, option_eq_fn);
141 /* Skip whitespace */
// Insert a completed key/value pair discovered mid-string
148 v3_htable_insert(option_table, (addr_t)currKey, (addr_t)currVal);
154 } else if (parseKey) {
163 } else /* !parseKey */ {
// Insert the final pair once the end of the string is reached
174 v3_htable_insert(option_table, (addr_t)currKey, (addr_t)currVal);
// Look up a host-supplied option by name; returns its value string, or NULL-equivalent
// result from the hashtable search when the key is absent.
179 char *v3_lookup_option(char *key) {
180 return (char *)v3_htable_search(option_table, (addr_t)(key));
// Global VMM initialization entry point, called once by the host OS embedding layer.
//  hooks    - host OS callback table (stored in the global os_hooks)
//  cpu_mask - bitmap of physical CPUs to initialize; NULL means "all"
//  num_cpus - number of physical CPUs to consider
//  options  - space-separated "key=value" option string (see V3_parse_options)
// Initializes the global subsystems (paging, scheduler, extensions, optional
// symmod/checkpoint), then runs init_cpu() on each selected CPU and records the
// machine type from the first CPU that comes up with a valid type.
183 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
188 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
190 // Set global variables.
193 // Determine the global machine type
194 v3_mach_type = V3_INVALID_CPU;
// Mark every CPU slot invalid until init_cpu() proves otherwise
196 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
197 v3_cpu_types[i] = V3_INVALID_CPU;
200 // Parse host-os defined options into an easily-accessed format.
201 V3_parse_options(options);
203 // Register all the possible device types
206 // Register all shadow paging handlers
207 V3_init_shdw_paging();
209 // Initialize the scheduler framework (must be before extensions)
210 V3_init_scheduling();
212 // Register all extensions
213 V3_init_extensions();
215 // Enabling scheduler
216 V3_enable_scheduler();
219 #ifdef V3_CONFIG_SYMMOD
223 #ifdef V3_CONFIG_CHECKPOINT
224 V3_init_checkpoint();
// Per-CPU init requires the host to provide a cross-CPU call hook
227 if ((hooks) && (hooks->call_on_cpu)) {
229 for (i = 0; i < num_cpus; i++) {
// A NULL mask selects every CPU; otherwise test this CPU's bit in the byte mask
233 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
234 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
235 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
// First valid CPU type defines the global machine type
237 if (v3_mach_type == V3_INVALID_CPU) {
238 v3_mach_type = v3_cpu_types[i];
// NOTE(review): the enclosing function signature is not visible in this view —
// presumably this is the global VMM shutdown routine (mirror of Init_V3); verify
// against the full file. It tears down subsystems in reverse, then runs
// deinit_cpu() on every CPU that was successfully initialized.
251 V3_deinit_shdw_paging();
253 V3_deinit_extensions();
255 #ifdef V3_CONFIG_SYMMOD
259 #ifdef V3_CONFIG_CHECKPOINT
260 V3_deinit_checkpoint();
// Only CPUs whose type was set (i.e. init_cpu succeeded) get deinitialized
264 if ((os_hooks) && (os_hooks->call_on_cpu)) {
265 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
266 if (v3_cpu_types[i] != V3_INVALID_CPU) {
267 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
268 //deinit_cpu((void *)(addr_t)i);
// Return the recorded virtualization type of a physical CPU.
// NOTE(review): cpu_id is not bounds-checked against V3_CONFIG_MAX_CPUS here;
// callers must pass a valid index.
276 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
277 return v3_cpu_types[cpu_id];
// Create (but do not start) a VM from a guest configuration image.
//  cfg       - opaque configuration blob handed to v3_config_guest()
//  priv_data - host-private pointer stored with the VM
//  name      - VM name; truncated to 127 chars + NUL in vm->name
// Returns the new vm on success; error paths (elided here) report failures from
// configuration and scheduler registration.
281 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
282 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
285 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
289 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
293 } else if (strlen(name) >= 128) {
294 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
// memset + strncpy(...,127) together guarantee vm->name is NUL-terminated
297 memset(vm->name, 0, 128);
298 strncpy(vm->name, name, 127);
301 * Register this VM with the palacios scheduler. It will ask for admission
// Scheduler registration failure is reported; cleanup path is elided in this view
304 if(v3_scheduler_register_vm(vm) == -1) {
306 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
// Thread entry point for one virtual core (spawned from v3_start_vm).
// Registers the core with the scheduler, then enters the architecture-specific
// guest-run loop selected by the global machine type; that call does not return
// until the core stops.
315 static int start_core(void * p)
// p is the guest_info for this virtual core, passed through the thread API
317 struct guest_info * core = (struct guest_info *)p;
319 if (v3_scheduler_register_core(core) == -1){
320 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
323 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
324 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
// Dispatch on host virtualization technology (SVM vs VMX)
326 switch (v3_mach_type) {
329 case V3_SVM_REV3_CPU:
330 return v3_start_svm_guest(core);
336 case V3_VMX_EPT_UG_CPU:
337 return v3_start_vmx_guest(core);
// default: machine type was never resolved — cannot run a guest
341 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
349 // For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
// Launch a previously created VM: validate state, prune unusable CPUs from the
// mask, admit the VM to the scheduler, and spawn one host thread per virtual
// core (core 0 last, so it starts after its siblings exist).
//  cpu_mask - 32-bit bitmap of allowed physical CPUs, viewed byte-wise below
// Returns nonzero on error paths (elided in this view).
353 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
355 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
356 uint32_t avail_cores = 0;
// Refuse to double-start a VM
360 if (vm->run_state != VM_STOPPED) {
361 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
366 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
367 for (i=0;i<vm->num_cores;i++) {
368 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
369 if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
370 PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
371 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
372 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
373 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
381 /// CHECK IF WE ARE MULTICORE ENABLED....
383 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
384 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
387 // Check that enough cores are present in the mask to handle vcores
388 for (i = 0; i < MAX_CORES; i++) {
392 if (core_mask[major] & (0x1 << minor)) {
// CPU is in the mask but has no virtualization support — drop it from the mask
393 if (v3_cpu_types[i] == V3_INVALID_CPU) {
394 core_mask[major] &= ~(0x1 << minor);
402 vm->avail_cores = avail_cores;
// Scheduler admission control may reject the VM outright
404 if (v3_scheduler_admit_vm(vm) != 0){
405 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
408 vm->run_state = VM_RUNNING;
410 // Spawn off threads for each core.
411 // We work backwards, so that core 0 is always started last.
412 for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {
415 struct guest_info * core = &(vm->cores[vcore_id]);
// The config may pin this vcore to an explicit physical CPU
416 char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu");
417 uint32_t core_idx = 0;
419 if (specified_cpu != NULL) {
420 core_idx = atoi(specified_cpu);
// NOTE(review): core_idx is unsigned, so the (core_idx < 0) arm is always false
422 if ((core_idx < 0) || (core_idx >= MAX_CORES)) {
423 PrintError(vm, VCORE_NONE, "Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES);
426 i--; // We reset the logical core idx. Not strictly necessary I guess...
// Byte/bit coordinates of this CPU's bit in the mask
431 major = core_idx / 8;
432 minor = core_idx % 8;
434 if ((core_mask[major] & (0x1 << minor)) == 0) {
435 PrintError(vm, VCORE_NONE, "Logical CPU %d not available for virtual core %d; not started\n",
// An explicitly pinned CPU that is unavailable is a hard error; otherwise skip it
438 if (specified_cpu != NULL) {
439 PrintError(vm, VCORE_NONE, "CPU was specified explicitly (%d). HARD ERROR\n", core_idx);
447 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
// NOTE(review): unbounded sprintf into core->exec_name — vm->name is capped at
// 127 chars elsewhere, but confirm exec_name is sized to hold name + "-" + id
450 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
452 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
453 core_idx, start_core, core, core->exec_name);
455 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
456 core->pcpu_id = core_idx;
457 core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);
459 if (core->core_thread == NULL) {
460 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
// Reached when the loop exhausted MAX_CORES before placing every vcore
469 PrintError(vm, VCORE_NONE, "Error starting VM: Not enough available CPU cores\n");
// Reset a single virtual core to start execution at the given guest RIP.
// Dispatches to the SVM or VMX reset path based on the type of the physical CPU
// the core currently sits on.
480 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
482 switch (v3_cpu_types[core->pcpu_id]) {
485 case V3_SVM_REV3_CPU:
486 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
487 return v3_reset_svm_vm_core(core, rip);
492 case V3_VMX_EPT_UG_CPU:
493 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
494 return v3_reset_vmx_vm_core(core, rip);
// default: no virtualization support recorded for this physical CPU
498 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
507 /* move a virtual core to different physical core */
// Migrate virtual core vcore_id of vm onto physical CPU target_cpu.
// Strategy: validate arguments, raise the VM-wide barrier to quiesce all cores,
// flush per-CPU hardware state (VMX) on the old CPU, move the host thread, then
// update pcpu_id and lower the barrier.
508 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
509 struct guest_info * core = NULL;
511 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
512 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
516 core = &(vm->cores[vcore_id]);
// Migrating to the CPU we are already on is rejected as a no-op
518 if (target_cpu == core->pcpu_id) {
519 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
520 // well that was pointless
524 if (core->core_thread == NULL) {
525 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
// Spin until the barrier is raised: all vcores must be stopped at the barrier
529 while (v3_raise_barrier(vm, NULL) == -1);
531 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
533 // Double check that we weren't preemptively migrated
534 if (target_cpu != core->pcpu_id) {
536 V3_Print(vm, core, "Moving Core\n");
540 switch (v3_cpu_types[core->pcpu_id]) {
// VMX keeps per-CPU state (e.g. the loaded VMCS) that must be flushed on the
// old CPU before the thread moves
543 case V3_VMX_EPT_UG_CPU:
544 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
545 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
552 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
553 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
554 core->vcpu_id, target_cpu);
// On failure the barrier must still be released before returning
555 v3_lower_barrier(vm);
559 /* There will be a benign race window here:
560 core->pcpu_id will be set to the target core before its fully "migrated"
561 However the core will NEVER run on the old core again, its just in flight to the new core
563 core->pcpu_id = target_cpu;
565 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
568 v3_lower_barrier(vm);
// Stop a running (or simulating) VM: flip run_state to VM_STOPPED and wait for
// every core to observe it and reach CORE_STOPPED.
575 int v3_stop_vm(struct v3_vm_info * vm) {
577 if ((vm->run_state != VM_RUNNING) &&
578 (vm->run_state != VM_SIMULATING)) {
579 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
// Cores poll run_state; setting it is what makes them exit their run loops
583 vm->run_state = VM_STOPPED;
585 // Sanity check to catch any weird execution states
586 if (v3_wait_for_barrier(vm, NULL) == 0) {
587 v3_lower_barrier(vm);
590 // XXX force exit all cores via a cross call/IPI XXX
// Poll until every core reports CORE_STOPPED (loop structure partly elided here)
594 int still_running = 0;
596 for (i = 0; i < vm->num_cores; i++) {
597 if (vm->cores[i].core_run_state != CORE_STOPPED) {
602 if (still_running == 0) {
609 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
// Pause a running VM by raising the VM barrier (parking all vcores) and marking
// the state VM_PAUSED. The barrier stays raised until v3_continue_vm().
615 int v3_pause_vm(struct v3_vm_info * vm) {
617 if (vm->run_state != VM_RUNNING) {
618 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
// Spin until the barrier is successfully raised
622 while (v3_raise_barrier(vm, NULL) == -1);
624 vm->run_state = VM_PAUSED;
// Resume a paused VM: mark it running, then lower the barrier raised by
// v3_pause_vm() so the vcores proceed.
630 int v3_continue_vm(struct v3_vm_info * vm) {
632 if (vm->run_state != VM_PAUSED) {
633 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
// Order: state first, then release the cores
637 vm->run_state = VM_RUNNING;
639 v3_lower_barrier(vm);
// Core-timeout callback used by v3_simulate_vm(): sets this core's bit in the
// shared timeout bitmap, then spins until v3_simulate_vm() clears it — i.e. the
// core blocks here for the remainder of the simulation window.
646 static int sim_callback(struct guest_info * core, void * private_data) {
// private_data is the v3_bitmap shared with v3_simulate_vm()
647 struct v3_bitmap * timeout_map = private_data;
649 v3_bitmap_set(timeout_map, core->vcpu_id);
651 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
// Busy-wait until the controller resets the bitmap
653 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
// Run a paused VM for approximately msecs milliseconds, then re-pause it.
// Mechanism: arm a cycle-count timeout (sim_callback) on every core, release the
// cores, wait until every core has hit its timeout (bit set in timeout_map),
// then raise the barrier again and restore VM_PAUSED.
663 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
// One bit per vcore; a set bit means that core's timeout has fired
664 struct v3_bitmap timeout_map;
668 uint64_t cpu_khz = V3_CPU_KHZ();
// Must start from the paused state so all cores are parked at the barrier
670 if (vm->run_state != VM_PAUSED) {
671 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
675 /* AT this point VM is paused */
678 v3_bitmap_init(&timeout_map, vm->num_cores);
683 // calculate cycles from msecs...
684 // IMPORTANT: Floating point not allowed.
// msecs * KHz = cycles (KHz is cycles per millisecond)
685 cycles = (msecs * cpu_khz);
689 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
// Arm the same timeout on every core; each will call sim_callback when it fires
693 for (i = 0; i < vm->num_cores; i++) {
694 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
695 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
700 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
703 // Run the simulation
704 // vm->run_state = VM_SIMULATING;
705 vm->run_state = VM_RUNNING;
706 v3_lower_barrier(vm);
709 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
711 // block until simulation is complete
// Poll the bitmap: all_blocked stays 0 while any core has not yet timed out
712 while (all_blocked == 0) {
715 for (i = 0; i < vm->num_cores; i++) {
716 if (v3_bitmap_check(&timeout_map, i) == 0) {
721 if (all_blocked == 1) {
729 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
731 // Simulation is complete
732 // Reset back to PAUSED state
// Raise the barrier, release the spinning cores (bitmap reset), then wait for
// all of them to park at the barrier again
734 v3_raise_barrier_nowait(vm, NULL);
735 vm->run_state = VM_PAUSED;
737 v3_bitmap_reset(&timeout_map);
739 v3_wait_for_barrier(vm, NULL);
// Snapshot the VM's externally visible state into the caller-provided struct s:
// overall run state, memory base/size, and per-vcore run state, CPU mode, paging
// mode, memory mode, physical core, last RIP, and exit count. The number of
// vcores reported is the min of what the caller asked for and what the VM has.
745 int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)
// Clamp to the smaller of caller capacity (s->num_vcores) and actual core count
748 uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;
// Translate internal run states to the public V3_VM_* enumeration
750 switch (vm->run_state) {
751 case VM_INVALID: s->state = V3_VM_INVALID; break;
752 case VM_RUNNING: s->state = V3_VM_RUNNING; break;
753 case VM_STOPPED: s->state = V3_VM_STOPPED; break;
754 case VM_PAUSED: s->state = V3_VM_PAUSED; break;
755 case VM_ERROR: s->state = V3_VM_ERROR; break;
756 case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
757 default: s->state = V3_VM_UNKNOWN; break;
760 s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
761 s->mem_size = vm->mem_size;
763 s->num_vcores = numcores;
765 for (i=0;i<numcores;i++) {
// Per-core run state
766 switch (vm->cores[i].core_run_state) {
767 case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
768 case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
769 case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
770 default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;
// Guest CPU execution mode (real/protected/long, etc.)
772 switch (vm->cores[i].cpu_mode) {
773 case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
774 case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
775 case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
776 case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
777 case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
778 case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
779 default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
// Paging implementation in use on this core
781 switch (vm->cores[i].shdw_pg_mode) {
782 case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
783 case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
784 default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
// Whether the guest currently addresses physical or virtual memory
786 switch (vm->cores[i].mem_mode) {
787 case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
788 case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
789 default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
792 s->vcore[i].pcore=vm->cores[i].pcpu_id;
793 s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
794 s->vcore[i].num_exits=vm->cores[i].num_exits;
// Thin checkpoint/migration wrappers, compiled only with V3_CONFIG_CHECKPOINT
// (and V3_CONFIG_LIVE_MIGRATION for send/receive). Each forwards to the
// corresponding v3_chkpt_* implementation.
801 #ifdef V3_CONFIG_CHECKPOINT
// NOTE(review): vmm_checkpoint.h is already included near the top of the file
// under the same guard — this second include is redundant (though harmless)
802 #include <palacios/vmm_checkpoint.h>
// Save VM state to the named checkpoint store/url
804 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
805 return v3_chkpt_save_vm(vm, store, url);
// Restore VM state from the named checkpoint store/url
809 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
810 return v3_chkpt_load_vm(vm, store, url);
813 #ifdef V3_CONFIG_LIVE_MIGRATION
// Push this VM's state to a remote target (live-migration sender side)
814 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {
815 return v3_chkpt_send_vm(vm, store, url);
// Accept a migrating VM's state (live-migration receiver side)
819 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
820 return v3_chkpt_receive_vm(vm, store, url);
// Destroy a VM that is stopped or errored: free its devices, each core's
// per-core state, and finally the VM structure itself.
827 int v3_free_vm(struct v3_vm_info * vm) {
829 // deinitialize guest (free memory, etc...)
// Refuse to free a VM that may still be executing
831 if ((vm->run_state != VM_STOPPED) &&
832 (vm->run_state != VM_ERROR)) {
833 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
837 v3_free_vm_devices(vm);
840 for (i = 0; i < vm->num_cores; i++) {
841 v3_free_core(&(vm->cores[i]));
845 v3_free_vm_internal(vm);
// Determine the host CPU's paging/execution mode (two build-dependent variants;
// the preprocessor conditions selecting between them are elided in this view).
// This variant inspects CR4 (e.g. the PAE bit) to distinguish modes.
857 v3_cpu_mode_t v3_get_host_cpu_mode() {
867 cr4 = (struct cr4_32 *)&(cr4_val);
870 return PROTECTED_PAE;
// Alternate-build variant of the same query (body elided in this view)
878 v3_cpu_mode_t v3_get_host_cpu_mode() {
// Conditional debug print: formats and emits the message only when the global
// v3_dbg_enable flag is set. Output is truncated to the 2048-byte local buffer.
884 void v3_print_cond(const char * fmt, ...) {
885 if (v3_dbg_enable == 1) {
890 vsnprintf(buf, 2048, fmt, ap);
893 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
// Ask the host OS to deliver an interrupt vector to a physical CPU on behalf of
// vm. A no-op if the host did not provide the interrupt_cpu hook.
899 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
900 extern struct v3_os_hooks * os_hooks;
902 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
903 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
// Perform a single guest entry on the given core, dispatching to the SVM or VMX
// entry path based on the global machine type.
// NOTE(review): "Attemping" typo lives in the runtime error string below.
909 int v3_vm_enter(struct guest_info * info) {
910 switch (v3_mach_type) {
913 case V3_SVM_REV3_CPU:
914 return v3_svm_enter(info);
920 case V3_VMX_EPT_UG_CPU:
921 return v3_vmx_enter(info);
// default: no valid machine type — cannot enter the guest
925 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
// Return the host-private data pointer associated with the VM at creation time.
931 void *v3_get_host_vm(struct v3_vm_info *x)
934 return x->host_priv_data;
940 int v3_get_vcore(struct guest_info *x)