2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
32 #include <palacios/svm.h>
35 #include <palacios/vmx.h>
38 #ifdef V3_CONFIG_CHECKPOINT
39 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU detected virtualization capability (SVM/VMX variant),
 * indexed by CPU id; entries start as V3_INVALID_CPU (set in Init_V3). */
43 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Machine-wide virtualization type; V3_INVALID_CPU until the first CPU is probed. */
44 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host-OS callback table — presumably stored from the `hooks` argument of
 * Init_V3 (the assignment is in an elided line); NULL until then. */
46 struct v3_os_hooks * os_hooks = NULL;
/* Global gate for conditional debug output; v3_print_cond prints only when 1. */
47 int v3_dbg_enable = 0;
/*
 * init_cpu - per-CPU virtualization bring-up, intended to run ON the target
 * CPU (dispatched via hooks->call_on_cpu from Init_V3).
 * @arg: the CPU id, smuggled through the void* (cast via addr_t).
 * Probes SVM first, then VMX, and initializes whichever extension is present;
 * logs an error if the CPU supports neither.
 * NOTE(review): interior lines (braces/else arms) are elided in this excerpt —
 * the embedded original numbering is non-contiguous.
 */
52 static void init_cpu(void * arg) {
53 uint32_t cpu_id = (uint32_t)(addr_t)arg;
56 if (v3_is_svm_capable()) {
57 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
58 v3_init_svm_cpu(cpu_id);
63 if (v3_is_vmx_capable()) {
64 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
65 v3_init_vmx_cpu(cpu_id);
/* Reached only when neither SVM nor VMX was detected. */
70 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * deinit_cpu - per-CPU teardown counterpart of init_cpu, dispatched onto the
 * target CPU during shutdown.
 * @arg: the CPU id (cast via addr_t).
 * Switches on the capability recorded in v3_cpu_types[] and calls the
 * matching SVM/VMX deinit routine. Several case labels are elided in this
 * excerpt; only V3_VMX_EPT_UG_CPU and the SVM/error arms are visible.
 */
75 static void deinit_cpu(void * arg) {
76 uint32_t cpu_id = (uint32_t)(addr_t)arg;
79 switch (v3_cpu_types[cpu_id]) {
83 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
84 v3_deinit_svm_cpu(cpu_id);
90 case V3_VMX_EPT_UG_CPU:
91 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
92 v3_deinit_vmx_cpu(cpu_id);
97 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
102 /* Options are space-separated values of the form "X=Y", for example
103 * scheduler=EDF CPUs=1,2,3,4
104 * The following code pushes them into a hashtable for ease of access
105 * by other code. Storage is allocated for keys and values as part
106 * of this process. XXX Need a way to deallocate this storage if the
107 * module is removed XXX
/* Backing buffer holding a private copy of the option string; keys/values in
 * option_table point into it. Never freed (see XXX note above). */
109 static char *option_storage;
/* Hashtable mapping option key -> value strings (both point into option_storage). */
110 static struct hashtable *option_table;
/* Shared value used for flag-style options given without an "=Y" part. */
111 static char *truevalue = "true";
/* Hash function for option_table: hashes the key's characters (the key is a
 * NUL-terminated string passed through addr_t). */
113 static uint_t option_hash_fn(addr_t key) {
114 char * name = (char *)key;
115 return v3_hash_buffer((uint8_t *)name, strlen(name));
/* Key-equality function for option_table: string comparison of the two keys
 * (addr_t-encoded char pointers); returns nonzero when equal. */
117 static int option_eq_fn(addr_t key1, addr_t key2) {
118 char * name1 = (char *)key1;
119 char * name2 = (char *)key2;
121 return (strcmp(name1, name2) == 0);
/*
 * V3_parse_options - parse the host-supplied "X=Y X2=Y2 ..." option string
 * into option_table (see comment above). Copies the string into
 * option_storage so the table's key/value pointers stay valid.
 * NOTE(review): the tokenizer state machine (parseKey handling, whitespace
 * skip loop, NUL-splitting of '=') is largely elided in this excerpt; only
 * the insertions and branch skeletons are visible.
 * NOTE(review): the V3_Malloc result is used unchecked on the next visible
 * line — confirm allocation-failure policy against the full source.
 */
124 void V3_parse_options(char *options)
126 char *currKey = NULL, *currVal = NULL;
128 int len = strlen(options);
131 option_storage = V3_Malloc(len + 1);
132 strcpy(option_storage, options);
135 option_table = v3_create_htable(0, option_hash_fn, option_eq_fn);
137 /* Skip whitespace */
/* Insert the completed key/value pair discovered inside the scan loop. */
144 v3_htable_insert(option_table, (addr_t)currKey, (addr_t)currVal);
150 } else if (parseKey) {
159 } else /* !parseKey */ {
/* Flush the trailing pair once the end of the string is reached. */
170 v3_htable_insert(option_table, (addr_t)currKey, (addr_t)currVal);
/* Look up an option set via V3_parse_options; returns the value string
 * (owned by option_storage — caller must not free) or NULL if absent. */
175 char *v3_lookup_option(char *key) {
176 return (char *)v3_htable_search(option_table, (addr_t)(key));
/*
 * Init_V3 - one-time global initialization of Palacios.
 * @hooks:    host-OS callback table (stored in the global os_hooks —
 *            assignment line is elided here under "Set global variables").
 * @cpu_mask: bitmap of host CPUs to initialize; NULL means "all".
 * @num_cpus: number of host CPUs to consider.
 * @options:  host-defined option string handed to V3_parse_options.
 * Initializes the subsystems (shadow paging, scheduler, extensions,
 * checkpointing when configured), then runs init_cpu on each selected CPU
 * and records the first detected CPU type as the machine type.
 */
179 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
184 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
186 // Set global variables.
189 // Determine the global machine type
190 v3_mach_type = V3_INVALID_CPU;
192 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
193 v3_cpu_types[i] = V3_INVALID_CPU;
196 // Parse host-os defined options into an easily-accessed format.
197 V3_parse_options(options);
199 // Register all the possible device types
202 // Register all shadow paging handlers
203 V3_init_shdw_paging();
205 // Initialize the scheduler framework (must be before extensions)
206 V3_init_scheduling();
208 // Register all extensions
209 V3_init_extensions();
211 // Enabling scheduler
212 V3_enable_scheduler();
215 #ifdef V3_CONFIG_SYMMOD
219 #ifdef V3_CONFIG_CHECKPOINT
220 V3_init_checkpoint();
/* Per-CPU init only runs if the host provides a cross-CPU call hook. */
223 if ((hooks) && (hooks->call_on_cpu)) {
225 for (i = 0; i < num_cpus; i++) {
/* major/minor index into the byte-array cpu_mask (computed in elided lines). */
229 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
230 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
231 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
/* First successfully probed CPU defines the machine-wide type. */
233 if (v3_mach_type == V3_INVALID_CPU) {
234 v3_mach_type = v3_cpu_types[i];
/* NOTE(review): the enclosing function header is elided in this excerpt —
 * presumably the global shutdown routine (mirror of Init_V3). Tears down
 * subsystems in reverse, then runs deinit_cpu on every CPU that was
 * successfully initialized. */
247 V3_deinit_shdw_paging();
249 V3_deinit_extensions();
251 #ifdef V3_CONFIG_SYMMOD
255 #ifdef V3_CONFIG_CHECKPOINT
256 V3_deinit_checkpoint();
260 if ((os_hooks) && (os_hooks->call_on_cpu)) {
261 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
/* Only CPUs that init_cpu actually brought up get torn down. */
262 if (v3_cpu_types[i] != V3_INVALID_CPU) {
263 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
264 //deinit_cpu((void *)(addr_t)i);
/* Return the detected virtualization type of physical CPU cpu_id.
 * NOTE(review): cpu_id is not bounds-checked against V3_CONFIG_MAX_CPUS here. */
272 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
273 return v3_cpu_types[cpu_id];
/*
 * v3_create_vm - build a VM from a guest configuration image.
 * @cfg:       opaque configuration blob handed to v3_config_guest.
 * @priv_data: host-private handle stored with the VM.
 * @name:      VM name; truncated to 127 chars + NUL (vm->name is 128 bytes).
 * Registers the VM with the scheduler. Returns the new vm on success;
 * the failure paths (returns) are elided in this excerpt.
 */
277 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
278 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
281 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
285 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
289 } else if (strlen(name) >= 128) {
290 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
/* Zero-fill then strncpy(…,127) guarantees NUL termination of vm->name. */
293 memset(vm->name, 0, 128);
294 strncpy(vm->name, name, 127);
297 * Register this VM with the palacios scheduler. It will ask for admission
/* NOTE(review): condition reads `!= -1` yet the body logs an error — the
 * surrounding elided lines likely invert this; confirm against full source. */
300 if(v3_scheduler_register_vm(vm) != -1) {
302 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/*
 * start_core - thread entry point for one virtual core (spawned by
 * v3_start_vm via V3_CREATE_THREAD_ON_CPU).
 * @p: the core's struct guest_info.
 * Registers the core with the scheduler, then dispatches into the SVM or
 * VMX run loop according to the machine type; the return value is that of
 * the run loop. Several case labels are elided in this excerpt.
 */
311 static int start_core(void * p)
313 struct guest_info * core = (struct guest_info *)p;
315 if (v3_scheduler_register_core(core) == -1){
316 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
319 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
320 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
322 switch (v3_mach_type) {
325 case V3_SVM_REV3_CPU:
326 return v3_start_svm_guest(core);
332 case V3_VMX_EPT_UG_CPU:
333 return v3_start_vmx_guest(core);
337 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
345 // For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
/*
 * v3_start_vm - launch all virtual cores of a stopped VM.
 * @vm:       the VM to start (must be in VM_STOPPED state).
 * @cpu_mask: 32-bit bitmap of allowed host CPUs (viewed as a byte array via
 *            core_mask; major = byte index, minor = bit index — see the
 *            "very ugly" note above about future widening).
 * Validates shadow-paging memory constraints, prunes unusable CPUs from the
 * mask, asks the scheduler for admission, then spawns one thread per vcore
 * (backwards, so vcore 0 launches last and can turn the others on).
 * Error-return statements are elided in this excerpt.
 */
349 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
351 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
352 uint32_t avail_cores = 0;
356 if (vm->run_state != VM_STOPPED) {
357 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
362 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
363 for (i=0;i<vm->num_cores;i++) {
364 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
365 if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
366 PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
367 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
368 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
369 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
377 /// CHECK IF WE ARE MULTICORE ENABLED....
379 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
380 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
383 // Check that enough cores are present in the mask to handle vcores
384 for (i = 0; i < MAX_CORES; i++) {
388 if (core_mask[major] & (0x1 << minor)) {
/* Drop CPUs that failed virtualization init so they can't be chosen below. */
389 if (v3_cpu_types[i] == V3_INVALID_CPU) {
390 core_mask[major] &= ~(0x1 << minor);
398 vm->avail_cores = avail_cores;
400 if (v3_scheduler_admit_vm(vm) != 0){
401 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
404 vm->run_state = VM_RUNNING;
406 // Spawn off threads for each core.
407 // We work backwards, so that core 0 is always started last.
408 for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {
411 struct guest_info * core = &(vm->cores[vcore_id]);
412 char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu");
413 uint32_t core_idx = 0;
/* An explicit per-core "target_cpu" config overrides round-robin placement. */
415 if (specified_cpu != NULL) {
416 core_idx = atoi(specified_cpu);
418 if ((core_idx < 0) || (core_idx >= MAX_CORES)) {
419 PrintError(vm, VCORE_NONE, "Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES);
422 i--; // We reset the logical core idx. Not strictly necessary I guess...
427 major = core_idx / 8;
428 minor = core_idx % 8;
430 if ((core_mask[major] & (0x1 << minor)) == 0) {
431 PrintError(vm, VCORE_NONE, "Logical CPU %d not available for virtual core %d; not started\n",
/* An unavailable *explicitly requested* CPU is a hard failure, not a skip. */
434 if (specified_cpu != NULL) {
435 PrintError(vm, VCORE_NONE, "CPU was specified explicitly (%d). HARD ERROR\n", core_idx);
443 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
446 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
448 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
449 core_idx, start_core, core, core->exec_name);
451 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
452 core->pcpu_id = core_idx;
453 core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);
455 if (core->core_thread == NULL) {
456 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
465 PrintError(vm, VCORE_NONE, "Error starting VM: Not enough available CPU cores\n");
/*
 * v3_reset_vm_core - reset one virtual core to start executing at @rip.
 * Dispatches on the virtualization type of the core's current physical CPU;
 * returns the architecture routine's result. Several case labels and the
 * error return are elided in this excerpt.
 */
476 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
478 switch (v3_cpu_types[core->pcpu_id]) {
481 case V3_SVM_REV3_CPU:
482 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
483 return v3_reset_svm_vm_core(core, rip);
488 case V3_VMX_EPT_UG_CPU:
489 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
490 return v3_reset_vmx_vm_core(core, rip);
494 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
503 /* move a virtual core to different physical core */
/*
 * v3_move_vm_core - migrate virtual core @vcore_id of @vm to host CPU
 * @target_cpu. Validates the vcore id, refuses a same-CPU "migration",
 * raises the VM-wide barrier to quiesce all cores, flushes the
 * hardware-cached state (VMX flush visible; SVM arm elided), moves the
 * core's host thread, updates pcpu_id, and lowers the barrier.
 * Error-return statements are elided in this excerpt.
 */
504 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
505 struct guest_info * core = NULL;
507 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
508 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
512 core = &(vm->cores[vcore_id]);
514 if (target_cpu == core->pcpu_id) {
515 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
516 // well that was pointless
520 if (core->core_thread == NULL) {
521 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
/* Spin until the barrier is raised — quiesces every vcore of this VM. */
525 while (v3_raise_barrier(vm, NULL) == -1);
527 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
529 // Double check that we weren't preemptively migrated
530 if (target_cpu != core->pcpu_id) {
532 V3_Print(vm, core, "Moving Core\n");
536 switch (v3_cpu_types[core->pcpu_id]) {
539 case V3_VMX_EPT_UG_CPU:
540 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
/* VMCS state is CPU-local; flush it on the old CPU before the thread moves. */
541 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
548 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
549 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
550 core->vcpu_id, target_cpu);
551 v3_lower_barrier(vm);
555 /* There will be a benign race window here:
556 core->pcpu_id will be set to the target core before its fully "migrated"
557 However the core will NEVER run on the old core again, its just in flight to the new core
559 core->pcpu_id = target_cpu;
561 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
564 v3_lower_barrier(vm);
/*
 * v3_stop_vm - stop a running (or simulating) VM.
 * Sets run_state to VM_STOPPED, then waits (loop structure partly elided)
 * until every core observes the state change and reaches CORE_STOPPED.
 * Returns an error (elided) when called in any other run state.
 */
571 int v3_stop_vm(struct v3_vm_info * vm) {
573 if ((vm->run_state != VM_RUNNING) &&
574 (vm->run_state != VM_SIMULATING)) {
575 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
579 vm->run_state = VM_STOPPED;
581 // Sanity check to catch any weird execution states
582 if (v3_wait_for_barrier(vm, NULL) == 0) {
583 v3_lower_barrier(vm);
586 // XXX force exit all cores via a cross call/IPI XXX
590 int still_running = 0;
592 for (i = 0; i < vm->num_cores; i++) {
593 if (vm->cores[i].core_run_state != CORE_STOPPED) {
598 if (still_running == 0) {
605 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/*
 * v3_pause_vm - pause a running VM by raising the VM barrier (spinning until
 * it takes) and moving run_state to VM_PAUSED. Cores stay parked at the
 * barrier until v3_continue_vm lowers it. Error return is elided.
 */
611 int v3_pause_vm(struct v3_vm_info * vm) {
613 if (vm->run_state != VM_RUNNING) {
614 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
618 while (v3_raise_barrier(vm, NULL) == -1);
620 vm->run_state = VM_PAUSED;
/*
 * v3_continue_vm - resume a paused VM: restore VM_RUNNING and lower the
 * barrier raised by v3_pause_vm so the cores proceed. Error return elided.
 */
626 int v3_continue_vm(struct v3_vm_info * vm) {
628 if (vm->run_state != VM_PAUSED) {
629 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
633 vm->run_state = VM_RUNNING;
635 v3_lower_barrier(vm);
/*
 * sim_callback - core-timeout handler used by v3_simulate_vm.
 * @private_data: the shared v3_bitmap of per-core timeout flags.
 * Marks this vcore's bit, then parks the core (spin body elided) until
 * v3_simulate_vm clears the bitmap to release everyone.
 */
642 static int sim_callback(struct guest_info * core, void * private_data) {
643 struct v3_bitmap * timeout_map = private_data;
645 v3_bitmap_set(timeout_map, core->vcpu_id);
647 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
649 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/*
 * v3_simulate_vm - run a paused VM for approximately @msecs milliseconds,
 * then return it to the paused state.
 * Converts msecs to cycles via the host TSC rate (integer math only — no FP
 * allowed in the VMM), arms a timeout on every core that sets a bit in
 * timeout_map via sim_callback, releases the barrier, busy-waits until all
 * cores have hit their timeout, then re-raises the barrier and restores
 * VM_PAUSED. Error returns are elided in this excerpt.
 */
659 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
660 struct v3_bitmap timeout_map;
664 uint64_t cpu_khz = V3_CPU_KHZ();
666 if (vm->run_state != VM_PAUSED) {
667 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
671 /* AT this point VM is paused */
674 v3_bitmap_init(&timeout_map, vm->num_cores);
679 // calculate cycles from msecs...
680 // IMPORTANT: Floating point not allowed.
/* khz * ms == cycles, since kHz = cycles/ms. */
681 cycles = (msecs * cpu_khz);
685 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
689 for (i = 0; i < vm->num_cores; i++) {
690 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
691 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
696 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
699 // Run the simulation
700 // vm->run_state = VM_SIMULATING;
701 vm->run_state = VM_RUNNING;
702 v3_lower_barrier(vm);
705 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
707 // block until simulation is complete
708 while (all_blocked == 0) {
711 for (i = 0; i < vm->num_cores; i++) {
712 if (v3_bitmap_check(&timeout_map, i) == 0) {
717 if (all_blocked == 1) {
725 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
727 // Simulation is complete
728 // Reset back to PAUSED state
730 v3_raise_barrier_nowait(vm, NULL);
731 vm->run_state = VM_PAUSED;
/* Clearing the bitmap releases the cores spinning in sim_callback; they
 * then proceed into the freshly raised barrier. */
733 v3_bitmap_reset(&timeout_map);
735 v3_wait_for_barrier(vm, NULL);
/*
 * v3_get_state_vm - snapshot VM and per-vcore state into the caller-supplied
 * struct v3_vm_state @s. Reports at most min(s->num_vcores, vm->num_cores)
 * cores (caller indicates its vcore array capacity via s->num_vcores, which
 * is overwritten with the actual count). Internal enum values are translated
 * to the public V3_* constants, defaulting to the *_UNKNOWN sentinels.
 */
741 int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)
744 uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;
746 switch (vm->run_state) {
747 case VM_INVALID: s->state = V3_VM_INVALID; break;
748 case VM_RUNNING: s->state = V3_VM_RUNNING; break;
749 case VM_STOPPED: s->state = V3_VM_STOPPED; break;
750 case VM_PAUSED: s->state = V3_VM_PAUSED; break;
751 case VM_ERROR: s->state = V3_VM_ERROR; break;
752 case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
753 default: s->state = V3_VM_UNKNOWN; break;
756 s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
757 s->mem_size = vm->mem_size;
759 s->num_vcores = numcores;
761 for (i=0;i<numcores;i++) {
762 switch (vm->cores[i].core_run_state) {
763 case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
764 case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
765 case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
766 default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;
768 switch (vm->cores[i].cpu_mode) {
769 case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
770 case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
771 case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
772 case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
773 case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
774 case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
775 default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
777 switch (vm->cores[i].shdw_pg_mode) {
778 case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
779 case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
780 default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
782 switch (vm->cores[i].mem_mode) {
783 case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
784 case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
785 default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
788 s->vcore[i].pcore=vm->cores[i].pcpu_id;
789 s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
790 s->vcore[i].num_exits=vm->cores[i].num_exits;
797 #ifdef V3_CONFIG_CHECKPOINT
798 #include <palacios/vmm_checkpoint.h>
/* Thin wrappers forwarding to the checkpoint subsystem; @store selects the
 * checkpoint backend, @url the destination/source location. Each returns
 * the subsystem's status code. */
800 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
801 return v3_chkpt_save_vm(vm, store, url);
805 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
806 return v3_chkpt_load_vm(vm, store, url);
/* Live-migration send/receive wrappers, compiled only with live migration. */
809 #ifdef V3_CONFIG_LIVE_MIGRATION
810 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {
811 return v3_chkpt_send_vm(vm, store, url);
815 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
816 return v3_chkpt_receive_vm(vm, store, url);
/*
 * v3_free_vm - release all resources of a VM that is stopped or in error.
 * Frees devices, then each core, then the VM structure itself. Refuses
 * (error return elided) any other run state so a live VM is never freed.
 */
823 int v3_free_vm(struct v3_vm_info * vm) {
825 // deinitialize guest (free memory, etc...)
827 if ((vm->run_state != VM_STOPPED) &&
828 (vm->run_state != VM_ERROR)) {
829 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
833 v3_free_vm_devices(vm);
836 for (i = 0; i < vm->num_cores; i++) {
837 v3_free_core(&(vm->cores[i]));
841 v3_free_vm_internal(vm);
/* Report the host CPU's paging/CPU mode by inspecting control registers.
 * NOTE(review): almost the entire body is elided here; two definitions are
 * visible — presumably alternate 32-bit / 64-bit builds selected by an
 * elided #ifdef. Confirm against the full source. */
853 v3_cpu_mode_t v3_get_host_cpu_mode() {
863 cr4 = (struct cr4_32 *)&(cr4_val);
/* PAE detected via CR4 (check elided) → PROTECTED_PAE. */
866 return PROTECTED_PAE;
874 v3_cpu_mode_t v3_get_host_cpu_mode() {
/*
 * v3_print_cond - printf-style output gated on the global v3_dbg_enable
 * flag. Formats into a bounded local buffer (2048 bytes; va_list setup is
 * elided) and emits it via V3_Print. No-op when debugging is disabled.
 */
880 void v3_print_cond(const char * fmt, ...) {
881 if (v3_dbg_enable == 1) {
886 vsnprintf(buf, 2048, fmt, ap);
889 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/*
 * v3_interrupt_cpu - deliver an interrupt @vector to host @logical_cpu on
 * behalf of @vm, via the host-OS hook. Silently does nothing if the host
 * did not provide an interrupt_cpu hook.
 */
895 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
896 extern struct v3_os_hooks * os_hooks;
898 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
899 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/*
 * v3_vm_enter - perform one guest entry for @info, dispatching to the SVM
 * or VMX entry path based on the machine type. Additional case labels and
 * the error return are elided in this excerpt.
 * NOTE(review): "Attemping" typo in the error message (runtime string —
 * left untouched here).
 */
905 int v3_vm_enter(struct guest_info * info) {
906 switch (v3_mach_type) {
909 case V3_SVM_REV3_CPU:
910 return v3_svm_enter(info);
916 case V3_VMX_EPT_UG_CPU:
917 return v3_vmx_enter(info);
921 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
/* Return the host-private handle stored with the VM at creation time
 * (the priv_data passed to v3_create_vm). */
927 void *v3_get_host_vm(struct v3_vm_info *x)
930 return x->host_priv_data;
936 int v3_get_vcore(struct guest_info *x)