2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
8 * http://www.v3vee.org
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_mem.h>
22 #include <palacios/vmm_intr.h>
23 #include <palacios/vmm_config.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vmm_ctrl_regs.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_sprintf.h>
28 #include <palacios/vmm_extensions.h>
29 #include <palacios/vmm_timeout.h>
30 #include <palacios/vmm_options.h>
31 #include <palacios/vmm_cpu_mapper.h>
34 #include <palacios/svm.h>
37 #include <palacios/vmx.h>
40 #ifdef V3_CONFIG_CHECKPOINT
41 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU virtualization architecture detected at init time
 * (SVM/VMX variant, or V3_INVALID_CPU if uninitialized/unsupported). */
45 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Global machine type: set from the first successfully initialized CPU. */
46 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host OS callback table registered by Init_V3(); NULL until then. */
48 struct v3_os_hooks * os_hooks = NULL;
/* When 1, v3_print_cond() output is enabled (see below). */
49 int v3_dbg_enable = 0;
/* Per-CPU virtualization bring-up, run ON the target CPU via
 * hooks->call_on_cpu() from Init_V3(). arg carries the logical cpu id
 * cast through addr_t. Probes SVM, then VMX, and initializes whichever
 * extension is present; logs an error if the CPU supports neither.
 * NOTE(review): interior lines (else/brace structure) appear elided in
 * this extraction. */
53 static void init_cpu(void * arg) {
54 uint32_t cpu_id = (uint32_t)(addr_t)arg;
57 if (v3_is_svm_capable()) {
58 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
59 v3_init_svm_cpu(cpu_id);
64 if (v3_is_vmx_capable()) {
65 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
66 v3_init_vmx_cpu(cpu_id);
71 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Per-CPU virtualization teardown, run on the target CPU (see
 * Shutdown path below). Dispatches on the architecture recorded in
 * v3_cpu_types[] by init_cpu().
 * NOTE(review): SVM case labels and the default label appear elided in
 * this extraction; the PrintError is presumably the default arm. */
76 static void deinit_cpu(void * arg) {
77 uint32_t cpu_id = (uint32_t)(addr_t)arg;
80 switch (v3_cpu_types[cpu_id]) {
84 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
85 v3_deinit_svm_cpu(cpu_id);
91 case V3_VMX_EPT_UG_CPU:
92 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
93 v3_deinit_vmx_cpu(cpu_id);
98 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Global Palacios initialization, called once by the host OS.
 *   hooks    - host OS callback table (stored in the global os_hooks;
 *              assignment line elided in this extraction)
 *   cpu_mask - bitmask of physical CPUs to initialize; NULL means all
 *   num_cpus - number of CPUs the host reports (clamped to
 *              V3_CONFIG_MAX_CPUS)
 *   options  - host-provided option string parsed by v3_parse_options()
 * Initializes subsystems in dependency order (memory, shadow paging,
 * cpu_mapper, scheduler, extensions, optional symmod/checkpoint), then
 * runs init_cpu() on each selected CPU and records the machine type
 * from the first initialized CPU. */
103 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
108 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
111 // Set global variables.
114 if (num_cpus>V3_CONFIG_MAX_CPUS) {
115 PrintError(VM_NONE,VCORE_NONE, "Requesting as many as %d cpus, but Palacios is compiled for a maximum of %d. Only the first %d cpus will be considered\n", num_cpus, V3_CONFIG_MAX_CPUS, V3_CONFIG_MAX_CPUS);
118 // Determine the global machine type
119 v3_mach_type = V3_INVALID_CPU;
121 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
122 v3_cpu_types[i] = V3_INVALID_CPU;
125 // Parse host-os defined options into an easily-accessed format.
126 v3_parse_options(options);
128 // Memory manager initialization
131 // Register all the possible device types
134 // Register all shadow paging handlers
135 V3_init_shdw_paging();
137 // Initialize the cpu_mapper framework (must be before extensions)
138 V3_init_cpu_mapper();
140 // Initialize the scheduler framework (must be before extensions)
141 V3_init_scheduling();
143 // Register all extensions
144 V3_init_extensions();
146 // Enabling cpu_mapper
147 V3_enable_cpu_mapper();
149 // Enabling scheduler
150 V3_enable_scheduler();
153 #ifdef V3_CONFIG_SYMMOD
157 #ifdef V3_CONFIG_CHECKPOINT
158 V3_init_checkpoint();
161 if ((hooks) && (hooks->call_on_cpu)) {
163 for (i = 0; i < num_cpus && i < V3_CONFIG_MAX_CPUS; i++) {
/* major/minor index into the byte-wise cpu_mask; NULL mask selects all CPUs.
 * (major/minor computation elided in this extraction.) */
167 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
168 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
169 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
/* First successfully initialized CPU defines the global machine type. */
171 if (v3_mach_type == V3_INVALID_CPU) {
172 v3_mach_type = v3_cpu_types[i];
/* Global Palacios teardown — deinitializes each initialized CPU and then
 * shuts subsystems down in the reverse of Init_V3's order.
 * NOTE(review): the function header line is elided in this extraction;
 * presumably this is the Shutdown_V3()-style counterpart of Init_V3. */
184 // Reverse order of Init_V3
188 if ((os_hooks) && (os_hooks->call_on_cpu)) {
189 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
/* Only CPUs that init_cpu() actually initialized get deinitialized. */
190 if (v3_cpu_types[i] != V3_INVALID_CPU) {
191 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
192 //deinit_cpu((void *)(addr_t)i);
197 #ifdef V3_CONFIG_CHECKPOINT
198 V3_deinit_checkpoint();
201 #ifdef V3_CONFIG_SYMMOD
205 V3_disable_scheduler();
207 V3_disable_cpu_mapper();
209 V3_deinit_extensions();
211 V3_deinit_scheduling();
213 V3_deinit_cpu_mapper();
215 V3_deinit_shdw_paging();
/* Return the virtualization architecture detected for physical CPU cpu_id.
 * No bounds check: caller must pass 0 <= cpu_id < V3_CONFIG_MAX_CPUS. */
227 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
228 return v3_cpu_types[cpu_id];
/* Build a VM from a host-supplied configuration blob.
 *   cfg       - opaque guest configuration passed to v3_config_guest()
 *   priv_data - host private data attached to the VM
 *   name      - VM name; copied (truncated) into the 128-byte vm->name
 * Registers the new VM with the cpu_mapper and the scheduler.
 * Returns the VM on success; error-path returns are elided in this
 * extraction (presumably NULL on failure — confirm against full source). */
232 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
233 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
236 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
240 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
244 } else if (strlen(name) >= 128) {
245 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
/* memset + strncpy(..., 127) together guarantee NUL termination. */
248 memset(vm->name, 0, 128);
249 strncpy(vm->name, name, 127);
251 if(v3_cpu_mapper_register_vm(vm) == -1) {
253 PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
257 * Register this VM with the palacios scheduler. It will ask for admission
260 if(v3_scheduler_register_vm(vm) == -1) {
262 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/* Thread entry point for one virtual core (spawned per-vcore by
 * v3_start_vm via V3_CREATE_THREAD_ON_CPU). Registers the core with the
 * scheduler, then enters the architecture-specific guest-run loop;
 * the return value is that loop's exit status.
 * NOTE(review): SVM case labels besides REV3 and the switch default
 * appear elided in this extraction. */
271 static int start_core(void * p)
273 struct guest_info * core = (struct guest_info *)p;
275 if (v3_scheduler_register_core(core) == -1){
276 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
279 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
280 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
/* Dispatch on the global machine type set during Init_V3. */
282 switch (v3_mach_type) {
285 case V3_SVM_REV3_CPU:
286 return v3_start_svm_guest(core);
292 case V3_VMX_EPT_UG_CPU:
293 return v3_start_vmx_guest(core);
297 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
/* Launch a previously created VM.
 *   vm       - VM to start; must currently be in VM_STOPPED state
 *   cpu_mask - bitmask of physical CPUs the VM may run on (viewed
 *              byte-wise through core_mask for future widening)
 * Validates the shadow-paging/4GB memory constraint, prunes cpu_mask to
 * CPUs with working virtualization extensions, admits the VM with the
 * cpu_mapper and scheduler, marks it VM_RUNNING, and spawns one host
 * thread (start_core) per virtual core.
 * NOTE(review): interior lines (returns, brace structure, avail_cores
 * accounting) are elided in this extraction; only visible lines are
 * reproduced, with one index fix noted below. */
304 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
307 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
308 uint32_t avail_cores = 0;
310 extern uint64_t v3_mem_block_size;
313 if (vm->run_state != VM_STOPPED) {
314 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
319 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
320 for (i=0;i<vm->num_cores;i++) {
321 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
322 for (j=0;j<vm->mem_map.num_base_regions;j++) {
/* BUG FIX: the inner loop walks regions with j (and the error message
 * below reports region j), but the original indexed base_regions with
 * the core index i — checking the wrong region. Use j. */
323 if ((vm->mem_map.base_regions[j].host_addr + v3_mem_block_size) >= 0x100000000ULL) {
324 PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n",j, i);
325 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
326 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
327 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
334 /// CHECK IF WE ARE MULTICORE ENABLED....
336 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
337 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
340 // Check that enough cores are present in the mask to handle vcores
341 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
345 if (core_mask[major] & (0x1 << minor)) {
/* Drop mask bits for CPUs whose virtualization init failed. */
346 if (v3_cpu_types[i] == V3_INVALID_CPU) {
347 core_mask[major] &= ~(0x1 << minor);
354 vm->avail_cores = avail_cores;
356 if (v3_cpu_mapper_admit_vm(vm,cpu_mask) != 0){
357 PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
360 if (v3_scheduler_admit_vm(vm) != 0){
361 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
364 vm->run_state = VM_RUNNING;
367 for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
369 struct guest_info * core = &(vm->cores[vcore_id]);
371 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
372 vcore_id, core->pcpu_id);
374 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
376 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
377 core->pcpu_id, start_core, core, core->exec_name);
379 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
380 core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
382 if (core->core_thread == NULL) {
383 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
/* Reset a single virtual core to begin execution at guest address rip.
 * Dispatches to the SVM or VMX reset routine based on the architecture
 * of the core's current physical CPU; returns that routine's status.
 * NOTE(review): SVM case labels besides REV3 and the default label
 * appear elided in this extraction. */
394 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
396 switch (v3_cpu_types[core->pcpu_id]) {
399 case V3_SVM_REV3_CPU:
400 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
401 return v3_reset_svm_vm_core(core, rip);
406 case V3_VMX_EPT_UG_CPU:
407 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
408 return v3_reset_vmx_vm_core(core, rip);
412 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
421 /* move a virtual core to different physical core */
/* Migrate virtual core vcore_id of vm to physical CPU target_cpu.
 * Validates the vcore id, rejects no-op (already-local) migration and
 * cores without a thread context, then raises the VM-wide barrier so
 * all cores are quiesced before moving the host thread.
 * On VMX the hardware per-CPU state is flushed on the old CPU first.
 * NOTE(review): error-path returns and some brace structure are elided
 * in this extraction. */
422 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
423 struct guest_info * core = NULL;
425 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
426 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
430 core = &(vm->cores[vcore_id]);
432 if (target_cpu == core->pcpu_id) {
433 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
434 // well that was pointless
438 if (core->core_thread == NULL) {
439 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
/* Spin until the barrier is raised; all vcores are then stopped at it. */
443 while (v3_raise_barrier(vm, NULL) == -1);
445 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
447 // Double check that we weren't preemptively migrated
448 if (target_cpu != core->pcpu_id) {
450 V3_Print(vm, core, "Moving Core\n");
452 if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){
453 PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu);
459 switch (v3_cpu_types[core->pcpu_id]) {
462 case V3_VMX_EPT_UG_CPU:
463 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
/* VMX state is CPU-local; flush it on the source CPU before the move. */
464 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
471 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
472 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
473 core->vcpu_id, target_cpu);
474 v3_lower_barrier(vm);
478 /* There will be a benign race window here:
479 core->pcpu_id will be set to the target core before its fully "migrated"
480 However the core will NEVER run on the old core again, its just in flight to the new core
482 core->pcpu_id = target_cpu;
484 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
487 v3_lower_barrier(vm);
/* Stop a running (or simulating) VM: set VM_STOPPED so each vcore's run
 * loop exits, then wait for every core to reach CORE_STOPPED, nudging
 * any still-running core through the scheduler.
 * NOTE(review): the surrounding wait/retry loop structure and returns
 * are elided in this extraction. */
494 int v3_stop_vm(struct v3_vm_info * vm) {
496 struct guest_info * running_core;
498 if ((vm->run_state != VM_RUNNING) &&
499 (vm->run_state != VM_SIMULATING)) {
500 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
504 vm->run_state = VM_STOPPED;
506 // Sanity check to catch any weird execution states
507 if (v3_wait_for_barrier(vm, NULL) == 0) {
508 v3_lower_barrier(vm);
511 // XXX force exit all cores via a cross call/IPI XXX
515 int still_running = 0;
517 for (i = 0; i < vm->num_cores; i++) {
518 if (vm->cores[i].core_run_state != CORE_STOPPED) {
519 running_core = &vm->cores[i];
524 if (still_running == 0) {
/* Ask the scheduler to run the last seen still-running core so it can
 * observe VM_STOPPED and exit. */
528 v3_scheduler_stop_core(running_core);
531 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/* Pause a running VM by raising the VM-wide barrier (all vcores block
 * at it) and switching the run state to VM_PAUSED.
 * Rejects VMs that are not currently VM_RUNNING. */
537 int v3_pause_vm(struct v3_vm_info * vm) {
539 if (vm->run_state != VM_RUNNING) {
540 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
/* Retry until the barrier is successfully raised. */
544 while (v3_raise_barrier(vm, NULL) == -1);
546 vm->run_state = VM_PAUSED;
/* Resume a paused VM: flip the state back to VM_RUNNING and lower the
 * barrier raised by v3_pause_vm so the vcores proceed.
 * Rejects VMs that are not currently VM_PAUSED. */
552 int v3_continue_vm(struct v3_vm_info * vm) {
554 if (vm->run_state != VM_PAUSED) {
555 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
559 vm->run_state = VM_RUNNING;
561 v3_lower_barrier(vm);
/* Per-core timeout callback used by v3_simulate_vm. Marks this vcore's
 * bit in the shared timeout bitmap (private_data) and then busy-waits
 * until v3_simulate_vm clears the bitmap, effectively parking the core
 * when its simulation budget expires. */
568 static int sim_callback(struct guest_info * core, void * private_data) {
569 struct v3_bitmap * timeout_map = private_data;
571 v3_bitmap_set(timeout_map, core->vcpu_id);
573 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
/* Spin while our bit is set; cleared by v3_bitmap_reset in the caller. */
575 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/* Run a paused VM for approximately msecs milliseconds of guest time,
 * then return it to the paused state.
 * Mechanism: arm a cycle-count timeout (msecs * cpu_khz) on every core
 * with sim_callback as the handler, lower the barrier to let the VM
 * run, poll the shared bitmap until every core has hit its timeout,
 * then re-raise the barrier, restore VM_PAUSED, and release the parked
 * cores by resetting the bitmap.
 * NOTE(review): declarations of i/all_blocked and some loop braces are
 * elided in this extraction. */
585 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
586 struct v3_bitmap timeout_map;
590 uint64_t cpu_khz = V3_CPU_KHZ();
592 if (vm->run_state != VM_PAUSED) {
593 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
597 /* AT this point VM is paused */
600 v3_bitmap_init(&timeout_map, vm->num_cores);
605 // calculate cycles from msecs...
606 // IMPORTANT: Floating point not allowed.
607 cycles = (msecs * cpu_khz);
611 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
615 for (i = 0; i < vm->num_cores; i++) {
616 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
617 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
622 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
625 // Run the simulation
626 // vm->run_state = VM_SIMULATING;
627 vm->run_state = VM_RUNNING;
628 v3_lower_barrier(vm);
631 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
633 // block until simulation is complete
634 while (all_blocked == 0) {
637 for (i = 0; i < vm->num_cores; i++) {
638 if (v3_bitmap_check(&timeout_map, i) == 0) {
643 if (all_blocked == 1) {
651 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
653 // Simulation is complete
654 // Reset back to PAUSED state
656 v3_raise_barrier_nowait(vm, NULL);
657 vm->run_state = VM_PAUSED;
/* Clearing the bitmap releases the cores spinning in sim_callback. */
659 v3_bitmap_reset(&timeout_map);
661 v3_wait_for_barrier(vm, NULL);
/* Snapshot VM state into caller-provided structures.
 *   base - receives the VM-level run state
 *   core - in: core->num_vcores is the capacity of core->vcore[];
 *          out: per-vcore state/mode info, num_vcores set to the count filled
 *   mem  - in: mem->num_regions is the capacity of mem->region[];
 *          out: per-region host addr/size, num_regions set to the count filled
 * numcores/numregions clamp the copies to the smaller of the caller's
 * capacity and the VM's actual counts.
 * NOTE(review): switch braces and the return are elided in this
 * extraction; only visible lines are reproduced, with one bound fix. */
667 int v3_get_state_vm(struct v3_vm_info *vm,
668 struct v3_vm_base_state *base,
669 struct v3_vm_core_state *core,
670 struct v3_vm_mem_state *mem)
673 uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
674 uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
675 extern uint64_t v3_mem_block_size;
677 switch (vm->run_state) {
678 case VM_INVALID: base->state = V3_VM_INVALID; break;
679 case VM_RUNNING: base->state = V3_VM_RUNNING; break;
680 case VM_STOPPED: base->state = V3_VM_STOPPED; break;
681 case VM_PAUSED: base->state = V3_VM_PAUSED; break;
682 case VM_ERROR: base->state = V3_VM_ERROR; break;
683 case VM_SIMULATING: base->state = V3_VM_SIMULATING; break;
684 default: base->state = V3_VM_UNKNOWN; break;
687 for (i=0;i<numcores;i++) {
688 switch (vm->cores[i].core_run_state) {
689 case CORE_INVALID: core->vcore[i].state = V3_VCORE_INVALID; break;
690 case CORE_RUNNING: core->vcore[i].state = V3_VCORE_RUNNING; break;
691 case CORE_STOPPED: core->vcore[i].state = V3_VCORE_STOPPED; break;
692 default: core->vcore[i].state = V3_VCORE_UNKNOWN; break;
694 switch (vm->cores[i].cpu_mode) {
695 case REAL: core->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
696 case PROTECTED: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
697 case PROTECTED_PAE: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
698 case LONG: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
699 case LONG_32_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
700 case LONG_16_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
701 default: core->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
703 switch (vm->cores[i].shdw_pg_mode) {
704 case SHADOW_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
705 case NESTED_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
706 default: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
708 switch (vm->cores[i].mem_mode) {
709 case PHYSICAL_MEM: core->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
710 case VIRTUAL_MEM: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
711 default: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
714 core->vcore[i].pcore=vm->cores[i].pcpu_id;
715 core->vcore[i].last_rip=(void*)(vm->cores[i].rip);
716 core->vcore[i].num_exits=vm->cores[i].num_exits;
719 core->num_vcores=numcores;
/* BUG FIX: bound the copy by numregions (the caller-capacity clamp
 * computed above), not the VM's full region count — the original loop
 * could write past the end of the caller's mem->region[] array. */
721 for (i=0;i<numregions;i++) {
722 mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
723 mem->region[i].size = v3_mem_block_size;
726 mem->num_regions=numregions;
/* Checkpoint / live-migration entry points: thin wrappers over the
 * vmm_checkpoint implementation, compiled only when checkpointing (and,
 * for send/receive, live migration) is configured.
 * NOTE(review): this #include duplicates the one already guarded by
 * V3_CONFIG_CHECKPOINT near the top of the file — harmless (header
 * guards) but redundant; consider removing. */
732 #ifdef V3_CONFIG_CHECKPOINT
733 #include <palacios/vmm_checkpoint.h>
/* Save vm's state to the named checkpoint store/url. */
735 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
736 return v3_chkpt_save_vm(vm, store, url, opts);
/* Restore vm's state from the named checkpoint store/url. */
740 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
741 return v3_chkpt_load_vm(vm, store, url, opts);
744 #ifdef V3_CONFIG_LIVE_MIGRATION
/* Push vm's state to a remote receiver (live migration source side). */
745 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
746 return v3_chkpt_send_vm(vm, store, url, opts);
/* Accept vm state from a remote sender (live migration target side). */
750 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
751 return v3_chkpt_receive_vm(vm, store, url, opts);
/* Tear down and free a VM that has fully stopped (or errored).
 * Frees devices, then each core (scheduler bookkeeping first), then the
 * VM-level scheduler state and the VM structure itself.
 * Rejects VMs still in any running/paused state.
 * NOTE(review): return statements are elided in this extraction. */
758 int v3_free_vm(struct v3_vm_info * vm) {
760 // deinitialize guest (free memory, etc...)
762 if ((vm->run_state != VM_STOPPED) &&
763 (vm->run_state != VM_ERROR)) {
764 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
768 v3_free_vm_devices(vm);
771 for (i = 0; i < vm->num_cores; i++) {
772 v3_scheduler_free_core(&(vm->cores[i]));
773 v3_free_core(&(vm->cores[i]));
777 v3_scheduler_free_vm(vm);
778 v3_free_vm_internal(vm);
/* Determine the host CPU's paging mode.
 * NOTE(review): two variants of this function appear here, presumably
 * selected by an #ifdef on the host word size (the guards and most of
 * both bodies are elided in this extraction). The first inspects CR4
 * (PAE bit, presumably) via a cr4_32 overlay; the second variant's body
 * is not visible. Confirm against the full source. */
790 v3_cpu_mode_t v3_get_host_cpu_mode() {
800 cr4 = (struct cr4_32 *)&(cr4_val);
803 return PROTECTED_PAE;
811 v3_cpu_mode_t v3_get_host_cpu_mode() {
/* printf-style conditional debug print: formats into a 2048-byte buffer
 * and emits it via V3_Print, but only when the global v3_dbg_enable
 * flag is set. (va_start/va_end and the buffer declaration are elided
 * in this extraction.) */
817 void v3_print_cond(const char * fmt, ...) {
818 if (v3_dbg_enable == 1) {
823 vsnprintf(buf, 2048, fmt, ap);
826 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/* Deliver interrupt `vector` to physical CPU `logical_cpu` on behalf of
 * `vm`, via the host OS hook. Silently a no-op if the host did not
 * register an interrupt_cpu hook. */
832 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
833 extern struct v3_os_hooks * os_hooks;
835 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
836 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/* Enter the guest on the calling core for one execution slice,
 * dispatching on the global machine type to the SVM or VMX entry path;
 * returns that path's status.
 * NOTE(review): SVM case labels besides REV3, the switch default, and
 * the failure return are elided in this extraction; only visible lines
 * are reproduced, with one message fix. */
842 int v3_vm_enter(struct guest_info * info) {
843 switch (v3_mach_type) {
846 case V3_SVM_REV3_CPU:
847 return v3_svm_enter(info);
853 case V3_VMX_EPT_UG_CPU:
854 return v3_vmx_enter(info);
/* FIX: typo in log message ("Attemping"); matches the correctly spelled
 * message in start_core above. */
858 PrintError(info->vm_info, info, "Attempting to enter a guest on an invalid CPU\n");
/* Accessor: return the host-private data pointer attached to VM x. */
864 void *v3_get_host_vm(struct v3_vm_info *x)
867 return x->host_priv_data;
873 int v3_get_vcore(struct guest_info *x)