2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_mem.h>
22 #include <palacios/vmm_intr.h>
23 #include <palacios/vmm_config.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vmm_ctrl_regs.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_sprintf.h>
28 #include <palacios/vmm_extensions.h>
29 #include <palacios/vmm_timeout.h>
30 #include <palacios/vmm_options.h>
31 #include <palacios/vmm_cpu_mapper.h>
34 #include <palacios/svm.h>
37 #include <palacios/vmx.h>
40 #ifdef V3_CONFIG_CHECKPOINT
41 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU virtualization capability (SVM/VMX variant or
 * V3_INVALID_CPU), filled in by init_cpu() on each CPU. */
45 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Global machine type; set in Init_V3() from the first successfully
 * initialized CPU's type. */
46 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host-OS callback table registered by the embedding kernel. */
48 struct v3_os_hooks * os_hooks = NULL;
/* When set to 1, v3_print_cond() output is emitted. */
49 int v3_dbg_enable = 0;
/* Probe the calling physical CPU for virtualization extensions (SVM first,
 * then VMX) and initialize the matching backend for cpu_id; logs an error
 * if neither is present. Runs on the target CPU via hooks->call_on_cpu().
 * NOTE(review): interior lines (else branches / closing braces) are missing
 * from this extract. */
54 static void init_cpu(void * arg) {
55 uint32_t cpu_id = (uint32_t)(addr_t)arg;
58 if (v3_is_svm_capable()) {
59 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
60 v3_init_svm_cpu(cpu_id);
65 if (v3_is_vmx_capable()) {
66 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
67 v3_init_vmx_cpu(cpu_id);
72 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Tear down the per-CPU virtualization backend selected at init time,
 * dispatching on the recorded v3_cpu_types[cpu_id]. Runs on the target CPU.
 * NOTE(review): some case labels and closing braces are missing from this
 * extract. */
77 static void deinit_cpu(void * arg) {
78 uint32_t cpu_id = (uint32_t)(addr_t)arg;
81 switch (v3_cpu_types[cpu_id]) {
85 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
86 v3_deinit_svm_cpu(cpu_id);
92 case V3_VMX_EPT_UG_CPU:
93 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
94 v3_deinit_vmx_cpu(cpu_id);
99 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/* Global VMM initialization entry point, called once by the host OS.
 * Records the host hook table, parses options, brings up the core
 * frameworks (shadow paging, cpu_mapper, scheduler, extensions,
 * checkpointing), then runs init_cpu() on every CPU allowed by cpu_mask.
 * cpu_mask is a bitmap (may be NULL meaning "all cpus"); num_cpus bounds
 * the per-CPU loop. The first CPU successfully initialized determines the
 * global v3_mach_type. */
104 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
109 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
111 // Set global variables.
114 // Determine the global machine type
115 v3_mach_type = V3_INVALID_CPU;
117 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
118 v3_cpu_types[i] = V3_INVALID_CPU;
121 // Parse host-os defined options into an easily-accessed format.
122 v3_parse_options(options);
124 // Register all the possible device types
127 // Register all shadow paging handlers
128 V3_init_shdw_paging();
130 // Initialize the cpu_mapper framework (must be before extensions)
131 V3_init_cpu_mapper();
133 // Initialize the scheduler framework (must be before extensions)
134 V3_init_scheduling();
136 // Register all extensions
137 V3_init_extensions();
139 // Enabling cpu_mapper
140 V3_enable_cpu_mapper();
142 // Enabling scheduler
143 V3_enable_scheduler();
146 #ifdef V3_CONFIG_SYMMOD
150 #ifdef V3_CONFIG_CHECKPOINT
151 V3_init_checkpoint();
// Bring up each permitted CPU; cpu_mask==NULL means every CPU is allowed.
154 if ((hooks) && (hooks->call_on_cpu)) {
156 for (i = 0; i < num_cpus; i++) {
160 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
161 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
162 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
// Latch the machine type from the first CPU that reported a valid type.
164 if (v3_mach_type == V3_INVALID_CPU) {
165 v3_mach_type = v3_cpu_types[i];
/* NOTE(review): the enclosing function signature is not visible in this
 * extract — presumably the global VMM shutdown path (Shutdown_V3 in
 * upstream Palacios); confirm against the full file. It unwinds Init_V3:
 * frameworks are torn down, then deinit_cpu() runs on every CPU that was
 * initialized. */
178 V3_deinit_shdw_paging();
180 V3_deinit_extensions();
182 #ifdef V3_CONFIG_SYMMOD
186 #ifdef V3_CONFIG_CHECKPOINT
187 V3_deinit_checkpoint();
191 if ((os_hooks) && (os_hooks->call_on_cpu)) {
192 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
// Only deinit CPUs whose init actually recorded a valid type.
193 if (v3_cpu_types[i] != V3_INVALID_CPU) {
194 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
195 //deinit_cpu((void *)(addr_t)i);
/* Return the virtualization capability recorded for physical CPU cpu_id.
 * No bounds checking: caller must pass a valid CPU index. */
203 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
204 return v3_cpu_types[cpu_id];
/* Build a VM from a parsed configuration image, name it (truncated to 127
 * chars + NUL in the 128-byte vm->name buffer), and register it with the
 * scheduler. Returns the new VM on success; error paths (config failure,
 * scheduler rejection) are partially elided in this extract. */
208 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
209 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
212 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
216 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
220 } else if (strlen(name) >= 128) {
221 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
// Zero-fill then copy at most 127 chars so the name is always NUL-terminated.
224 memset(vm->name, 0, 128);
225 strncpy(vm->name, name, 127);
228 * Register this VM with the palacios scheduler. It will ask for admission
231 if(v3_scheduler_register_vm(vm) == -1) {
233 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/* Thread entry point for one virtual core: registers the core with the
 * scheduler, then enters the architecture-specific run loop (SVM or VMX)
 * selected by the global v3_mach_type. Returns the run loop's exit code.
 * NOTE(review): additional case labels are missing from this extract. */
242 static int start_core(void * p)
244 struct guest_info * core = (struct guest_info *)p;
246 if (v3_scheduler_register_core(core) == -1){
247 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
250 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
251 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
253 switch (v3_mach_type) {
256 case V3_SVM_REV3_CPU:
257 return v3_start_svm_guest(core);
263 case V3_VMX_EPT_UG_CPU:
264 return v3_start_vmx_guest(core);
268 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
/* Launch a stopped VM: validate run state, reject shadow-paging configs
 * whose base memory regions cross the 4 GB boundary, prune the caller's
 * cpu_mask of CPUs without virtualization support, admit the VM with the
 * scheduler and cpu_mapper, then spawn one host thread per virtual core
 * (each thread runs start_core()). cpu_mask is a bitmap of permitted
 * physical CPUs. Returns via error paths elided in this extract on
 * failure. */
275 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
278 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
279 uint32_t avail_cores = 0;
283 if (vm->run_state != VM_STOPPED) {
284 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
289 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
290 for (i=0;i<vm->num_cores;i++) {
291 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
292 for (j=0;j<vm->mem_map.num_base_regions;j++) {
// BUGFIX: index the region loop variable j, not the core loop variable i —
// the original tested base_regions[i] for every j, skipping most regions.
293 if ((vm->mem_map.base_regions[j].host_addr + V3_CONFIG_MEM_BLOCK_SIZE) >= 0x100000000ULL) {
294 PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n",j, i);
295 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
296 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
297 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
304 /// CHECK IF WE ARE MULTICORE ENABLED....
306 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
307 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
310 // Check that enough cores are present in the mask to handle vcores
311 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
315 if (core_mask[major] & (0x1 << minor)) {
// Drop CPUs that lack virtualization support from the usable mask.
316 if (v3_cpu_types[i] == V3_INVALID_CPU) {
317 core_mask[major] &= ~(0x1 << minor);
324 vm->avail_cores = avail_cores;
326 if (v3_scheduler_admit_vm(vm) != 0){
327 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling\n", vm->name);
330 if (v3_cpu_mapper_admit_vm(vm) != 0){
331 PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping\n", vm->name);
334 vm->run_state = VM_RUNNING;
336 if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
338 PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
// Spawn one host thread per virtual core, pinned to its mapped pcpu.
342 for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
344 struct guest_info * core = &(vm->cores[vcore_id]);
346 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
347 vcore_id, core->pcpu_id);
349 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
351 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
352 core->pcpu_id, start_core, core, core->exec_name);
354 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
355 core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
357 if (core->core_thread == NULL) {
358 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
/* Reset one virtual core to start executing at guest address rip, using
 * the backend matching the core's physical CPU type. Returns the backend's
 * result. NOTE(review): additional case labels and the default return are
 * missing from this extract. */
369 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
371 switch (v3_cpu_types[core->pcpu_id]) {
374 case V3_SVM_REV3_CPU:
375 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
376 return v3_reset_svm_vm_core(core, rip);
381 case V3_VMX_EPT_UG_CPU:
382 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
383 return v3_reset_vmx_vm_core(core, rip);
387 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
396 /* move a virtual core to different physical core */
/* Migrate virtual core vcore_id of vm to physical CPU target_cpu:
 * validate arguments, raise the VM-wide barrier to quiesce all cores,
 * flush architecture state bound to the old CPU (VMX VMCS flush shown;
 * SVM case elided in this extract), move the host thread, update
 * core->pcpu_id, and lower the barrier. */
397 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
398 struct guest_info * core = NULL;
400 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
401 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
405 core = &(vm->cores[vcore_id]);
407 if (target_cpu == core->pcpu_id) {
408 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
409 // well that was pointless
413 if (core->core_thread == NULL) {
414 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
// Spin until the barrier is raised: all vcores must be quiesced first.
418 while (v3_raise_barrier(vm, NULL) == -1);
420 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
422 // Double check that we weren't preemptively migrated
423 if (target_cpu != core->pcpu_id) {
425 V3_Print(vm, core, "Moving Core\n");
427 if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){
428 PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu);
434 switch (v3_cpu_types[core->pcpu_id]) {
437 case V3_VMX_EPT_UG_CPU:
// VMCS state is CPU-local; flush it on the old CPU before the move.
438 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
439 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
446 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
447 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
448 core->vcpu_id, target_cpu);
449 v3_lower_barrier(vm);
453 /* There will be a benign race window here:
454 core->pcpu_id will be set to the target core before its fully "migrated"
455 However the core will NEVER run on the old core again, its just in flight to the new core
457 core->pcpu_id = target_cpu;
459 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
462 v3_lower_barrier(vm);
/* Stop a running (or simulating) VM: set run_state to VM_STOPPED so the
 * per-core run loops exit, then poll until every core reports
 * CORE_STOPPED. Rejects calls made in any other run state. */
469 int v3_stop_vm(struct v3_vm_info * vm) {
471 if ((vm->run_state != VM_RUNNING) &&
472 (vm->run_state != VM_SIMULATING)) {
473 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
477 vm->run_state = VM_STOPPED;
479 // Sanity check to catch any weird execution states
480 if (v3_wait_for_barrier(vm, NULL) == 0) {
481 v3_lower_barrier(vm);
484 // XXX force exit all cores via a cross call/IPI XXX
// Poll until every core has left its run loop.
488 int still_running = 0;
490 for (i = 0; i < vm->num_cores; i++) {
491 if (vm->cores[i].core_run_state != CORE_STOPPED) {
496 if (still_running == 0) {
503 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/* Pause a running VM by raising the VM-wide barrier (quiescing all
 * vcores) and marking the state VM_PAUSED. Counterpart of
 * v3_continue_vm(). */
509 int v3_pause_vm(struct v3_vm_info * vm) {
511 if (vm->run_state != VM_RUNNING) {
512 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
// Spin until the barrier is successfully raised.
516 while (v3_raise_barrier(vm, NULL) == -1);
518 vm->run_state = VM_PAUSED;
/* Resume a paused VM: mark it VM_RUNNING and lower the barrier raised by
 * v3_pause_vm() so the vcores proceed. */
524 int v3_continue_vm(struct v3_vm_info * vm) {
526 if (vm->run_state != VM_PAUSED) {
527 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
531 vm->run_state = VM_RUNNING;
533 v3_lower_barrier(vm);
/* Per-core timeout callback used by v3_simulate_vm(): mark this vcore's
 * bit in the shared timeout bitmap, then spin until v3_simulate_vm()
 * clears it — this parks the core once its simulation budget expires. */
540 static int sim_callback(struct guest_info * core, void * private_data) {
541 struct v3_bitmap * timeout_map = private_data;
543 v3_bitmap_set(timeout_map, core->vcpu_id);
545 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
547 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/* Run a paused VM for approximately msecs milliseconds of guest time:
 * arm a cycle-count timeout (sim_callback) on every core, release the
 * pause barrier, wait until every core has hit its timeout (all bits set
 * in timeout_map), then re-raise the barrier and return the VM to
 * VM_PAUSED. Cycle budget is msecs * cpu_khz (integer math only). */
557 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
558 struct v3_bitmap timeout_map;
562 uint64_t cpu_khz = V3_CPU_KHZ();
564 if (vm->run_state != VM_PAUSED) {
565 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
569 /* AT this point VM is paused */
// One bit per vcore; sim_callback sets a core's bit when its budget expires.
572 v3_bitmap_init(&timeout_map, vm->num_cores);
577 // calculate cycles from msecs...
578 // IMPORTANT: Floating point not allowed.
579 cycles = (msecs * cpu_khz);
583 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
587 for (i = 0; i < vm->num_cores; i++) {
588 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
589 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
594 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
597 // Run the simulation
598 // vm->run_state = VM_SIMULATING;
599 vm->run_state = VM_RUNNING;
600 v3_lower_barrier(vm);
603 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
605 // block until simulation is complete
606 while (all_blocked == 0) {
609 for (i = 0; i < vm->num_cores; i++) {
610 if (v3_bitmap_check(&timeout_map, i) == 0) {
615 if (all_blocked == 1) {
623 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
625 // Simulation is complete
626 // Reset back to PAUSED state
// Re-raise the barrier, release the spinning cores (bitmap reset frees
// them from sim_callback), then wait for them to park at the barrier.
628 v3_raise_barrier_nowait(vm, NULL);
629 vm->run_state = VM_PAUSED;
631 v3_bitmap_reset(&timeout_map);
633 v3_wait_for_barrier(vm, NULL);
/* Snapshot VM state into caller-supplied structures: overall run state
 * into base, per-vcore state/cpu-mode/paging-mode/mem-mode plus pcore,
 * last RIP and exit count into core, and base memory regions into mem.
 * Core and region counts are clamped to the smaller of what the caller
 * provided room for and what the VM actually has; the clamped counts are
 * written back into core->num_vcores and mem->num_regions. */
639 int v3_get_state_vm(struct v3_vm_info *vm,
640 struct v3_vm_base_state *base,
641 struct v3_vm_core_state *core,
642 struct v3_vm_mem_state *mem)
645 uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
646 uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
649 switch (vm->run_state) {
650 case VM_INVALID: base->state = V3_VM_INVALID; break;
651 case VM_RUNNING: base->state = V3_VM_RUNNING; break;
652 case VM_STOPPED: base->state = V3_VM_STOPPED; break;
653 case VM_PAUSED: base->state = V3_VM_PAUSED; break;
654 case VM_ERROR: base->state = V3_VM_ERROR; break;
655 case VM_SIMULATING: base->state = V3_VM_SIMULATING; break;
656 default: base->state = V3_VM_UNKNOWN; break;
659 for (i=0;i<numcores;i++) {
660 switch (vm->cores[i].core_run_state) {
661 case CORE_INVALID: core->vcore[i].state = V3_VCORE_INVALID; break;
662 case CORE_RUNNING: core->vcore[i].state = V3_VCORE_RUNNING; break;
663 case CORE_STOPPED: core->vcore[i].state = V3_VCORE_STOPPED; break;
664 default: core->vcore[i].state = V3_VCORE_UNKNOWN; break;
666 switch (vm->cores[i].cpu_mode) {
667 case REAL: core->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
668 case PROTECTED: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
669 case PROTECTED_PAE: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
670 case LONG: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
671 case LONG_32_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
672 case LONG_16_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
673 default: core->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
675 switch (vm->cores[i].shdw_pg_mode) {
676 case SHADOW_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
677 case NESTED_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
678 default: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
680 switch (vm->cores[i].mem_mode) {
681 case PHYSICAL_MEM: core->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
682 case VIRTUAL_MEM: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
683 default: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
686 core->vcore[i].pcore=vm->cores[i].pcpu_id;
687 core->vcore[i].last_rip=(void*)(vm->cores[i].rip);
688 core->vcore[i].num_exits=vm->cores[i].num_exits;
691 core->num_vcores=numcores;
// BUGFIX: bound the copy by the clamped numregions, not the VM's full
// region count — the original could overflow the caller's mem->region[]
// array when the caller supplied fewer slots than the VM has regions.
693 for (i=0;i<numregions;i++) {
694 mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
695 mem->region[i].size = V3_CONFIG_MEM_BLOCK_SIZE;
698 mem->num_regions=numregions;
704 #ifdef V3_CONFIG_CHECKPOINT
705 #include <palacios/vmm_checkpoint.h>
/* Thin wrappers forwarding checkpoint save/load — and, when live
 * migration is configured, send/receive — to the checkpoint subsystem.
 * Each returns the underlying v3_chkpt_* result. */
707 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
708 return v3_chkpt_save_vm(vm, store, url, opts);
712 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
713 return v3_chkpt_load_vm(vm, store, url, opts);
716 #ifdef V3_CONFIG_LIVE_MIGRATION
717 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
718 return v3_chkpt_send_vm(vm, store, url, opts);
722 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
723 return v3_chkpt_receive_vm(vm, store, url, opts);
/* Destroy a VM that is stopped or in error: free its devices, each core's
 * state, and finally the VM structure itself. Rejects calls in any other
 * run state so a live VM cannot be freed out from under its cores. */
730 int v3_free_vm(struct v3_vm_info * vm) {
732 // deinitialize guest (free memory, etc...)
734 if ((vm->run_state != VM_STOPPED) &&
735 (vm->run_state != VM_ERROR)) {
736 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
740 v3_free_vm_devices(vm);
743 for (i = 0; i < vm->num_cores; i++) {
744 v3_free_core(&(vm->cores[i]));
748 v3_free_vm_internal(vm);
/* Report the host CPU's paging mode. NOTE(review): this extract shows two
 * conditionally-compiled definitions (the first inspects CR4, presumably
 * the 32-bit build; the second is the alternative build's stub) with most
 * of their bodies missing — confirm against the full file. */
760 v3_cpu_mode_t v3_get_host_cpu_mode() {
770 cr4 = (struct cr4_32 *)&(cr4_val);
773 return PROTECTED_PAE;
781 v3_cpu_mode_t v3_get_host_cpu_mode() {
/* Conditional debug printf: formats into a bounded local buffer and emits
 * it via V3_Print, but only when the global v3_dbg_enable flag is set. */
787 void v3_print_cond(const char * fmt, ...) {
788 if (v3_dbg_enable == 1) {
793 vsnprintf(buf, 2048, fmt, ap);
796 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/* Deliver an interrupt vector to a physical CPU through the host OS hook
 * table; silently does nothing if no interrupt_cpu hook was registered. */
802 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
803 extern struct v3_os_hooks * os_hooks;
805 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
806 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/* Enter the guest on the current core via the architecture backend
 * selected by the global v3_mach_type (SVM or VMX); returns the backend's
 * result. NOTE(review): additional case labels and the default return are
 * missing from this extract. */
812 int v3_vm_enter(struct guest_info * info) {
813 switch (v3_mach_type) {
816 case V3_SVM_REV3_CPU:
817 return v3_svm_enter(info);
823 case V3_VMX_EPT_UG_CPU:
824 return v3_vmx_enter(info);
// BUGFIX: "Attemping" -> "Attempting", matching the identical message in
// start_core().
828 PrintError(info->vm_info, info, "Attempting to enter a guest on an invalid CPU\n");
/* Return the host-private data pointer the embedding OS attached to this
 * VM at creation time. */
834 void *v3_get_host_vm(struct v3_vm_info *x)
837 return x->host_priv_data;
843 int v3_get_vcore(struct guest_info *x)