2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_mem.h>
22 #include <palacios/vmm_intr.h>
23 #include <palacios/vmm_config.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vmm_ctrl_regs.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_sprintf.h>
28 #include <palacios/vmm_extensions.h>
29 #include <palacios/vmm_timeout.h>
30 #include <palacios/vmm_options.h>
31 #include <palacios/vmm_cpu_mapper.h>
34 #include <palacios/svm.h>
37 #include <palacios/vmx.h>
40 #ifdef V3_CONFIG_CHECKPOINT
41 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU virtualization capability detected at init time
 * (SVM/VMX variant, or V3_INVALID_CPU if the CPU has no extensions). */
v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Overall machine type; set from the first CPU that initializes successfully. */
v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;

/* Callback table supplied by the host OS embedding Palacios. */
struct v3_os_hooks * os_hooks = NULL;
/* Global debug-print toggle consulted by v3_print_cond(). */
int v3_dbg_enable = 0;
/*
 * Probe and initialize hardware virtualization support on one physical CPU.
 * Runs ON the target CPU (invoked via hooks->call_on_cpu); arg carries the
 * cpu id cast through addr_t.
 * NOTE(review): several lines are elided in this excerpt (e.g. the
 * assignments into v3_cpu_types[] and closing braces of each branch).
 */
static void init_cpu(void * arg) {
    uint32_t cpu_id = (uint32_t)(addr_t)arg;

    if (v3_is_svm_capable()) {
        PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
        v3_init_svm_cpu(cpu_id);

    if (v3_is_vmx_capable()) {
        PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
        v3_init_vmx_cpu(cpu_id);

    // Reached when neither SVM nor VMX is available on this CPU
    PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Tear down virtualization support on one physical CPU; arg is the cpu id.
 * Dispatches on the type recorded in v3_cpu_types[] by init_cpu().
 * NOTE(review): several case labels and break statements are elided in
 * this excerpt.
 */
static void deinit_cpu(void * arg) {
    uint32_t cpu_id = (uint32_t)(addr_t)arg;

    switch (v3_cpu_types[cpu_id]) {
        PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
        v3_deinit_svm_cpu(cpu_id);

    case V3_VMX_EPT_UG_CPU:
        PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
        v3_deinit_vmx_cpu(cpu_id);

        PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Global one-time VMM initialization, called by the host OS shim.
 * hooks    - host OS callback table
 * cpu_mask - optional bitmap of physical CPUs to bring up (NULL = all)
 * num_cpus - number of physical CPUs to consider
 * options  - host-provided option string parsed by v3_parse_options()
 * NOTE(review): declarations of i/major/minor, some #endif lines and
 * closing braces are elided in this excerpt.
 */
void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {

    V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");

    // Set global variables.

    // Determine the global machine type
    v3_mach_type = V3_INVALID_CPU;

    for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
        v3_cpu_types[i] = V3_INVALID_CPU;

    // Parse host-os defined options into an easily-accessed format.
    v3_parse_options(options);

    // Memory manager initialization

    // Register all the possible device types

    // Register all shadow paging handlers
    V3_init_shdw_paging();

    // Initialize the cpu_mapper framework (must be before extensions)
    V3_init_cpu_mapper();

    // Initialize the scheduler framework (must be before extensions)
    V3_init_scheduling();

    // Register all extensions
    V3_init_extensions();

    // Enabling cpu_mapper
    V3_enable_cpu_mapper();

    // Enabling scheduler
    V3_enable_scheduler();

#ifdef V3_CONFIG_SYMMOD

#ifdef V3_CONFIG_CHECKPOINT
    V3_init_checkpoint();

    // Bring up virtualization on each permitted physical CPU; record the
    // machine type from the first CPU that reports a valid type.
    if ((hooks) && (hooks->call_on_cpu)) {

        for (i = 0; i < num_cpus; i++) {

            if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
                V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
                hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);

                if (v3_mach_type == V3_INVALID_CPU) {
                    v3_mach_type = v3_cpu_types[i];
    /* NOTE(review): the enclosing function header is elided in this excerpt;
     * these statements tear down VMM subsystems (presumably Shutdown_V3 —
     * confirm against the full file). */
    V3_deinit_shdw_paging();

    V3_deinit_extensions();

#ifdef V3_CONFIG_SYMMOD

#ifdef V3_CONFIG_CHECKPOINT
    V3_deinit_checkpoint();

    // Tear down per-CPU virtualization state on every CPU that was initialized
    if ((os_hooks) && (os_hooks->call_on_cpu)) {
        for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
            if (v3_cpu_types[i] != V3_INVALID_CPU) {
                V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
                //deinit_cpu((void *)(addr_t)i);
/* Return the detected virtualization type of physical CPU cpu_id.
 * NOTE(review): no bounds check on cpu_id — caller must pass a valid index. */
v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
    return v3_cpu_types[cpu_id];
212 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
213 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
216 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
220 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
224 } else if (strlen(name) >= 128) {
225 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
228 memset(vm->name, 0, 128);
229 strncpy(vm->name, name, 127);
232 * Register this VM with the palacios scheduler. It will ask for admission
235 if(v3_scheduler_register_vm(vm) == -1) {
237 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/*
 * Host thread entry point for one virtual core: register the core with the
 * scheduler, then enter the architecture-specific run loop (SVM or VMX)
 * selected by the global v3_mach_type. Returns the run loop's exit code.
 * NOTE(review): some case labels and the switch's default/closing brace
 * are elided in this excerpt.
 */
static int start_core(void * p)
{
    struct guest_info * core = (struct guest_info *)p;

    if (v3_scheduler_register_core(core) == -1){
        PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);

    PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
               core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);

    switch (v3_mach_type) {

    case V3_SVM_REV3_CPU:
        return v3_start_svm_guest(core);

    case V3_VMX_EPT_UG_CPU:
        return v3_start_vmx_guest(core);

        PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
279 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
282 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
283 uint32_t avail_cores = 0;
285 extern uint64_t v3_mem_block_size;
288 if (vm->run_state != VM_STOPPED) {
289 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
294 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
295 for (i=0;i<vm->num_cores;i++) {
296 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
297 for (j=0;j<vm->mem_map.num_base_regions;j++) {
298 if ((vm->mem_map.base_regions[i].host_addr + v3_mem_block_size) >= 0x100000000ULL) {
299 PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n",j, i);
300 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
301 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
302 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
309 /// CHECK IF WE ARE MULTICORE ENABLED....
311 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
312 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
315 // Check that enough cores are present in the mask to handle vcores
316 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
320 if (core_mask[major] & (0x1 << minor)) {
321 if (v3_cpu_types[i] == V3_INVALID_CPU) {
322 core_mask[major] &= ~(0x1 << minor);
329 vm->avail_cores = avail_cores;
331 if (v3_scheduler_admit_vm(vm) != 0){
332 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
335 if (v3_cpu_mapper_admit_vm(vm) != 0){
336 PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
339 vm->run_state = VM_RUNNING;
341 if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
343 PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
347 for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
349 struct guest_info * core = &(vm->cores[vcore_id]);
351 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
352 vcore_id, core->pcpu_id);
354 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
356 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
357 core->pcpu_id, start_core, core, core->exec_name);
359 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
360 core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
362 if (core->core_thread == NULL) {
363 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
/*
 * Reset one virtual core to begin execution at rip, dispatching to the
 * arch-specific reset routine based on the type of the core's current
 * physical CPU. Returns the reset routine's result.
 * NOTE(review): some case labels and the switch's closing brace are
 * elided in this excerpt.
 */
int v3_reset_vm_core(struct guest_info * core, addr_t rip) {

    switch (v3_cpu_types[core->pcpu_id]) {

    case V3_SVM_REV3_CPU:
        PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
        return v3_reset_svm_vm_core(core, rip);

    case V3_VMX_EPT_UG_CPU:
        PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
        return v3_reset_vmx_vm_core(core, rip);

        PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
/* move a virtual core to different physical core */
/*
 * Migrate virtual core vcore_id of vm to physical CPU target_cpu.
 * Sequence: validate arguments, raise the VM-wide barrier so all cores
 * are quiesced, re-check the target (we may have been preemptively
 * migrated), admit the core on the new CPU, flush arch state bound to
 * the old CPU (VMX case shown), move the host thread, update pcpu_id,
 * and lower the barrier.
 * NOTE(review): several return statements and closing braces are elided
 * in this excerpt; statement order matters here, so the code is left
 * untouched.
 */
int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
    struct guest_info * core = NULL;

    if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
        PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);

    core = &(vm->cores[vcore_id]);

    if (target_cpu == core->pcpu_id) {
        PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
        // well that was pointless

    if (core->core_thread == NULL) {
        PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");

    // Spin until the barrier is raised; all cores must be quiesced first
    while (v3_raise_barrier(vm, NULL) == -1);

    V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);

    // Double check that we weren't preemptively migrated
    if (target_cpu != core->pcpu_id) {

        V3_Print(vm, core, "Moving Core\n");

        if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){
            PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu);

        switch (v3_cpu_types[core->pcpu_id]) {

        case V3_VMX_EPT_UG_CPU:
            PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
            // Flush must run ON the old CPU before the thread moves away
            V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);

        if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
            PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
                       core->vcpu_id, target_cpu);
            v3_lower_barrier(vm);

        /* There will be a benign race window here:
           core->pcpu_id will be set to the target core before its fully "migrated"
           However the core will NEVER run on the old core again, its just in flight to the new core
        */
        core->pcpu_id = target_cpu;

        V3_Print(vm, core, "core now at %d\n", core->pcpu_id);

    v3_lower_barrier(vm);
/*
 * Stop a running (or simulating) VM: set run_state to VM_STOPPED so cores
 * exit their run loops, then poll until every core reports CORE_STOPPED.
 * NOTE(review): the polling loop's enclosing braces and return lines are
 * elided in this excerpt.
 */
int v3_stop_vm(struct v3_vm_info * vm) {

    if ((vm->run_state != VM_RUNNING) &&
        (vm->run_state != VM_SIMULATING)) {
        PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);

    vm->run_state = VM_STOPPED;

    // Sanity check to catch any weird execution states
    if (v3_wait_for_barrier(vm, NULL) == 0) {
        v3_lower_barrier(vm);

    // XXX force exit all cores via a cross call/IPI XXX

    int still_running = 0;

    for (i = 0; i < vm->num_cores; i++) {
        if (vm->cores[i].core_run_state != CORE_STOPPED) {

    if (still_running == 0) {

    V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/*
 * Pause a running VM by raising the VM-wide barrier (quiescing all cores)
 * and marking the run state VM_PAUSED. The barrier stays raised until
 * v3_continue_vm() lowers it.
 */
int v3_pause_vm(struct v3_vm_info * vm) {

    if (vm->run_state != VM_RUNNING) {
        PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");

    // Spin until the barrier is successfully raised
    while (v3_raise_barrier(vm, NULL) == -1);

    vm->run_state = VM_PAUSED;
/*
 * Resume a paused VM: mark it running again and lower the barrier raised
 * by v3_pause_vm(), releasing all cores.
 */
int v3_continue_vm(struct v3_vm_info * vm) {

    if (vm->run_state != VM_PAUSED) {
        PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");

    vm->run_state = VM_RUNNING;

    v3_lower_barrier(vm);
/*
 * Core-timeout callback used by v3_simulate_vm(): marks this vcore's bit
 * in the shared timeout bitmap, then busy-waits until the controller
 * clears it, blocking the core at the end of its simulation window.
 * private_data is the struct v3_bitmap shared with v3_simulate_vm().
 */
static int sim_callback(struct guest_info * core, void * private_data) {
    struct v3_bitmap * timeout_map = private_data;

    // Signal "this core has hit its timeout"
    v3_bitmap_set(timeout_map, core->vcpu_id);

    V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);

    // Spin until the controller resets the bitmap
    while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/*
 * Run a paused VM for approximately msecs milliseconds, then re-pause it.
 * Mechanism: arm a cycle-count timeout on every core (sim_callback),
 * lower the barrier so cores run, wait until every core's bit is set in
 * timeout_map (all cores blocked in sim_callback), then raise the barrier,
 * restore VM_PAUSED, and release the cores by resetting the bitmap.
 * NOTE(review): loop braces, 'all_blocked' updates and return lines are
 * elided in this excerpt.
 */
int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
    struct v3_bitmap timeout_map;

    uint64_t cpu_khz = V3_CPU_KHZ();

    if (vm->run_state != VM_PAUSED) {
        PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");

    /* AT this point VM is paused */

    // One bit per vcore; set by sim_callback when that core times out
    v3_bitmap_init(&timeout_map, vm->num_cores);

    // calculate cycles from msecs...
    // IMPORTANT: Floating point not allowed.
    cycles = (msecs * cpu_khz);

    V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);

    for (i = 0; i < vm->num_cores; i++) {
        if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
            PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);

    V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");

    // Run the simulation
    //    vm->run_state = VM_SIMULATING;
    vm->run_state = VM_RUNNING;
    v3_lower_barrier(vm);

    V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");

    // block until simulation is complete
    while (all_blocked == 0) {

        for (i = 0; i < vm->num_cores; i++) {
            if (v3_bitmap_check(&timeout_map, i) == 0) {

        if (all_blocked == 1) {

    V3_Print(vm, VCORE_NONE,"Simulation is complete\n");

    // Simulation is complete
    // Reset back to PAUSED state

    v3_raise_barrier_nowait(vm, NULL);
    vm->run_state = VM_PAUSED;

    // Releasing the bitmap lets the cores fall out of sim_callback's spin
    v3_bitmap_reset(&timeout_map);

    v3_wait_for_barrier(vm, NULL);
644 int v3_get_state_vm(struct v3_vm_info *vm,
645 struct v3_vm_base_state *base,
646 struct v3_vm_core_state *core,
647 struct v3_vm_mem_state *mem)
650 uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
651 uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
652 extern uint64_t v3_mem_block_size;
654 switch (vm->run_state) {
655 case VM_INVALID: base->state = V3_VM_INVALID; break;
656 case VM_RUNNING: base->state = V3_VM_RUNNING; break;
657 case VM_STOPPED: base->state = V3_VM_STOPPED; break;
658 case VM_PAUSED: base->state = V3_VM_PAUSED; break;
659 case VM_ERROR: base->state = V3_VM_ERROR; break;
660 case VM_SIMULATING: base->state = V3_VM_SIMULATING; break;
661 default: base->state = V3_VM_UNKNOWN; break;
664 for (i=0;i<numcores;i++) {
665 switch (vm->cores[i].core_run_state) {
666 case CORE_INVALID: core->vcore[i].state = V3_VCORE_INVALID; break;
667 case CORE_RUNNING: core->vcore[i].state = V3_VCORE_RUNNING; break;
668 case CORE_STOPPED: core->vcore[i].state = V3_VCORE_STOPPED; break;
669 default: core->vcore[i].state = V3_VCORE_UNKNOWN; break;
671 switch (vm->cores[i].cpu_mode) {
672 case REAL: core->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
673 case PROTECTED: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
674 case PROTECTED_PAE: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
675 case LONG: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
676 case LONG_32_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
677 case LONG_16_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
678 default: core->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
680 switch (vm->cores[i].shdw_pg_mode) {
681 case SHADOW_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
682 case NESTED_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
683 default: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
685 switch (vm->cores[i].mem_mode) {
686 case PHYSICAL_MEM: core->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
687 case VIRTUAL_MEM: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
688 default: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
691 core->vcore[i].pcore=vm->cores[i].pcpu_id;
692 core->vcore[i].last_rip=(void*)(vm->cores[i].rip);
693 core->vcore[i].num_exits=vm->cores[i].num_exits;
696 core->num_vcores=numcores;
698 for (i=0;i<vm->mem_map.num_base_regions;i++) {
699 mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
700 mem->region[i].size = v3_mem_block_size;
703 mem->num_regions=numregions;
#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>

/* Save a checkpoint of vm through the checkpoint subsystem.
 * store/url select the checkpoint backend and destination; opts are
 * passed through unchanged. Thin wrapper over v3_chkpt_save_vm(). */
int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
    return v3_chkpt_save_vm(vm, store, url, opts);

/* Restore vm's state from a previously saved checkpoint. */
int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
    return v3_chkpt_load_vm(vm, store, url, opts);

#ifdef V3_CONFIG_LIVE_MIGRATION
/* Send this VM's state as a live-migration stream to store/url. */
int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
    return v3_chkpt_send_vm(vm, store, url, opts);

/* Receive a live-migration stream into this VM from store/url. */
int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
    return v3_chkpt_receive_vm(vm, store, url, opts);
/*
 * Free a stopped (or errored) VM: release its devices, free each core's
 * per-core state, then release the VM structure itself.
 * Refuses to free a VM in any other run state.
 * NOTE(review): return lines and closing braces are elided in this excerpt.
 */
int v3_free_vm(struct v3_vm_info * vm) {

    // deinitialize guest (free memory, etc...)

    if ((vm->run_state != VM_STOPPED) &&
        (vm->run_state != VM_ERROR)) {
        PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);

    v3_free_vm_devices(vm);

    for (i = 0; i < vm->num_cores; i++) {
        v3_free_core(&(vm->cores[i]));

    v3_free_vm_internal(vm);
/* Determine the host CPU's operating mode by inspecting control registers.
 * NOTE(review): most of this function body is elided in this excerpt —
 * only the CR4 cast and the PROTECTED_PAE return are visible. */
v3_cpu_mode_t v3_get_host_cpu_mode() {

    cr4 = (struct cr4_32 *)&(cr4_val);

    return PROTECTED_PAE;

/* Alternate build variant of the same query (body elided in this excerpt). */
v3_cpu_mode_t v3_get_host_cpu_mode() {
/*
 * Conditional debug print: format the varargs into a local buffer and
 * emit it via V3_Print, but only when the global v3_dbg_enable flag is set.
 * NOTE(review): the buffer declaration and va_start/va_end lines are
 * elided in this excerpt.
 */
void v3_print_cond(const char * fmt, ...) {
    if (v3_dbg_enable == 1) {

        vsnprintf(buf, 2048, fmt, ap);

        V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/*
 * Deliver an inter-processor interrupt with the given vector to
 * logical_cpu on behalf of vm, via the host OS's interrupt_cpu hook.
 * Silently does nothing if the hook table or hook is absent.
 */
void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
    extern struct v3_os_hooks * os_hooks;

    if ((os_hooks) && (os_hooks)->interrupt_cpu) {
        (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/*
 * Enter the guest once on the calling core, dispatching to the SVM or
 * VMX entry path based on the global v3_mach_type. Returns the entry
 * path's result.
 * NOTE(review): some case labels and the switch's closing brace are
 * elided in this excerpt.
 */
int v3_vm_enter(struct guest_info * info) {
    switch (v3_mach_type) {

    case V3_SVM_REV3_CPU:
        return v3_svm_enter(info);

    case V3_VMX_EPT_UG_CPU:
        return v3_vmx_enter(info);

        PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
/* Return the host-private data pointer that was stored with VM x at
 * creation time. */
void *v3_get_host_vm(struct v3_vm_info *x)
{
    return x->host_priv_data;
848 int v3_get_vcore(struct guest_info *x)