2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
32 #include <palacios/svm.h>
35 #include <palacios/vmx.h>
38 #ifdef V3_CONFIG_CHECKPOINT
39 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU virtualization flavor, filled in by init_cpu(); indexed by CPU id. */
v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Machine-wide virtualization type; stays V3_INVALID_CPU until Init_V3 probes a CPU. */
v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host OS callback table installed by Init_V3 (e.g. call_on_cpu, interrupt_cpu are used below). */
struct v3_os_hooks * os_hooks = NULL;
/* Global debug-output gate checked by v3_print_cond(). */
int v3_dbg_enable = 0;
/*
 * Probe one physical CPU's hardware virtualization support and initialize
 * the matching backend (AMD SVM or Intel VMX). Runs on the target CPU —
 * dispatched via hooks->call_on_cpu() from Init_V3 — with the CPU id
 * smuggled through the void* argument.
 */
static void init_cpu(void * arg) {
    /* Double cast via addr_t avoids a pointer/int size-mismatch warning. */
    uint32_t cpu_id = (uint32_t)(addr_t)arg;

    /* AMD SVM path */
    if (v3_is_svm_capable()) {
        PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
        v3_init_svm_cpu(cpu_id);

    /* Intel VMX path */
    if (v3_is_vmx_capable()) {
        PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
        v3_init_vmx_cpu(cpu_id);

    /* Neither SVM nor VMX: this CPU cannot host guests. */
    PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Tear down the per-CPU virtualization state set up by init_cpu(),
 * dispatching on the flavor recorded in v3_cpu_types[]. Runs on the
 * target CPU (see V3_Call_On_CPU in Shutdown below).
 */
static void deinit_cpu(void * arg) {
    uint32_t cpu_id = (uint32_t)(addr_t)arg;

    switch (v3_cpu_types[cpu_id]) {
        /* SVM flavors fall through to the shared SVM teardown. */
        PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
        v3_deinit_svm_cpu(cpu_id);

        /* VMX flavors (EPT + unrestricted guest shown here). */
        case V3_VMX_EPT_UG_CPU:
            PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
            v3_deinit_vmx_cpu(cpu_id);

        /* Unknown/invalid type: nothing was initialized, so nothing to undo. */
        PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Global VMM bring-up. Installs the host OS hook table, initializes the
 * framework subsystems (shadow paging, scheduler, extensions, optional
 * checkpointing), then probes each permitted physical CPU for SVM/VMX
 * support via init_cpu(). cpu_mask is a bitmask of CPUs to use (NULL
 * means all); num_cpus bounds the probe loop.
 */
void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) {

    /* Kept deliberately: works around a Kitten host page-fault issue. */
    V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");

    // Set global variables.

    // Determine the global machine type
    v3_mach_type = V3_INVALID_CPU;

    /* Mark every CPU slot invalid until it is actually probed below. */
    for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
	v3_cpu_types[i] = V3_INVALID_CPU;

    // Register all the possible device types

    // Register all shadow paging handlers
    V3_init_shdw_paging();

    // Initialize the scheduler framework (must be before extensions)
    V3_init_scheduling();

    // Register all extensions
    V3_init_extensions();

    // Enabling scheduler
    V3_enable_scheduler();

#ifdef V3_CONFIG_SYMMOD

#ifdef V3_CONFIG_CHECKPOINT
    V3_init_checkpoint();

    /* Probe each CPU allowed by cpu_mask; mask is a byte array of bits
       (major = byte index, minor = bit index). */
    if ((hooks) && (hooks->call_on_cpu)) {

	for (i = 0; i < num_cpus; i++) {

	    if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
		V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
		hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);

		/* First successfully-probed CPU defines the machine-wide type. */
		if (v3_mach_type == V3_INVALID_CPU) {
		    v3_mach_type = v3_cpu_types[i];
    /* NOTE(review): these lines are the body of the global VMM shutdown
       routine (presumably Shutdown_V3 — its signature is not visible here;
       confirm against the full file). It unwinds Init_V3 in reverse:
       subsystem teardown first, then per-CPU deinit. */
    V3_deinit_shdw_paging();

    V3_deinit_extensions();

#ifdef V3_CONFIG_SYMMOD

#ifdef V3_CONFIG_CHECKPOINT
    V3_deinit_checkpoint();

    /* Deinitialize only the CPUs that were actually probed/initialized. */
    if ((os_hooks) && (os_hooks->call_on_cpu)) {
	for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
	    if (v3_cpu_types[i] != V3_INVALID_CPU) {
		V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
		//deinit_cpu((void *)(addr_t)i);
/* Return the probed virtualization flavor of physical CPU cpu_id.
   Note: cpu_id is not bounds-checked against V3_CONFIG_MAX_CPUS. */
v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
    return v3_cpu_types[cpu_id];
/*
 * Create (but do not start) a VM from a guest configuration image.
 * cfg is the configuration blob handed to v3_config_guest(); priv_data is
 * opaque host state attached to the VM; name becomes the VM's name
 * (truncated to 127 chars + NUL). Registers the VM with the scheduler.
 * Returns the new v3_vm_info, or NULL on configuration failure.
 */
struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
    struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);

    PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");

    V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));

    } else if (strlen(name) >= 128) {
	PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");

    /* memset + strncpy(…,127) guarantees NUL termination of vm->name. */
    memset(vm->name, 0, 128);
    strncpy(vm->name, name, 127);

    /*
     * Register this VM with the palacios scheduler. It will ask for admission
     */
    if(v3_scheduler_register_vm(vm) == -1) {

	PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/*
 * Thread entry point for one virtual core (spawned by v3_start_vm).
 * Registers the core with the scheduler, then enters the arch-specific
 * guest run loop (SVM or VMX) selected by the machine-wide v3_mach_type.
 * Returns the run loop's exit status.
 */
static int start_core(void * p)
{
    struct guest_info * core = (struct guest_info *)p;

    if (v3_scheduler_register_core(core) == -1){
	PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);

    PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
	       core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);

    /* Dispatch on the virtualization flavor probed at Init_V3 time. */
    switch (v3_mach_type) {

	case V3_SVM_REV3_CPU:
	    return v3_start_svm_guest(core);

	case V3_VMX_EPT_UG_CPU:
	    return v3_start_vmx_guest(core);

	/* default: machine type was never established — cannot run. */
	PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
// For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
/*
 * Launch a stopped VM: validate its state, check shadow-paging memory
 * constraints, prune the CPU mask to CPUs with working virtualization,
 * ask the scheduler for admission, then spawn one host thread per
 * virtual core (via start_core). cpu_mask is a 32-bit bitmask of
 * permitted physical CPUs, viewed as bytes for future widening.
 * Returns 0 on success, -1 on failure (state/admission/placement errors).
 */
int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {

    uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
    uint32_t avail_cores = 0;

    /* Only a VM in the STOPPED state may be (re)started. */
    if (vm->run_state != VM_STOPPED) {
	PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);

    // Do not run if any core is using shadow paging and we are out of 4 GB bounds
    for (i=0;i<vm->num_cores;i++) {
	if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
	    if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
		PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
		PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
		PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
		PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");

    /// CHECK IF WE ARE MULTICORE ENABLED....

    V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
    V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));

    // Check that enough cores are present in the mask to handle vcores
    /* Clear mask bits for CPUs whose virtualization probe failed. */
    for (i = 0; i < MAX_CORES; i++) {

	if (core_mask[major] & (0x1 << minor)) {
	    if (v3_cpu_types[i] == V3_INVALID_CPU) {
		core_mask[major] &= ~(0x1 << minor);

    vm->avail_cores = avail_cores;

    if (v3_scheduler_admit_vm(vm) != 0){
       PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);

    vm->run_state = VM_RUNNING;

    // Spawn off threads for each core.
    // We work backwards, so that core 0 is always started last.
    for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {

	struct guest_info * core = &(vm->cores[vcore_id]);
	/* Optional per-core pinning from the guest config ("target_cpu"). */
	char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu");
	uint32_t core_idx = 0;

	if (specified_cpu != NULL) {
	    core_idx = atoi(specified_cpu);

	    if ((core_idx < 0) || (core_idx >= MAX_CORES)) {
		PrintError(vm, VCORE_NONE, "Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES);

	    i--; // We reset the logical core idx. Not strictly necessary I guess...

	/* Locate this CPU's bit in the (byte-array view of the) mask. */
	major = core_idx / 8;
	minor = core_idx % 8;

	if ((core_mask[major] & (0x1 << minor)) == 0) {
	    PrintError(vm, VCORE_NONE, "Logical CPU %d not available for virtual core %d; not started\n",

	    /* An explicitly pinned core that cannot be placed is fatal. */
	    if (specified_cpu != NULL) {
		PrintError(vm, VCORE_NONE, "CPU was specified explicitly (%d). HARD ERROR\n", core_idx);

	PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",

	sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);

	PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
		   core_idx, start_core, core, core->exec_name);

	core->core_run_state = CORE_STOPPED;  // core zero will turn itself on
	core->pcpu_id = core_idx;
	core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);

	if (core->core_thread == NULL) {
	    PrintError(vm, VCORE_NONE, "Thread launch failed\n");

    /* Reached when no usable physical CPU remained for some vcore. */
    PrintError(vm, VCORE_NONE, "Error starting VM: Not enough available CPU cores\n");
/*
 * Reset a single virtual core to begin execution at rip, dispatching on
 * the virtualization flavor of the physical CPU the core sits on.
 * Returns the arch-specific reset routine's status.
 */
int v3_reset_vm_core(struct guest_info * core, addr_t rip) {

    switch (v3_cpu_types[core->pcpu_id]) {

	case V3_SVM_REV3_CPU:
	    PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
	    return v3_reset_svm_vm_core(core, rip);

	case V3_VMX_EPT_UG_CPU:
	    PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
	    return v3_reset_vmx_vm_core(core, rip);

	/* default: host CPU type unknown — cannot reset. */
	PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
/* move a virtual core to different physical core */
/*
 * Migrate virtual core vcore_id of vm to physical CPU target_cpu.
 * Raises the VM-wide barrier so all cores are quiesced, flushes
 * arch-specific per-CPU state (e.g. the VMCS on VMX), moves the host
 * thread, updates core->pcpu_id, and lowers the barrier.
 * Returns 0 on success, -1 on invalid vcore/thread or move failure.
 */
int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
    struct guest_info * core = NULL;

    if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
	PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);

    core = &(vm->cores[vcore_id]);

    if (target_cpu == core->pcpu_id) {
	PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
	// well that was pointless

    if (core->core_thread == NULL) {
	PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");

    /* Spin until the barrier is raised: all vcores stop at a sync point. */
    while (v3_raise_barrier(vm, NULL) == -1);

    V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);

    // Double check that we weren't preemptively migrated
    if (target_cpu != core->pcpu_id) {

	V3_Print(vm, core, "Moving Core\n");

	switch (v3_cpu_types[core->pcpu_id]) {

	    case V3_VMX_EPT_UG_CPU:
		PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
		/* VMCS state is per-physical-CPU; flush it on the old CPU
		   before the thread leaves. */
		V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);

	if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
	    PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
		       core->vcpu_id, target_cpu);
	    v3_lower_barrier(vm);

	/* There will be a benign race window here:
	   core->pcpu_id will be set to the target core before its fully "migrated"
	   However the core will NEVER run on the old core again, its just in flight to the new core
	*/
	core->pcpu_id = target_cpu;

	V3_Print(vm, core, "core now at %d\n", core->pcpu_id);

    v3_lower_barrier(vm);
/*
 * Stop a running (or simulating) VM: flip run_state to VM_STOPPED and
 * wait for every core to observe it and park itself (CORE_STOPPED).
 * Returns 0 on success, -1 if the VM was not in a stoppable state.
 */
int v3_stop_vm(struct v3_vm_info * vm) {

    if ((vm->run_state != VM_RUNNING) &&
	(vm->run_state != VM_SIMULATING)) {
	PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);

    vm->run_state = VM_STOPPED;

    // Sanity check to catch any weird execution states
    if (v3_wait_for_barrier(vm, NULL) == 0) {
	v3_lower_barrier(vm);

    // XXX force exit all cores via a cross call/IPI XXX

    /* Poll until every core reports CORE_STOPPED. */
    int still_running = 0;

    for (i = 0; i < vm->num_cores; i++) {
	if (vm->cores[i].core_run_state != CORE_STOPPED) {

    if (still_running == 0) {

    V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/*
 * Pause a running VM by raising the VM-wide barrier (all vcores block at
 * the sync point) and marking it VM_PAUSED. The barrier is intentionally
 * left raised; v3_continue_vm() lowers it.
 * Returns 0 on success, -1 if the VM was not running.
 */
int v3_pause_vm(struct v3_vm_info * vm) {

    if (vm->run_state != VM_RUNNING) {
	PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");

    /* Retry until the barrier is successfully raised. */
    while (v3_raise_barrier(vm, NULL) == -1);

    vm->run_state = VM_PAUSED;
/*
 * Resume a paused VM: mark it VM_RUNNING and lower the barrier raised by
 * v3_pause_vm(), releasing all vcores.
 * Returns 0 on success, -1 if the VM was not paused.
 */
int v3_continue_vm(struct v3_vm_info * vm) {

    if (vm->run_state != VM_PAUSED) {
	PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");

    vm->run_state = VM_RUNNING;

    v3_lower_barrier(vm);
/*
 * Per-core timeout callback used by v3_simulate_vm(). Marks this core's
 * bit in the shared timeout bitmap, then busy-waits until the control
 * thread clears it (v3_bitmap_reset), i.e. until simulation ends.
 */
static int sim_callback(struct guest_info * core, void * private_data) {
    struct v3_bitmap * timeout_map = private_data;

    /* Signal "my simulation window expired" to v3_simulate_vm(). */
    v3_bitmap_set(timeout_map, core->vcpu_id);

    V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);

    /* Block here until the controller clears the bitmap. */
    while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/*
 * Run a paused VM for approximately msecs milliseconds, then return it to
 * the paused state. Arms a cycle-count timeout (sim_callback) on every
 * core, releases the VM, waits until all cores have hit their timeout,
 * then re-raises the barrier and restores VM_PAUSED.
 * Returns 0 on success, -1 if the VM was not paused or a timeout could
 * not be registered.
 */
int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
    struct v3_bitmap timeout_map;

    uint64_t cpu_khz = V3_CPU_KHZ();

    if (vm->run_state != VM_PAUSED) {
	PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");

    /* AT this point VM is paused */

    /* One bit per core: set by sim_callback when that core's window ends. */
    v3_bitmap_init(&timeout_map, vm->num_cores);

    // calculate cycles from msecs...
    // IMPORTANT: Floating point not allowed.
    /* khz == cycles per millisecond, so msecs * khz == total cycles. */
    cycles = (msecs * cpu_khz);

    V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);

    for (i = 0; i < vm->num_cores; i++) {
	if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
	    PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);

    V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");

    // Run the simulation
//    vm->run_state = VM_SIMULATING;
    vm->run_state = VM_RUNNING;
    v3_lower_barrier(vm);

    V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");

    // block until simulation is complete
    /* Poll the bitmap until every core has set its bit. */
    while (all_blocked == 0) {

	for (i = 0; i < vm->num_cores; i++) {
	    if (v3_bitmap_check(&timeout_map, i) == 0) {

	if (all_blocked == 1) {

    V3_Print(vm, VCORE_NONE,"Simulation is complete\n");

    // Simulation is complete
    // Reset back to PAUSED state

    v3_raise_barrier_nowait(vm, NULL);
    vm->run_state = VM_PAUSED;

    /* Clearing the bitmap releases the cores spinning in sim_callback. */
    v3_bitmap_reset(&timeout_map);

    v3_wait_for_barrier(vm, NULL);
/*
 * Snapshot a VM's externally-visible state into *s: overall run state,
 * base memory region info, and per-vcore run/CPU/paging/memory modes.
 * Translates Palacios-internal enums into the public V3_* constants.
 * Fills at most min(s->num_vcores, vm->num_cores) vcore entries and
 * writes the actual count back into s->num_vcores.
 */
int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)

    /* Clamp to the smaller of caller capacity and actual core count. */
    uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;

    switch (vm->run_state) {
	case VM_INVALID: s->state = V3_VM_INVALID; break;
	case VM_RUNNING: s->state = V3_VM_RUNNING; break;
	case VM_STOPPED: s->state = V3_VM_STOPPED; break;
	case VM_PAUSED: s->state = V3_VM_PAUSED; break;
	case VM_ERROR: s->state = V3_VM_ERROR; break;
	case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
	default: s->state = V3_VM_UNKNOWN; break;

    s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
    s->mem_size = vm->mem_size;

    s->num_vcores = numcores;

    for (i=0;i<numcores;i++) {
	/* Core run state */
	switch (vm->cores[i].core_run_state) {
	    case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
	    case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
	    case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
	    default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;

	/* Guest CPU execution mode */
	switch (vm->cores[i].cpu_mode) {
	    case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
	    case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
	    case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
	    case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
	    case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
	    case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
	    default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;

	/* Paging strategy (shadow vs nested) */
	switch (vm->cores[i].shdw_pg_mode) {
	    case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
	    case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
	    default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;

	/* Guest memory addressing mode */
	switch (vm->cores[i].mem_mode) {
	    case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
	    case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
	    default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;

	s->vcore[i].pcore=vm->cores[i].pcpu_id;
	s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
	s->vcore[i].num_exits=vm->cores[i].num_exits;
#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>

/* Save a VM checkpoint via the named store backend at url.
   Thin wrapper over the checkpoint subsystem; returns its status. */
int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_save_vm(vm, store, url);

/* Restore a VM from a checkpoint in the named store at url. */
int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_load_vm(vm, store, url);

#ifdef V3_CONFIG_LIVE_MIGRATION
/* Live-migration sender side: stream the VM to url via store. */
int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_send_vm(vm, store, url);

/* Live-migration receiver side: accept a VM from url via store. */
int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
    return v3_chkpt_receive_vm(vm, store, url);
/*
 * Destroy a stopped (or errored) VM: free its devices, each core's state,
 * and finally the VM structure itself.
 * Returns -1 if the VM is in a state where freeing is unsafe.
 */
int v3_free_vm(struct v3_vm_info * vm) {

    // deinitialize guest (free memory, etc...)

    /* Refuse to free a VM whose cores may still be executing. */
    if ((vm->run_state != VM_STOPPED) &&
	(vm->run_state != VM_ERROR)) {
	PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);

    v3_free_vm_devices(vm);

    for (i = 0; i < vm->num_cores; i++) {
	v3_free_core(&(vm->cores[i]));

    v3_free_vm_internal(vm);
/* Determine the host CPU's execution mode by inspecting control registers.
   NOTE(review): two definitions appear here — presumably selected by a
   32-bit vs 64-bit build #ifdef not visible in this chunk; confirm against
   the full file. This variant reads CR4 (PAE bit) to distinguish
   PROTECTED from PROTECTED_PAE on a 32-bit host. */
v3_cpu_mode_t v3_get_host_cpu_mode() {

    cr4 = (struct cr4_32 *)&(cr4_val);

    return PROTECTED_PAE;

/* 64-bit-host variant (body elided in this chunk). */
v3_cpu_mode_t v3_get_host_cpu_mode() {
/*
 * Conditional debug print: formats like printf but emits output only when
 * the global v3_dbg_enable flag is set. Output is truncated to the local
 * buffer size by vsnprintf.
 */
void v3_print_cond(const char * fmt, ...) {
    if (v3_dbg_enable == 1) {

	vsnprintf(buf, 2048, fmt, ap);

	V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/*
 * Ask the host OS to deliver interrupt `vector` to physical CPU
 * `logical_cpu` on behalf of vm. No-op if the host did not supply an
 * interrupt_cpu hook.
 */
void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
    extern struct v3_os_hooks * os_hooks;

    if ((os_hooks) && (os_hooks)->interrupt_cpu) {
	(os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/*
 * Enter the guest for one execution slice on this core, dispatching to
 * the SVM or VMX entry path based on the machine-wide type.
 * Returns the arch entry routine's status.
 */
int v3_vm_enter(struct guest_info * info) {
    switch (v3_mach_type) {

	case V3_SVM_REV3_CPU:
	    return v3_svm_enter(info);

	case V3_VMX_EPT_UG_CPU:
	    return v3_vmx_enter(info);

	/* default: no valid machine type.
	   NOTE(review): "Attemping" is a typo in this runtime message;
	   fix it in a code change, not a comment-only pass. */
	PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
/* Return the opaque host-private handle attached to the VM at creation
   (the priv_data passed to v3_create_vm). */
void *v3_get_host_vm(struct v3_vm_info *x)

    return x->host_priv_data;
857 int v3_get_vcore(struct guest_info *x)