2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
29 #include <palacios/vmm_options.h>
32 #include <palacios/svm.h>
35 #include <palacios/vmx.h>
38 #ifdef V3_CONFIG_CHECKPOINT
39 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU virtualization backend type, filled in by init_cpu(). */
43 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Global machine type: set in Init_V3() from the first CPU that initializes
 * successfully; V3_INVALID_CPU until then. */
44 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host-OS callback table handed to Palacios by the embedding OS. */
46 struct v3_os_hooks * os_hooks = NULL;
/* Non-zero enables conditional debug output via v3_print_cond(). */
47 int v3_dbg_enable = 0;
/*
 * init_cpu() - per-CPU initialization callback, invoked on each physical CPU
 * via the host's call_on_cpu hook.  Detects SVM or VMX capability and
 * initializes the matching backend for this CPU; logs an error if the CPU
 * has neither.  arg carries the cpu_id cast through addr_t.
 * NOTE(review): intermediate lines (else branches / #ifdef guards / closing
 * braces) are missing from this view of the file.
 */
52 static void init_cpu(void * arg) {
53 uint32_t cpu_id = (uint32_t)(addr_t)arg;
56 if (v3_is_svm_capable()) {
57 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
58 v3_init_svm_cpu(cpu_id);
63 if (v3_is_vmx_capable()) {
64 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
65 v3_init_vmx_cpu(cpu_id);
70 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * deinit_cpu() - per-CPU teardown callback: dispatches on the backend type
 * recorded for this CPU in v3_cpu_types[] and deinitializes SVM or VMX
 * accordingly.  NOTE(review): several case labels and the default label are
 * missing from this view; the visible PrintError presumably sits under the
 * default / invalid-CPU case.
 */
75 static void deinit_cpu(void * arg) {
76 uint32_t cpu_id = (uint32_t)(addr_t)arg;
79 switch (v3_cpu_types[cpu_id]) {
83 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
84 v3_deinit_svm_cpu(cpu_id);
90 case V3_VMX_EPT_UG_CPU:
91 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
92 v3_deinit_vmx_cpu(cpu_id);
97 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Init_V3() - global one-time initialization of the Palacios VMM.
 * Records the host hook table, resets per-CPU type state, parses host
 * options, initializes the paging/scheduling/extension/checkpoint
 * subsystems, and then runs init_cpu() on every CPU selected by cpu_mask
 * (NULL mask means all of the first num_cpus CPUs).  The global machine
 * type is taken from the first CPU that initializes successfully.
 */
102 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
107 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
109 // Set global variables.
112 // Determine the global machine type
113 v3_mach_type = V3_INVALID_CPU;
115 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
116 v3_cpu_types[i] = V3_INVALID_CPU;
119 // Parse host-os defined options into an easily-accessed format.
120 v3_parse_options(options);
122 // Register all the possible device types
125 // Register all shadow paging handlers
126 V3_init_shdw_paging();
128 // Initialize the scheduler framework (must be before extensions)
129 V3_init_scheduling();
131 // Register all extensions
132 V3_init_extensions();
134 // Enabling scheduler
135 V3_enable_scheduler();
138 #ifdef V3_CONFIG_SYMMOD
142 #ifdef V3_CONFIG_CHECKPOINT
143 V3_init_checkpoint();
/* Only CPUs whose bit is set in cpu_mask (or all, if mask is NULL) get the
 * per-CPU virtualization init. */
146 if ((hooks) && (hooks->call_on_cpu)) {
148 for (i = 0; i < num_cpus; i++) {
152 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
153 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
154 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
/* First successfully-typed CPU defines the global machine type. */
156 if (v3_mach_type == V3_INVALID_CPU) {
157 v3_mach_type = v3_cpu_types[i];
/*
 * NOTE(review): global VMM teardown sequence — the enclosing function's
 * signature (presumably Shutdown_V3) is not visible in this view of the
 * file.  Mirrors Init_V3 in reverse: deinit paging, extensions, optional
 * subsystems, then run deinit_cpu() on every CPU that was initialized.
 */
170 V3_deinit_shdw_paging();
172 V3_deinit_extensions();
174 #ifdef V3_CONFIG_SYMMOD
178 #ifdef V3_CONFIG_CHECKPOINT
179 V3_deinit_checkpoint();
/* Only CPUs that actually got a backend (type != V3_INVALID_CPU) are torn down. */
183 if ((os_hooks) && (os_hooks->call_on_cpu)) {
184 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
185 if (v3_cpu_types[i] != V3_INVALID_CPU) {
186 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
187 //deinit_cpu((void *)(addr_t)i);
/* Return the virtualization backend type recorded for physical CPU cpu_id.
 * No bounds check: caller must pass a valid index into v3_cpu_types[]. */
195 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
196 return v3_cpu_types[cpu_id];
/*
 * v3_create_vm() - build a VM from a parsed configuration image.
 * Configures the guest via v3_config_guest(), copies in the (possibly
 * truncated) VM name, and registers the VM with the scheduler.  Returns
 * the new v3_vm_info, or (presumably) NULL on failure — error paths are
 * not fully visible in this view.
 */
200 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
201 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
204 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
208 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
212 } else if (strlen(name) >= 128) {
213 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
/* Name buffer is 128 bytes; memset + strncpy(…,127) guarantees NUL termination. */
216 memset(vm->name, 0, 128);
217 strncpy(vm->name, name, 127);
220 * Register this VM with the palacios scheduler. It will ask for admission
223 if(v3_scheduler_register_vm(vm) == -1) {
225 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/*
 * start_core() - thread entry point for one virtual core (spawned by
 * v3_start_vm via V3_CREATE_THREAD_ON_CPU).  Registers the core with the
 * scheduler, then enters the guest through the SVM or VMX run loop chosen
 * by the global machine type; does not return while the guest runs.
 */
234 static int start_core(void * p)
236 struct guest_info * core = (struct guest_info *)p;
238 if (v3_scheduler_register_core(core) == -1){
239 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
242 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
243 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
/* Dispatch on the host virtualization architecture detected at Init_V3 time. */
245 switch (v3_mach_type) {
248 case V3_SVM_REV3_CPU:
249 return v3_start_svm_guest(core);
255 case V3_VMX_EPT_UG_CPU:
256 return v3_start_vmx_guest(core);
260 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
268 // For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
/*
 * v3_start_vm() - launch a stopped VM across the physical CPUs selected by
 * cpu_mask (one bit per CPU, LSB-first within each byte).  Validates the
 * run state and shadow-paging memory constraint, trims unusable CPUs from
 * the mask, asks the scheduler for admission, then spawns one start_core()
 * thread per virtual core.  Cores are started in reverse order so that
 * core 0 — which turns itself on — is launched last.
 */
272 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
274 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
275 uint32_t avail_cores = 0;
279 if (vm->run_state != VM_STOPPED) {
280 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
/* Shadow paging cannot address guest base memory above 4 GB for 32-bit guests. */
285 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
286 for (i=0;i<vm->num_cores;i++) {
287 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
288 if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
289 PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
290 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
291 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
292 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
300 /// CHECK IF WE ARE MULTICORE ENABLED....
302 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
303 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
/* Clear mask bits for CPUs that were never initialized with a backend. */
306 // Check that enough cores are present in the mask to handle vcores
307 for (i = 0; i < MAX_CORES; i++) {
311 if (core_mask[major] & (0x1 << minor)) {
312 if (v3_cpu_types[i] == V3_INVALID_CPU) {
313 core_mask[major] &= ~(0x1 << minor);
321 vm->avail_cores = avail_cores;
323 if (v3_scheduler_admit_vm(vm) != 0){
324 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
327 vm->run_state = VM_RUNNING;
329 // Spawn off threads for each core.
330 // We work backwards, so that core 0 is always started last.
331 for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) {
334 struct guest_info * core = &(vm->cores[vcore_id]);
335 char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu");
336 uint32_t core_idx = 0;
/* A per-core "target_cpu" config value overrides the round-robin placement. */
338 if (specified_cpu != NULL) {
339 core_idx = atoi(specified_cpu);
341 if ((core_idx < 0) || (core_idx >= MAX_CORES)) {
342 PrintError(vm, VCORE_NONE, "Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES);
345 i--; // We reset the logical core idx. Not strictly necessary I guess...
350 major = core_idx / 8;
351 minor = core_idx % 8;
/* Target CPU must still be enabled in the (already trimmed) mask. */
353 if ((core_mask[major] & (0x1 << minor)) == 0) {
354 PrintError(vm, VCORE_NONE, "Logical CPU %d not available for virtual core %d; not started\n",
357 if (specified_cpu != NULL) {
358 PrintError(vm, VCORE_NONE, "CPU was specified explicitly (%d). HARD ERROR\n", core_idx);
366 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
369 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
371 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
372 core_idx, start_core, core, core->exec_name);
374 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
375 core->pcpu_id = core_idx;
376 core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name);
378 if (core->core_thread == NULL) {
379 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
388 PrintError(vm, VCORE_NONE, "Error starting VM: Not enough available CPU cores\n");
/*
 * v3_reset_vm_core() - reset one virtual core to the architectural reset
 * state with the given start RIP, dispatching to the SVM or VMX reset path
 * based on the backend type of the physical CPU the core sits on.
 * NOTE(review): additional case labels and the default case are missing
 * from this view of the file.
 */
399 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
401 switch (v3_cpu_types[core->pcpu_id]) {
404 case V3_SVM_REV3_CPU:
405 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
406 return v3_reset_svm_vm_core(core, rip);
411 case V3_VMX_EPT_UG_CPU:
412 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
413 return v3_reset_vmx_vm_core(core, rip);
417 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
426 /* move a virtual core to different physical core */
/*
 * v3_move_vm_core() - migrate virtual core vcore_id of vm to physical CPU
 * target_cpu.  Raises the VM-wide barrier so no core is executing guest
 * code during the move, flushes hardware state tied to the old CPU (VMX
 * needs its VMCS flushed on the source CPU), moves the host thread, and
 * updates pcpu_id before lowering the barrier.
 */
427 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
428 struct guest_info * core = NULL;
430 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
431 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
435 core = &(vm->cores[vcore_id]);
437 if (target_cpu == core->pcpu_id) {
438 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
439 // well that was pointless
443 if (core->core_thread == NULL) {
444 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
/* Spin until the barrier is raised — all vcores paused at a safe point. */
448 while (v3_raise_barrier(vm, NULL) == -1);
450 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
452 // Double check that we weren't preemptively migrated
453 if (target_cpu != core->pcpu_id) {
455 V3_Print(vm, core, "Moving Core\n");
459 switch (v3_cpu_types[core->pcpu_id]) {
462 case V3_VMX_EPT_UG_CPU:
463 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
/* VMCS must be flushed on the CPU it is currently loaded on. */
464 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
471 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
472 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
473 core->vcpu_id, target_cpu);
474 v3_lower_barrier(vm);
478 /* There will be a benign race window here:
479 core->pcpu_id will be set to the target core before its fully "migrated"
480 However the core will NEVER run on the old core again, its just in flight to the new core
482 core->pcpu_id = target_cpu;
484 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
487 v3_lower_barrier(vm);
/*
 * v3_stop_vm() - stop a running (or simulating) VM.  Sets run_state to
 * VM_STOPPED, then waits for every core to observe the state change and
 * park itself (polling core_run_state of each core) before returning.
 */
494 int v3_stop_vm(struct v3_vm_info * vm) {
496 if ((vm->run_state != VM_RUNNING) &&
497 (vm->run_state != VM_SIMULATING)) {
498 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
502 vm->run_state = VM_STOPPED;
/* Barrier cycle forces each core through a VM-exit so it sees VM_STOPPED. */
504 // Sanity check to catch any weird execution states
505 if (v3_wait_for_barrier(vm, NULL) == 0) {
506 v3_lower_barrier(vm);
509 // XXX force exit all cores via a cross call/IPI XXX
513 int still_running = 0;
515 for (i = 0; i < vm->num_cores; i++) {
516 if (vm->cores[i].core_run_state != CORE_STOPPED) {
521 if (still_running == 0) {
528 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/*
 * v3_pause_vm() - pause a running VM by raising the VM-wide barrier (which
 * holds all vcores at a safe point) and marking the state VM_PAUSED.
 * The barrier stays raised until v3_continue_vm() lowers it.
 */
534 int v3_pause_vm(struct v3_vm_info * vm) {
536 if (vm->run_state != VM_RUNNING) {
537 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
/* Retry until the barrier is successfully raised. */
541 while (v3_raise_barrier(vm, NULL) == -1);
543 vm->run_state = VM_PAUSED;
/*
 * v3_continue_vm() - resume a paused VM: flip the state back to VM_RUNNING
 * and lower the barrier raised by v3_pause_vm(), releasing all vcores.
 */
549 int v3_continue_vm(struct v3_vm_info * vm) {
551 if (vm->run_state != VM_PAUSED) {
552 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
556 vm->run_state = VM_RUNNING;
558 v3_lower_barrier(vm);
/*
 * sim_callback() - per-core timeout handler used by v3_simulate_vm().
 * Marks this vcore's bit in the shared timeout bitmap and then spins until
 * the simulation driver clears it, effectively parking the core once its
 * simulated time slice expires.
 */
565 static int sim_callback(struct guest_info * core, void * private_data) {
566 struct v3_bitmap * timeout_map = private_data;
568 v3_bitmap_set(timeout_map, core->vcpu_id);
570 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
/* Busy-wait until v3_simulate_vm() resets the bitmap. */
572 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/*
 * v3_simulate_vm() - run a paused VM for approximately msecs milliseconds
 * of wall-clock time, then re-pause it.  Implemented by arming a cycle
 * timeout (msecs * cpu_khz) on every core, lowering the barrier, and
 * waiting until every core has tripped its timeout (sim_callback parks it
 * in the shared bitmap), then raising the barrier and restoring VM_PAUSED.
 */
582 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
583 struct v3_bitmap timeout_map;
587 uint64_t cpu_khz = V3_CPU_KHZ();
589 if (vm->run_state != VM_PAUSED) {
590 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
594 /* AT this point VM is paused */
597 v3_bitmap_init(&timeout_map, vm->num_cores);
602 // calculate cycles from msecs...
603 // IMPORTANT: Floating point not allowed.
604 cycles = (msecs * cpu_khz);
608 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
612 for (i = 0; i < vm->num_cores; i++) {
613 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
614 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
619 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
622 // Run the simulation
623 // vm->run_state = VM_SIMULATING;
624 vm->run_state = VM_RUNNING;
625 v3_lower_barrier(vm);
628 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
/* Poll until every core has set its bit in the timeout bitmap. */
630 // block until simulation is complete
631 while (all_blocked == 0) {
634 for (i = 0; i < vm->num_cores; i++) {
635 if (v3_bitmap_check(&timeout_map, i) == 0) {
640 if (all_blocked == 1) {
648 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
650 // Simulation is complete
651 // Reset back to PAUSED state
/* Raise the barrier first, then release the parked cores (bitmap reset) so
 * they immediately block on the barrier instead of re-entering the guest. */
653 v3_raise_barrier_nowait(vm, NULL);
654 vm->run_state = VM_PAUSED;
656 v3_bitmap_reset(&timeout_map);
658 v3_wait_for_barrier(vm, NULL);
/*
 * v3_get_state_vm() - snapshot externally-visible VM state into *s.
 * Translates the internal run state, base memory region, and per-vcore
 * run state / CPU mode / paging mode / memory mode into the public
 * v3_vm_state enums.  Reports at most min(s->num_vcores, vm->num_cores)
 * vcores, writing the actual count back into s->num_vcores.
 */
664 int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)
667 uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;
669 switch (vm->run_state) {
670 case VM_INVALID: s->state = V3_VM_INVALID; break;
671 case VM_RUNNING: s->state = V3_VM_RUNNING; break;
672 case VM_STOPPED: s->state = V3_VM_STOPPED; break;
673 case VM_PAUSED: s->state = V3_VM_PAUSED; break;
674 case VM_ERROR: s->state = V3_VM_ERROR; break;
675 case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
676 default: s->state = V3_VM_UNKNOWN; break;
679 s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
680 s->mem_size = vm->mem_size;
682 s->num_vcores = numcores;
684 for (i=0;i<numcores;i++) {
685 switch (vm->cores[i].core_run_state) {
686 case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
687 case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
688 case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
689 default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;
691 switch (vm->cores[i].cpu_mode) {
692 case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
693 case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
694 case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
695 case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
696 case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
697 case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
698 default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
700 switch (vm->cores[i].shdw_pg_mode) {
701 case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
702 case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
703 default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
705 switch (vm->cores[i].mem_mode) {
706 case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
707 case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
708 default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
711 s->vcore[i].pcore=vm->cores[i].pcpu_id;
712 s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
713 s->vcore[i].num_exits=vm->cores[i].num_exits;
720 #ifdef V3_CONFIG_CHECKPOINT
721 #include <palacios/vmm_checkpoint.h>
/* Thin public wrappers over the checkpoint subsystem: save/load a VM image
 * to/from the named store at the given url.  Return the chkpt result code. */
723 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) {
724 return v3_chkpt_save_vm(vm, store, url);
728 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) {
729 return v3_chkpt_load_vm(vm, store, url);
/* Live-migration variants: stream a VM to / receive a VM from a remote peer. */
732 #ifdef V3_CONFIG_LIVE_MIGRATION
733 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) {
734 return v3_chkpt_send_vm(vm, store, url);
738 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) {
739 return v3_chkpt_receive_vm(vm, store, url);
/*
 * v3_free_vm() - release all resources of a VM that is stopped or in the
 * error state: devices first, then each core's state, then the VM-internal
 * structures.  Refuses to free a VM in any other run state.
 */
746 int v3_free_vm(struct v3_vm_info * vm) {
748 // deinitialize guest (free memory, etc...)
750 if ((vm->run_state != VM_STOPPED) &&
751 (vm->run_state != VM_ERROR)) {
752 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
756 v3_free_vm_devices(vm);
759 for (i = 0; i < vm->num_cores; i++) {
760 v3_free_core(&(vm->cores[i]));
764 v3_free_vm_internal(vm);
/*
 * v3_get_host_cpu_mode() - report the host CPU's operating mode by
 * examining control registers (CR4 PAE bit visible below).
 * NOTE(review): most of both definitions (the 32-bit and the other-arch
 * variant guarded elsewhere) is missing from this view of the file.
 */
776 v3_cpu_mode_t v3_get_host_cpu_mode() {
786 cr4 = (struct cr4_32 *)&(cr4_val);
789 return PROTECTED_PAE;
797 v3_cpu_mode_t v3_get_host_cpu_mode() {
/*
 * v3_print_cond() - printf-style output emitted only when the global debug
 * flag v3_dbg_enable is set.  Formats into a local buffer (2048 bytes per
 * the vsnprintf bound) and forwards it through V3_Print.
 */
803 void v3_print_cond(const char * fmt, ...) {
804 if (v3_dbg_enable == 1) {
809 vsnprintf(buf, 2048, fmt, ap);
812 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/*
 * v3_interrupt_cpu() - ask the host OS (via the interrupt_cpu hook) to
 * deliver the given interrupt vector to a logical CPU on behalf of vm.
 * Silently a no-op if the embedding OS did not provide the hook.
 */
818 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
819 extern struct v3_os_hooks * os_hooks;
821 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
822 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/*
 * v3_vm_enter() - perform one guest entry on the calling core, dispatching
 * to the SVM or VMX entry path according to the global machine type.
 * NOTE(review): other case labels / #ifdef guards and the default label are
 * missing from this view; the error string below also contains a typo
 * ("Attemping") that a doc-only edit cannot fix.
 */
828 int v3_vm_enter(struct guest_info * info) {
829 switch (v3_mach_type) {
832 case V3_SVM_REV3_CPU:
833 return v3_svm_enter(info);
839 case V3_VMX_EPT_UG_CPU:
840 return v3_vmx_enter(info);
844 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
/* Accessor: return the host-private data pointer associated with a VM. */
850 void *v3_get_host_vm(struct v3_vm_info *x)
853 return x->host_priv_data;
859 int v3_get_vcore(struct guest_info *x)