2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_mem.h>
22 #include <palacios/vmm_intr.h>
23 #include <palacios/vmm_config.h>
24 #include <palacios/vm_guest.h>
25 #include <palacios/vmm_ctrl_regs.h>
26 #include <palacios/vmm_lowlevel.h>
27 #include <palacios/vmm_sprintf.h>
28 #include <palacios/vmm_extensions.h>
29 #include <palacios/vmm_timeout.h>
30 #include <palacios/vmm_options.h>
31 #include <palacios/vmm_cpu_mapper.h>
34 #include <palacios/svm.h>
37 #include <palacios/vmx.h>
40 #ifdef V3_CONFIG_CHECKPOINT
41 #include <palacios/vmm_checkpoint.h>
/* Per-physical-CPU architecture table; Init_V3 resets every slot to V3_INVALID_CPU. */
45 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
/* Machine-wide architecture; start_core and v3_vm_enter dispatch on this value. */
46 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
/* Host OS callback table; NULL until assigned (the assignment is not visible in this excerpt). */
48 struct v3_os_hooks * os_hooks = NULL;
/* Debug-print switch: v3_print_cond only emits output while this equals 1. */
49 int v3_dbg_enable = 0;
/*
 * Per-CPU initialization callback; Init_V3 hands it to hooks->call_on_cpu.
 *
 * arg: the CPU id packed into a pointer, unpacked below via addr_t.
 *
 * Probes for SVM and then VMX support and calls the matching per-CPU init
 * routine with cpu_id.
 * NOTE(review): the else/brace/preprocessor lines that join these branches are
 * not visible in this excerpt; presumably the trailing PrintError is the
 * fallback when neither extension is present -- confirm against the full file.
 */
53 static void init_cpu(void * arg) {
// Recover the integer CPU id from the opaque callback argument.
54 uint32_t cpu_id = (uint32_t)(addr_t)arg;
57 if (v3_is_svm_capable()) {
58 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
59 v3_init_svm_cpu(cpu_id);
64 if (v3_is_vmx_capable()) {
65 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
66 v3_init_vmx_cpu(cpu_id);
71 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Per-CPU teardown callback; the shutdown path invokes it through
 * V3_Call_On_CPU for every CPU with a valid recorded architecture.
 *
 * arg: the CPU id packed into a pointer, unpacked below via addr_t.
 *
 * Dispatches on v3_cpu_types[cpu_id] and calls the SVM or VMX per-CPU deinit
 * routine.
 * NOTE(review): most case labels and the default label are not visible in this
 * excerpt; the final PrintError is presumably the default arm -- confirm.
 */
76 static void deinit_cpu(void * arg) {
// Recover the integer CPU id from the opaque callback argument.
77 uint32_t cpu_id = (uint32_t)(addr_t)arg;
80 switch (v3_cpu_types[cpu_id]) {
84 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
85 v3_deinit_svm_cpu(cpu_id);
91 case V3_VMX_EPT_UG_CPU:
92 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
93 v3_deinit_vmx_cpu(cpu_id);
98 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
/*
 * Global VMM initialization.
 *
 * hooks:    host OS callback table; per-CPU setup runs only when it is non-NULL
 *           and provides call_on_cpu.
 * cpu_mask: bitmask selecting which CPUs to initialize; NULL selects every CPU
 *           in range.
 * num_cpus: number of host CPUs offered; CPUs at or beyond V3_CONFIG_MAX_CPUS
 *           are not considered (an error is logged).
 * options:  host-supplied option string, handed to v3_parse_options.
 *
 * Order of the visible steps: reset CPU type tables, parse options, shadow
 * paging, cpu_mapper and scheduler frameworks, extensions, enable cpu_mapper
 * and scheduler, optional checkpoint support, then per-CPU initialization.
 *
 * NOTE(review): local declarations, the statements under several section
 * comments, and closing braces are not visible in this excerpt.
 */
103 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
108 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
111 // Set global variables.
// Warn when the host offers more CPUs than this build supports.
114 if (num_cpus>V3_CONFIG_MAX_CPUS) {
115 PrintError(VM_NONE,VCORE_NONE, "Requesting as many as %d cpus, but Palacios is compiled for a maximum of %d. Only the first %d cpus will be considered\n", num_cpus, V3_CONFIG_MAX_CPUS, V3_CONFIG_MAX_CPUS);
118 // Determine the global machine type
119 v3_mach_type = V3_INVALID_CPU;
// Mark every CPU slot invalid until per-CPU initialization has run for it.
121 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
122 v3_cpu_types[i] = V3_INVALID_CPU;
125 // Parse host-os defined options into an easily-accessed format.
126 v3_parse_options(options);
128 // Memory manager initialization
131 // Register all the possible device types
134 // Register all shadow paging handlers
135 V3_init_shdw_paging();
137 // Initialize the cpu_mapper framework (must be before extensions)
138 V3_init_cpu_mapper();
140 // Initialize the scheduler framework (must be before extensions)
141 V3_init_scheduling();
143 // Register all extensions
144 V3_init_extensions();
146 // Enabling cpu_mapper
147 V3_enable_cpu_mapper();
149 // Enabling scheduler
150 V3_enable_scheduler();
153 #ifdef V3_CONFIG_SYMMOD
157 #ifdef V3_CONFIG_CHECKPOINT
158 V3_init_checkpoint();
// Per-CPU setup needs the host's cross-CPU call hook.
161 if ((hooks) && (hooks->call_on_cpu)) {
// Visit only CPUs that are both offered by the host and supported by this build.
163 for (i = 0; i < num_cpus && i < V3_CONFIG_MAX_CPUS; i++) {
// major/minor are declared on lines not shown; presumably the byte index and
// bit index of CPU i within cpu_mask -- TODO confirm.
167 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
168 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
169 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
// The machine-wide type is taken from the first CPU that reports a valid type.
171 if (v3_mach_type == V3_INVALID_CPU) {
172 v3_mach_type = v3_cpu_types[i];
/*
 * Teardown sequence that undoes Init_V3 in reverse order: per-CPU deinit,
 * checkpoint support, then disabling and deinitializing the scheduler,
 * cpu_mapper, extensions and shadow paging.
 * NOTE(review): the enclosing function's signature and local declarations are
 * not visible in this excerpt.
 */
184 // Reverse order of Init_V3
// Per-CPU teardown needs the host's cross-CPU call hook.
188 if ((os_hooks) && (os_hooks->call_on_cpu)) {
189 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
// Only CPUs that recorded a valid architecture are deinitialized.
190 if (v3_cpu_types[i] != V3_INVALID_CPU) {
191 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
192 //deinit_cpu((void *)(addr_t)i);
197 #ifdef V3_CONFIG_CHECKPOINT
198 V3_deinit_checkpoint();
201 #ifdef V3_CONFIG_SYMMOD
205 V3_disable_scheduler();
207 V3_disable_cpu_mapper();
209 V3_deinit_extensions();
211 V3_deinit_scheduling();
213 V3_deinit_cpu_mapper();
215 V3_deinit_shdw_paging();
227 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
228 return v3_cpu_types[cpu_id];
/*
 * Build a VM from a configuration and register it with the scheduler.
 *
 * cfg:       opaque configuration pointer passed to v3_config_guest.
 * priv_data: host-private pointer passed through to v3_config_guest.
 * name:      VM name; at most 127 characters are copied into vm->name.
 *
 * NOTE(review): the failure checks, return statements and the opening of the
 * name-validation if/else chain are not visible in this excerpt.
 */
232 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
233 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
236 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
240 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
244 } else if (strlen(name) >= 128) {
// NOTE(review): the message says 128 chars, but the copy below keeps 127.
245 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
// Zero-fill, then copy at most 127 bytes, so the stored name always ends in NUL.
// Assumes vm->name holds at least 128 bytes -- TODO confirm.
248 memset(vm->name, 0, 128);
249 strncpy(vm->name, name, 127);
252 * Register this VM with the palacios scheduler. It will ask for admission
255 if(v3_scheduler_register_vm(vm) == -1) {
257 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
/*
 * Thread entry point for one virtual core; v3_start_vm passes it to
 * V3_CREATE_THREAD_ON_CPU.
 *
 * p: the struct guest_info of the core to run.
 *
 * Registers the core with the scheduler, then dispatches on v3_mach_type to the
 * SVM or VMX guest-start routine and returns that routine's result.
 * NOTE(review): some case labels, the default label and the return values of
 * the error paths are not visible in this excerpt.
 */
266 static int start_core(void * p)
268 struct guest_info * core = (struct guest_info *)p;
270 if (v3_scheduler_register_core(core) == -1){
271 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
274 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
275 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
// The machine-wide type, not the per-CPU table, selects the backend here.
277 switch (v3_mach_type) {
280 case V3_SVM_REV3_CPU:
281 return v3_start_svm_guest(core);
287 case V3_VMX_EPT_UG_CPU:
288 return v3_start_vmx_guest(core);
292 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
/*
 * Launch a stopped VM: check memory placement for shadow paging, filter the
 * CPU mask, admit the VM to the scheduler and cpu_mapper, and create one
 * thread per virtual core.
 *
 * vm:       VM to start; an error is logged unless it is in VM_STOPPED.
 * cpu_mask: bitmask of physical CPUs the VM may use.
 *
 * NOTE(review): local declarations (i, j, vcore_id, major, minor), return
 * statements and closing braces are not visible in this excerpt.
 */
299 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
302 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
303 uint32_t avail_cores = 0;
305 extern uint64_t v3_mem_block_size;
308 if (vm->run_state != VM_STOPPED) {
309 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
314 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
315 for (i=0;i<vm->num_cores;i++) {
316 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
317 for (j=0;j<vm->mem_map.num_base_regions;j++) {
// NOTE(review): base_regions is indexed with the core index i although this
// inner loop walks regions with j; [j] looks intended -- confirm and fix.
318 if ((vm->mem_map.base_regions[i].host_addr + v3_mem_block_size) >= 0x100000000ULL) {
319 PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n",j, i);
320 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
321 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
322 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
329 /// CHECK IF WE ARE MULTICORE ENABLED....
331 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
332 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
335 // Check that enough cores are present in the mask to handle vcores
// NOTE(review): core_mask aliases a single unsigned int, yet this loop runs to
// V3_CONFIG_MAX_CPUS; if that exceeds the bit width of unsigned int the
// accesses below would reach past cpu_mask -- confirm how major/minor are derived.
336 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
340 if (core_mask[major] & (0x1 << minor)) {
// Clear mask bits for CPUs that have no valid virtualization type recorded.
341 if (v3_cpu_types[i] == V3_INVALID_CPU) {
342 core_mask[major] &= ~(0x1 << minor);
349 vm->avail_cores = avail_cores;
351 if (v3_scheduler_admit_vm(vm) != 0){
352 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
355 if (v3_cpu_mapper_admit_vm(vm) != 0){
356 PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
// The VM is marked running before any core thread is created.
359 vm->run_state = VM_RUNNING;
361 if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
363 PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
// Create one host thread per virtual core, placed on the core's assigned pcpu_id.
367 for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
369 struct guest_info * core = &(vm->cores[vcore_id]);
371 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
372 vcore_id, core->pcpu_id);
// NOTE(review): unbounded sprintf; assumes exec_name can hold vm->name plus the
// "-<id>" suffix -- confirm the buffer size.
374 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
376 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
377 core->pcpu_id, start_core, core, core->exec_name);
379 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
380 core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
382 if (core->core_thread == NULL) {
383 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
/*
 * Reset one virtual core so that it resumes at a given instruction pointer.
 *
 * core: virtual core to reset.
 * rip:  instruction pointer handed to the backend reset routine.
 *
 * Dispatches on the architecture of the physical CPU the core is assigned to
 * (v3_cpu_types[core->pcpu_id]) and returns the SVM or VMX backend's result.
 * NOTE(review): some case labels, the default label and its return value are
 * not visible in this excerpt.
 */
394 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
396 switch (v3_cpu_types[core->pcpu_id]) {
399 case V3_SVM_REV3_CPU:
400 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
401 return v3_reset_svm_vm_core(core, rip);
406 case V3_VMX_EPT_UG_CPU:
407 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
408 return v3_reset_vmx_vm_core(core, rip);
412 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
421 /* move a virtual core to different physical core */
/*
 * vm:         VM owning the core.
 * vcore_id:   index of the virtual core; rejected unless in [0, vm->num_cores).
 * target_cpu: physical CPU to move the core's thread to.
 *
 * Sequence: validate arguments, raise the VM barrier, re-check the target,
 * ask the cpu_mapper to admit the core, flush VMX state on the current CPU
 * where applicable, move the thread, record the new pcpu_id, and lower the
 * barrier.
 * NOTE(review): return statements, some case labels and closing braces are
 * not visible in this excerpt.
 */
422 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
423 struct guest_info * core = NULL;
425 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
426 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
430 core = &(vm->cores[vcore_id]);
432 if (target_cpu == core->pcpu_id) {
433 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
434 // well that was pointless
438 if (core->core_thread == NULL) {
439 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
// Retry until the barrier is raised (v3_raise_barrier stops returning -1).
443 while (v3_raise_barrier(vm, NULL) == -1);
445 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
447 // Double check that we weren't preemptively migrated
448 if (target_cpu != core->pcpu_id) {
450 V3_Print(vm, core, "Moving Core\n");
// NOTE(review): no v3_lower_barrier call is visible on this failure path, unlike
// the V3_MOVE_THREAD_TO_CPU failure below; confirm the barrier is released.
452 if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){
453 PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu);
// Backend-specific preparation on the CPU the core currently occupies.
459 switch (v3_cpu_types[core->pcpu_id]) {
462 case V3_VMX_EPT_UG_CPU:
463 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
464 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
471 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
472 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
473 core->vcpu_id, target_cpu);
474 v3_lower_barrier(vm);
478 /* There will be a benign race window here:
479 core->pcpu_id will be set to the target core before its fully "migrated"
480 However the core will NEVER run on the old core again, its just in flight to the new core
482 core->pcpu_id = target_cpu;
484 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
487 v3_lower_barrier(vm);
/*
 * Stop a VM and wait until its cores report CORE_STOPPED.
 *
 * vm: VM to stop; an error is logged unless it is in VM_RUNNING or
 *     VM_SIMULATING.
 *
 * NOTE(review): the polling loop construct, the counter update, return
 * statements and closing braces are not visible in this excerpt.
 */
494 int v3_stop_vm(struct v3_vm_info * vm) {
496 if ((vm->run_state != VM_RUNNING) &&
497 (vm->run_state != VM_SIMULATING)) {
498 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
// The VM-wide state flips to STOPPED before the cores are polled.
502 vm->run_state = VM_STOPPED;
504 // Sanity check to catch any weird execution states
505 if (v3_wait_for_barrier(vm, NULL) == 0) {
506 v3_lower_barrier(vm);
509 // XXX force exit all cores via a cross call/IPI XXX
513 int still_running = 0;
// Scan every core for one that has not yet reached CORE_STOPPED.
515 for (i = 0; i < vm->num_cores; i++) {
516 if (vm->cores[i].core_run_state != CORE_STOPPED) {
// Presumably ends the wait once no core is still running -- loop body not visible.
521 if (still_running == 0) {
528 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
/*
 * Pause a running VM by raising its barrier and marking it VM_PAUSED.
 *
 * vm: VM to pause; an error is logged unless it is in VM_RUNNING.
 *
 * NOTE(review): return statements are not visible in this excerpt.
 */
534 int v3_pause_vm(struct v3_vm_info * vm) {
536 if (vm->run_state != VM_RUNNING) {
537 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
// Retry until the barrier is raised (v3_raise_barrier stops returning -1).
541 while (v3_raise_barrier(vm, NULL) == -1);
543 vm->run_state = VM_PAUSED;
/*
 * Resume a paused VM: mark it VM_RUNNING and lower the barrier.
 *
 * vm: VM to resume; an error is logged unless it is in VM_PAUSED.
 *
 * NOTE(review): return statements are not visible in this excerpt.
 */
549 int v3_continue_vm(struct v3_vm_info * vm) {
551 if (vm->run_state != VM_PAUSED) {
552 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
// State is updated before the barrier is lowered.
556 vm->run_state = VM_RUNNING;
558 v3_lower_barrier(vm);
/*
 * Timeout callback registered per core by v3_simulate_vm.
 *
 * core:         the core whose timeout fired.
 * private_data: the shared struct v3_bitmap of cores that have timed out.
 *
 * Sets this core's bit in the bitmap, then loops for as long as the bit stays
 * set. The loop body and return value are not visible in this excerpt.
 */
565 static int sim_callback(struct guest_info * core, void * private_data) {
566 struct v3_bitmap * timeout_map = private_data;
// Announce that this core has reached its simulation deadline.
568 v3_bitmap_set(timeout_map, core->vcpu_id);
570 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
// Hold the core here until its bit is no longer set in the bitmap.
572 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
/*
 * Run a paused VM for a bounded amount of guest time, then pause it again.
 *
 * vm:    VM to simulate; an error is logged unless it is in VM_PAUSED.
 * msecs: simulation length in milliseconds.
 *
 * Registers a timeout of msecs * cpu_khz cycles on every core with
 * sim_callback, lowers the barrier, waits until every core's bit is set in
 * timeout_map, then re-raises the barrier and restores VM_PAUSED.
 *
 * NOTE(review): declarations of i, cycles and all_blocked, the wait-loop
 * internals, return statements and closing braces are not visible in this
 * excerpt.
 */
582 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
// Stack-local bitmap shared with every core's sim_callback for this call.
583 struct v3_bitmap timeout_map;
587 uint64_t cpu_khz = V3_CPU_KHZ();
589 if (vm->run_state != VM_PAUSED) {
590 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
594 /* AT this point VM is paused */
597 v3_bitmap_init(&timeout_map, vm->num_cores);
602 // calculate cycles from msecs...
603 // IMPORTANT: Floating point not allowed.
// Assuming V3_CPU_KHZ() is the clock in kHz (cycles per millisecond), the
// product is a cycle count; cpu_khz is 64-bit, so the multiply is 64-bit.
604 cycles = (msecs * cpu_khz);
608 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
612 for (i = 0; i < vm->num_cores; i++) {
613 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
614 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
619 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
622 // Run the simulation
623 // vm->run_state = VM_SIMULATING;
// The state used while simulating is VM_RUNNING, not VM_SIMULATING (see the
// commented-out line above).
624 vm->run_state = VM_RUNNING;
625 v3_lower_barrier(vm);
628 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
630 // block until simulation is complete
631 while (all_blocked == 0) {
// A core whose bit is still 0 has not reached its timeout yet.
634 for (i = 0; i < vm->num_cores; i++) {
635 if (v3_bitmap_check(&timeout_map, i) == 0) {
640 if (all_blocked == 1) {
648 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
650 // Simulation is complete
651 // Reset back to PAUSED state
// Raise the barrier without waiting, then reset the bitmap; presumably the
// reset clears the bits so cores looping in sim_callback can proceed to the
// barrier -- TODO confirm v3_bitmap_reset semantics.
653 v3_raise_barrier_nowait(vm, NULL);
654 vm->run_state = VM_PAUSED;
656 v3_bitmap_reset(&timeout_map);
658 v3_wait_for_barrier(vm, NULL);
664 int v3_get_state_vm(struct v3_vm_info *vm,
665 struct v3_vm_base_state *base,
666 struct v3_vm_core_state *core,
667 struct v3_vm_mem_state *mem)
670 uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
671 uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
672 extern uint64_t v3_mem_block_size;
674 switch (vm->run_state) {
675 case VM_INVALID: base->state = V3_VM_INVALID; break;
676 case VM_RUNNING: base->state = V3_VM_RUNNING; break;
677 case VM_STOPPED: base->state = V3_VM_STOPPED; break;
678 case VM_PAUSED: base->state = V3_VM_PAUSED; break;
679 case VM_ERROR: base->state = V3_VM_ERROR; break;
680 case VM_SIMULATING: base->state = V3_VM_SIMULATING; break;
681 default: base->state = V3_VM_UNKNOWN; break;
684 for (i=0;i<numcores;i++) {
685 switch (vm->cores[i].core_run_state) {
686 case CORE_INVALID: core->vcore[i].state = V3_VCORE_INVALID; break;
687 case CORE_RUNNING: core->vcore[i].state = V3_VCORE_RUNNING; break;
688 case CORE_STOPPED: core->vcore[i].state = V3_VCORE_STOPPED; break;
689 default: core->vcore[i].state = V3_VCORE_UNKNOWN; break;
691 switch (vm->cores[i].cpu_mode) {
692 case REAL: core->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
693 case PROTECTED: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
694 case PROTECTED_PAE: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
695 case LONG: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
696 case LONG_32_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
697 case LONG_16_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
698 default: core->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
700 switch (vm->cores[i].shdw_pg_mode) {
701 case SHADOW_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
702 case NESTED_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
703 default: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
705 switch (vm->cores[i].mem_mode) {
706 case PHYSICAL_MEM: core->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
707 case VIRTUAL_MEM: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
708 default: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
711 core->vcore[i].pcore=vm->cores[i].pcpu_id;
712 core->vcore[i].last_rip=(void*)(vm->cores[i].rip);
713 core->vcore[i].num_exits=vm->cores[i].num_exits;
716 core->num_vcores=numcores;
718 for (i=0;i<vm->mem_map.num_base_regions;i++) {
719 mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
720 mem->region[i].size = v3_mem_block_size;
723 mem->num_regions=numregions;
729 #ifdef V3_CONFIG_CHECKPOINT
730 #include <palacios/vmm_checkpoint.h>
/*
 * Save a VM checkpoint; forwards all arguments unchanged to v3_chkpt_save_vm
 * and returns its result.
 */
732 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
733 return v3_chkpt_save_vm(vm, store, url, opts);
/*
 * Load a VM checkpoint; forwards all arguments unchanged to v3_chkpt_load_vm
 * and returns its result.
 */
737 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
738 return v3_chkpt_load_vm(vm, store, url, opts);
741 #ifdef V3_CONFIG_LIVE_MIGRATION
/*
 * Live-migration send side; forwards all arguments unchanged to
 * v3_chkpt_send_vm and returns its result.
 */
742 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
743 return v3_chkpt_send_vm(vm, store, url, opts);
/*
 * Live-migration receive side; forwards all arguments unchanged to
 * v3_chkpt_receive_vm and returns its result.
 */
747 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
748 return v3_chkpt_receive_vm(vm, store, url, opts);
/*
 * Release a VM's resources: its devices, then each core, then the VM's
 * internal state.
 *
 * vm: VM to free; an error is logged unless it is in VM_STOPPED or VM_ERROR.
 *
 * NOTE(review): the declaration of i, return statements and any steps on
 * lines between the visible calls are not visible in this excerpt.
 */
755 int v3_free_vm(struct v3_vm_info * vm) {
757 // deinitialize guest (free memory, etc...)
759 if ((vm->run_state != VM_STOPPED) &&
760 (vm->run_state != VM_ERROR)) {
761 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
765 v3_free_vm_devices(vm);
// Per-core state is released before the VM-wide internal state.
768 for (i = 0; i < vm->num_cores; i++) {
769 v3_free_core(&(vm->cores[i]));
773 v3_free_vm_internal(vm);
/*
 * Report the CPU mode the host is running in.
 * NOTE(review): two definitions with this name appear here; presumably a
 * preprocessor conditional that is not visible in this excerpt selects one.
 * Most of both bodies is also not visible.
 */
785 v3_cpu_mode_t v3_get_host_cpu_mode() {
// View the raw CR4 value through the cr4_32 layout (cr4_val is declared on a
// line not shown).
795 cr4 = (struct cr4_32 *)&(cr4_val);
798 return PROTECTED_PAE;
806 v3_cpu_mode_t v3_get_host_cpu_mode() {
/*
 * printf-style debug output that is emitted only while v3_dbg_enable == 1.
 *
 * fmt: printf-style format string followed by its arguments.
 *
 * NOTE(review): the declarations of buf and ap and the va_start/va_end calls
 * are not visible in this excerpt.
 */
812 void v3_print_cond(const char * fmt, ...) {
813 if (v3_dbg_enable == 1) {
// Output is formatted into buf with a 2048-byte limit; presumably that is
// buf's size -- TODO confirm.
818 vsnprintf(buf, 2048, fmt, ap);
// The formatted text is passed as a "%s" argument, never as a format string.
821 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
/*
 * Ask the host OS to interrupt a logical CPU.
 *
 * vm:          VM on whose behalf the interrupt is sent.
 * logical_cpu: target CPU, passed through to the host hook.
 * vector:      interrupt vector, passed through to the host hook.
 *
 * Does nothing when no hook table is installed or it lacks interrupt_cpu.
 */
827 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
828 extern struct v3_os_hooks * os_hooks;
830 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
831 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
/*
 * Enter the guest on the calling CPU.
 *
 * info: the virtual core to enter.
 *
 * Dispatches on the machine-wide v3_mach_type and returns the result of the
 * SVM or VMX enter routine.
 * NOTE(review): some case labels, the default label and its return value are
 * not visible in this excerpt.
 */
837 int v3_vm_enter(struct guest_info * info) {
838 switch (v3_mach_type) {
841 case V3_SVM_REV3_CPU:
842 return v3_svm_enter(info);
848 case V3_VMX_EPT_UG_CPU:
849 return v3_vmx_enter(info);
853 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
/*
 * Return the host-private data pointer stored in a VM.
 *
 * x: VM to query.
 *
 * NOTE(review): lines between the signature and the return are not visible in
 * this excerpt, so any guard on x cannot be confirmed here.
 */
859 void *v3_get_host_vm(struct v3_vm_info *x)
862 return x->host_priv_data;
868 int v3_get_vcore(struct guest_info *x)