2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_intr.h>
22 #include <palacios/vmm_config.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_ctrl_regs.h>
25 #include <palacios/vmm_lowlevel.h>
26 #include <palacios/vmm_sprintf.h>
27 #include <palacios/vmm_extensions.h>
28 #include <palacios/vmm_timeout.h>
29 #include <palacios/vmm_options.h>
30 #include <palacios/vmm_cpu_mapper.h>
33 #include <palacios/svm.h>
36 #include <palacios/vmx.h>
39 #ifdef V3_CONFIG_CHECKPOINT
40 #include <palacios/vmm_checkpoint.h>
// Per-physical-CPU virtualization capability (SVM/VMX variant), filled in by
// init_cpu() on each CPU; entries stay V3_INVALID_CPU for unprobed CPUs.
44 v3_cpu_arch_t v3_cpu_types[V3_CONFIG_MAX_CPUS];
// Global machine type: set in Init_V3() from the first CPU that initializes
// successfully (see L79-L80); consulted by start_core()/v3_vm_enter().
45 v3_cpu_arch_t v3_mach_type = V3_INVALID_CPU;
// Host-OS callback table handed to Init_V3() by the embedding OS.
47 struct v3_os_hooks * os_hooks = NULL;
// Gate for v3_print_cond(): debug output is emitted only when this is 1.
48 int v3_dbg_enable = 0;
// Probe and initialize hardware virtualization on one physical CPU.
// Runs ON the target CPU (dispatched via hooks->call_on_cpu from Init_V3);
// arg carries the CPU id squeezed through a void pointer.
// On success the SVM/VMX init path records the CPU's type in v3_cpu_types[].
// NOTE(review): this extract is missing lines of the original function
// (braces / #ifdef guards between the SVM and VMX probes are not visible);
// comments describe only the visible code.
53 static void init_cpu(void * arg) {
54 uint32_t cpu_id = (uint32_t)(addr_t)arg;
// AMD path: check SVM support first.
57 if (v3_is_svm_capable()) {
58 PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
59 v3_init_svm_cpu(cpu_id);
// Intel path: check VMX support.
64 if (v3_is_vmx_capable()) {
65 PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n");
66 v3_init_vmx_cpu(cpu_id);
// Neither SVM nor VMX present — this CPU cannot run guests.
71 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
// Tear down per-CPU virtualization state set up by init_cpu().
// Runs ON the target CPU (dispatched via V3_Call_On_CPU); arg is the CPU id.
// Dispatches on the architecture recorded in v3_cpu_types[cpu_id].
// NOTE(review): this extract omits lines of the original switch (several
// case labels and break statements are not visible).
76 static void deinit_cpu(void * arg) {
77 uint32_t cpu_id = (uint32_t)(addr_t)arg;
80 switch (v3_cpu_types[cpu_id]) {
// SVM-family CPU types fall through to the SVM teardown.
84 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id);
85 v3_deinit_svm_cpu(cpu_id);
// VMX-family CPU types fall through to the VMX teardown.
91 case V3_VMX_EPT_UG_CPU:
92 PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id);
93 v3_deinit_vmx_cpu(cpu_id);
// Default: nothing was initialized on this CPU.
98 PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
// Global one-time initialization of the Palacios VMM.
//
//  hooks    - host-OS callback table; also stored in the global os_hooks
//             (assignment not visible in this extract, but implied by the
//             later reads of os_hooks).
//  cpu_mask - optional bitmap of physical CPUs to initialize; NULL = all.
//  num_cpus - number of physical CPUs to consider.
//  options  - host-supplied option string parsed by v3_parse_options().
//
// Order matters below: cpu_mapper and scheduler frameworks must exist
// before extensions are registered (see the inline notes kept from the
// original). NOTE(review): this extract is missing lines of the original
// function (e.g. the major/minor derivation for the cpu_mask indexing).
103 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
108 V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
110 // Set global variables.
113 // Determine the global machine type
114 v3_mach_type = V3_INVALID_CPU;
// Mark every CPU slot unprobed before per-CPU init runs.
116 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
117 v3_cpu_types[i] = V3_INVALID_CPU;
120 // Parse host-os defined options into an easily-accessed format.
121 v3_parse_options(options);
123 // Register all the possible device types
126 // Register all shadow paging handlers
127 V3_init_shdw_paging();
129 // Initialize the cpu_mapper framework (must be before extensions)
130 V3_init_cpu_mapper();
132 // Initialize the scheduler framework (must be before extensions)
133 V3_init_scheduling();
135 // Register all extensions
136 V3_init_extensions();
138 // Enabling cpu_mapper
139 V3_enable_cpu_mapper();
141 // Enabling scheduler
142 V3_enable_scheduler();
145 #ifdef V3_CONFIG_SYMMOD
149 #ifdef V3_CONFIG_CHECKPOINT
150 V3_init_checkpoint();
// Per-CPU hardware init: only possible if the host OS gave us a
// cross-CPU call hook.
153 if ((hooks) && (hooks->call_on_cpu)) {
155 for (i = 0; i < num_cpus; i++) {
// Skip CPUs excluded by the caller's bitmap (NULL mask = all CPUs).
159 if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) {
160 V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i);
161 hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i);
// First successfully probed CPU defines the machine-wide type.
163 if (v3_mach_type == V3_INVALID_CPU) {
164 v3_mach_type = v3_cpu_types[i];
// NOTE(review): the function header for this teardown sequence is not
// visible in this extract; it appears to be the global VMM shutdown path
// that mirrors Init_V3 (deinit in roughly reverse order of init), and is
// presumably Shutdown_V3 — confirm against the full source.
177 V3_deinit_shdw_paging();
179 V3_deinit_extensions();
181 #ifdef V3_CONFIG_SYMMOD
185 #ifdef V3_CONFIG_CHECKPOINT
186 V3_deinit_checkpoint();
// Tear down per-CPU hardware state on every CPU that was initialized.
190 if ((os_hooks) && (os_hooks->call_on_cpu)) {
191 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
192 if (v3_cpu_types[i] != V3_INVALID_CPU) {
193 V3_Call_On_CPU(i, deinit_cpu, (void *)(addr_t)i);
194 //deinit_cpu((void *)(addr_t)i);
// Return the virtualization architecture detected on physical CPU cpu_id.
// No bounds check: caller must pass a valid index < V3_CONFIG_MAX_CPUS.
202 v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
203 return v3_cpu_types[cpu_id];
// Create (but do not start) a VM from a parsed configuration image.
//
//  cfg       - opaque configuration blob consumed by v3_config_guest().
//  priv_data - host-private handle stored with the VM.
//  name      - VM name; copied (truncated to 127 chars + NUL) into vm->name.
//
// Returns the new vm on success; the failure path (visible only as the
// PrintError below) presumably returns NULL — confirm in the full source.
// NOTE(review): this extract omits lines (e.g. the NULL-name branch that
// pairs with the "else if" at strlen check).
207 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
208 struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
211 PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
215 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
// Overlong names are accepted but truncated (message says 128; the copy
// below actually keeps at most 127 chars plus the terminator).
219 } else if (strlen(name) >= 128) {
220 PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n");
// memset + strncpy(,,127) guarantees NUL termination of vm->name.
223 memset(vm->name, 0, 128);
224 strncpy(vm->name, name, 127);
227 * Register this VM with the palacios scheduler. It will ask for admission
230 if(v3_scheduler_register_vm(vm) == -1) {
232 PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
// Thread entry point for one virtual core. p is the core's guest_info.
// Registers the core with the scheduler, then enters the architecture-
// specific run loop (SVM or VMX) chosen by the global v3_mach_type.
// Returns the run loop's result; does not return while the guest runs.
// NOTE(review): case labels for the SVM/VMX groups other than the two
// visible ones are omitted in this extract.
241 static int start_core(void * p)
243 struct guest_info * core = (struct guest_info *)p;
245 if (v3_scheduler_register_core(core) == -1){
246 PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
249 PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
250 core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
252 switch (v3_mach_type) {
255 case V3_SVM_REV3_CPU:
256 return v3_start_svm_guest(core);
262 case V3_VMX_EPT_UG_CPU:
263 return v3_start_vmx_guest(core);
// Machine type was never resolved — cannot enter a guest.
267 PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
// Launch a previously created, stopped VM.
//
//  vm       - VM to start; must be in VM_STOPPED state.
//  cpu_mask - bitmap of physical CPUs the VM's cores may run on
//             (viewed byte-wise via core_mask below).
//
// Validates state and the shadow-paging/4GB constraint, admits the VM to
// the scheduler and cpu_mapper, then spawns one host thread per virtual
// core (start_core). NOTE(review): this extract omits lines (the
// major/minor derivation, avail_cores accounting, and error returns).
274 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
277 uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
278 uint32_t avail_cores = 0;
// Refuse double-start: only a stopped VM may be launched.
282 if (vm->run_state != VM_STOPPED) {
283 PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
288 // Do not run if any core is using shadow paging and we are out of 4 GB bounds
289 for (i=0;i<vm->num_cores;i++) {
290 if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
// Shadow paging requires guest physical memory below 4 GB.
291 if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) {
292 PrintError(vm, VCORE_NONE, "Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i);
293 PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
294 PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
295 PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
301 /// CHECK IF WE ARE MULTICORE ENABLED....
303 V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
304 V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
307 // Check that enough cores are present in the mask to handle vcores
308 for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) {
312 if (core_mask[major] & (0x1 << minor)) {
// Drop masked-in CPUs that were never initialized for virtualization.
313 if (v3_cpu_types[i] == V3_INVALID_CPU) {
314 core_mask[major] &= ~(0x1 << minor);
321 vm->avail_cores = avail_cores;
// Scheduler and cpu_mapper admission control: both must accept the VM.
323 if (v3_scheduler_admit_vm(vm) != 0){
324 PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
327 if (v3_cpu_mapper_admit_vm(vm) != 0){
328 PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
// State flips to RUNNING before the core threads are created.
331 vm->run_state = VM_RUNNING;
333 if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
335 PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
// Spawn one host thread per virtual core, pinned to its mapped pcpu.
339 for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
341 struct guest_info * core = &(vm->cores[vcore_id]);
343 PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
344 vcore_id, core->pcpu_id);
// Thread name "<vmname>-<vcore>"; exec_name size is bounded elsewhere.
346 sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
348 PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
349 core->pcpu_id, start_core, core, core->exec_name);
351 core->core_run_state = CORE_STOPPED; // core zero will turn itself on
352 core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
354 if (core->core_thread == NULL) {
355 PrintError(vm, VCORE_NONE, "Thread launch failed\n");
// Reset one virtual core's state and set its RIP to the given value,
// dispatching to the SVM or VMX reset routine based on the type of the
// physical CPU the core currently sits on. Returns that routine's result.
// NOTE(review): additional case labels of the original switch are omitted
// in this extract.
366 int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
368 switch (v3_cpu_types[core->pcpu_id]) {
371 case V3_SVM_REV3_CPU:
372 PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id);
373 return v3_reset_svm_vm_core(core, rip);
378 case V3_VMX_EPT_UG_CPU:
379 PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id);
380 return v3_reset_vmx_vm_core(core, rip);
384 PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n");
393 /* move a virtual core to different physical core */
// Migrate virtual core vcore_id of vm to physical CPU target_cpu.
// The VM is quiesced via the barrier while the move happens; on VMX the
// per-core hardware state must be flushed on the OLD cpu before the host
// thread is moved. Error paths visible here lower the barrier before
// returning. NOTE(review): several lines of the original (returns,
// SVM flush cases, #ifdef guards) are omitted in this extract.
394 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
395 struct guest_info * core = NULL;
// Validate the virtual core index.
397 if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
398 PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
402 core = &(vm->cores[vcore_id]);
// Moving a core onto the CPU it is already on is a no-op error.
404 if (target_cpu == core->pcpu_id) {
405 PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu);
406 // well that was pointless
410 if (core->core_thread == NULL) {
411 PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n");
// Spin until every core of the VM is stopped at the barrier.
415 while (v3_raise_barrier(vm, NULL) == -1);
417 V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu);
419 // Double check that we weren't preemptively migrated
420 if (target_cpu != core->pcpu_id) {
422 V3_Print(vm, core, "Moving Core\n");
// cpu_mapper admission: the target CPU must accept this core.
424 if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){
425 PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu);
431 switch (v3_cpu_types[core->pcpu_id]) {
434 case V3_VMX_EPT_UG_CPU:
435 PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id);
// VMCS must be flushed on the CPU it is currently loaded on.
436 V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core);
443 if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) {
444 PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n",
445 core->vcpu_id, target_cpu);
446 v3_lower_barrier(vm);
450 /* There will be a benign race window here:
451 core->pcpu_id will be set to the target core before its fully "migrated"
452 However the core will NEVER run on the old core again, its just in flight to the new core
454 core->pcpu_id = target_cpu;
456 V3_Print(vm, core, "core now at %d\n", core->pcpu_id);
// Release all cores to resume execution.
459 v3_lower_barrier(vm);
// Stop a running (or simulating) VM: flip run_state to VM_STOPPED and
// then wait for every core's run loop to observe it and halt.
// NOTE(review): the polling-loop structure around the still_running
// check is partially omitted in this extract.
466 int v3_stop_vm(struct v3_vm_info * vm) {
// Only RUNNING or SIMULATING VMs can be stopped.
468 if ((vm->run_state != VM_RUNNING) &&
469 (vm->run_state != VM_SIMULATING)) {
470 PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
474 vm->run_state = VM_STOPPED;
476 // Sanity check to catch any weird execution states
477 if (v3_wait_for_barrier(vm, NULL) == 0) {
478 v3_lower_barrier(vm);
481 // XXX force exit all cores via a cross call/IPI XXX
// Poll until every core reports CORE_STOPPED.
485 int still_running = 0;
487 for (i = 0; i < vm->num_cores; i++) {
488 if (vm->cores[i].core_run_state != CORE_STOPPED) {
493 if (still_running == 0) {
500 V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
// Pause a running VM: raise the barrier so every core parks, then mark
// the VM paused. The barrier stays raised until v3_continue_vm().
506 int v3_pause_vm(struct v3_vm_info * vm) {
508 if (vm->run_state != VM_RUNNING) {
509 PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
// Spin until all cores have checked into the barrier.
513 while (v3_raise_barrier(vm, NULL) == -1);
515 vm->run_state = VM_PAUSED;
// Resume a paused VM: mark it running again and lower the barrier raised
// by v3_pause_vm() so the core threads proceed.
521 int v3_continue_vm(struct v3_vm_info * vm) {
523 if (vm->run_state != VM_PAUSED) {
524 PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
528 vm->run_state = VM_RUNNING;
530 v3_lower_barrier(vm);
// Core-timeout callback used by v3_simulate_vm(): marks this vcore's bit
// in the shared timeout bitmap, then busy-waits until v3_simulate_vm()
// clears it (via v3_bitmap_reset), blocking the core at simulation end.
537 static int sim_callback(struct guest_info * core, void * private_data) {
538 struct v3_bitmap * timeout_map = private_data;
540 v3_bitmap_set(timeout_map, core->vcpu_id);
542 V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
// Park here until the simulation coordinator resets the bitmap.
544 while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
// Run a paused VM for approximately msecs milliseconds, then return it to
// the paused state. Each core gets a cycle-count timeout whose callback
// (sim_callback) blocks the core; this function polls the shared bitmap
// until every core has blocked, then re-raises the barrier and releases
// the cores via v3_bitmap_reset(). NOTE(review): some lines of the
// original (returns, inner loop bodies) are omitted in this extract.
554 int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
555 struct v3_bitmap timeout_map;
559 uint64_t cpu_khz = V3_CPU_KHZ();
// Simulation may only start from the PAUSED state (barrier is raised).
561 if (vm->run_state != VM_PAUSED) {
562 PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
566 /* AT this point VM is paused */
// One bit per vcore; set by sim_callback when that core's timeout fires.
569 v3_bitmap_init(&timeout_map, vm->num_cores);
574 // calculate cycles from msecs...
575 // IMPORTANT: Floating point not allowed.
576 cycles = (msecs * cpu_khz);
580 V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz);
// Arm the same cycle timeout on every core.
584 for (i = 0; i < vm->num_cores; i++) {
585 if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) {
586 PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i);
591 V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n ");
594 // Run the simulation
595 // vm->run_state = VM_SIMULATING;
596 vm->run_state = VM_RUNNING;
597 v3_lower_barrier(vm);
600 V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n");
602 // block until simulation is complete
603 while (all_blocked == 0) {
606 for (i = 0; i < vm->num_cores; i++) {
607 if (v3_bitmap_check(&timeout_map, i) == 0) {
612 if (all_blocked == 1) {
620 V3_Print(vm, VCORE_NONE,"Simulation is complete\n");
622 // Simulation is complete
623 // Reset back to PAUSED state
// Re-arm the barrier first, then unblock the cores (bitmap reset) so
// they immediately park at the barrier, restoring the paused state.
625 v3_raise_barrier_nowait(vm, NULL);
626 vm->run_state = VM_PAUSED;
628 v3_bitmap_reset(&timeout_map);
630 v3_wait_for_barrier(vm, NULL);
// Snapshot the VM's externally visible state into caller-supplied *s:
// overall run state, base memory region, and per-vcore state/mode for up
// to s->num_vcores cores (clamped to the VM's actual core count).
// Internal enum values are translated to the public V3_* constants.
636 int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s)
// Clamp to the smaller of what the caller asked for and what exists.
639 uint32_t numcores = s->num_vcores > vm->num_cores ? vm->num_cores : s->num_vcores;
// Map internal run state -> public state enum.
641 switch (vm->run_state) {
642 case VM_INVALID: s->state = V3_VM_INVALID; break;
643 case VM_RUNNING: s->state = V3_VM_RUNNING; break;
644 case VM_STOPPED: s->state = V3_VM_STOPPED; break;
645 case VM_PAUSED: s->state = V3_VM_PAUSED; break;
646 case VM_ERROR: s->state = V3_VM_ERROR; break;
647 case VM_SIMULATING: s->state = V3_VM_SIMULATING; break;
648 default: s->state = V3_VM_UNKNOWN; break;
651 s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr);
652 s->mem_size = vm->mem_size;
654 s->num_vcores = numcores;
// Per-core translation of run state, CPU mode, and memory modes.
656 for (i=0;i<numcores;i++) {
657 switch (vm->cores[i].core_run_state) {
658 case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break;
659 case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break;
660 case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break;
661 default: s->vcore[i].state = V3_VCORE_UNKNOWN; break;
663 switch (vm->cores[i].cpu_mode) {
664 case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break;
665 case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break;
666 case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break;
667 case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break;
668 case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break;
669 case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break;
670 default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break;
672 switch (vm->cores[i].shdw_pg_mode) {
673 case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break;
674 case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break;
675 default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break;
677 switch (vm->cores[i].mem_mode) {
678 case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break;
679 case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break;
680 default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break;
683 s->vcore[i].pcore=vm->cores[i].pcpu_id;
684 s->vcore[i].last_rip=(void*)(vm->cores[i].rip);
685 s->vcore[i].num_exits=vm->cores[i].num_exits;
// Checkpoint / migration entry points: thin wrappers over the
// vmm_checkpoint implementation, compiled in only when the corresponding
// config options are enabled.
692 #ifdef V3_CONFIG_CHECKPOINT
693 #include <palacios/vmm_checkpoint.h>
// Save the VM's state to the given checkpoint store/url.
695 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
696 return v3_chkpt_save_vm(vm, store, url, opts);
// Restore the VM's state from the given checkpoint store/url.
700 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
701 return v3_chkpt_load_vm(vm, store, url, opts);
704 #ifdef V3_CONFIG_LIVE_MIGRATION
// Live-migration sender side: stream the VM out via the checkpoint store.
705 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
706 return v3_chkpt_send_vm(vm, store, url, opts);
// Live-migration receiver side: accept an incoming VM image.
710 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
711 return v3_chkpt_receive_vm(vm, store, url, opts);
// Destroy a VM and release its resources. Only legal once the VM is
// STOPPED or in ERROR state; frees devices, then each core's state,
// then the VM structure itself.
718 int v3_free_vm(struct v3_vm_info * vm) {
720 // deinitialize guest (free memory, etc...)
722 if ((vm->run_state != VM_STOPPED) &&
723 (vm->run_state != VM_ERROR)) {
724 PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
728 v3_free_vm_devices(vm);
// Per-core teardown before the containing vm struct is released.
731 for (i = 0; i < vm->num_cores; i++) {
732 v3_free_core(&(vm->cores[i]));
736 v3_free_vm_internal(vm);
// Determine the host CPU's operating mode. Two variants exist, selected
// by a preprocessor conditional that is not visible in this extract
// (presumably 32-bit vs 64-bit builds — confirm in the full source);
// the first inspects CR4 (PAE bit) to report PROTECTED_PAE.
748 v3_cpu_mode_t v3_get_host_cpu_mode() {
758 cr4 = (struct cr4_32 *)&(cr4_val);
761 return PROTECTED_PAE;
// Second variant (body not visible in this extract).
769 v3_cpu_mode_t v3_get_host_cpu_mode() {
// Conditional debug printf: formats into a local buffer and emits it via
// V3_Print only when the global v3_dbg_enable flag is set.
775 void v3_print_cond(const char * fmt, ...) {
776 if (v3_dbg_enable == 1) {
// vsnprintf bounds the output to the 2048-byte buffer.
781 vsnprintf(buf, 2048, fmt, ap);
784 V3_Print(VM_NONE, VCORE_NONE,"%s", buf);
// Ask the host OS to send an interrupt (IPI with the given vector) to a
// logical CPU on behalf of vm. Silently does nothing if the host did not
// provide an interrupt_cpu hook.
790 void v3_interrupt_cpu(struct v3_vm_info * vm, int logical_cpu, int vector) {
791 extern struct v3_os_hooks * os_hooks;
793 if ((os_hooks) && (os_hooks)->interrupt_cpu) {
794 (os_hooks)->interrupt_cpu(vm, logical_cpu, vector);
// Enter the guest for one execution pass on the calling core, dispatching
// to the SVM or VMX entry path based on the global machine type.
// Returns the architecture-specific entry function's result.
// NOTE(review): case labels other than the two visible ones are omitted
// in this extract.
800 int v3_vm_enter(struct guest_info * info) {
801 switch (v3_mach_type) {
804 case V3_SVM_REV3_CPU:
805 return v3_svm_enter(info);
811 case V3_VMX_EPT_UG_CPU:
812 return v3_vmx_enter(info);
816 PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n");
// Return the host-private handle stored with the VM at creation time
// (the priv_data argument of v3_create_vm).
822 void *v3_get_host_vm(struct v3_vm_info *x)
825 return x->host_priv_data;
831 int v3_get_vcore(struct guest_info *x)