X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm.c;h=f45a4be56e8ff1e850d8cef827a067f578901045;hb=60ad6a41c6d0ee08ed689e8505eb0c3df0c2a289;hp=9337cdd6b1d41296d797abcb45d1badaeca72cf9;hpb=e94507c7055f81abcf6a95132cb7ad90f1b5e6ca;p=palacios.git

diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c
index 9337cdd..f45a4be 100644
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -29,6 +29,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #ifdef V3_CONFIG_SVM
 #include 
@@ -53,6 +56,8 @@ int v3_dbg_enable = 0;
 static void init_cpu(void * arg) {
     uint32_t cpu_id = (uint32_t)(addr_t)arg;
 
+    v3_init_fp();
+
 #ifdef V3_CONFIG_SVM
     if (v3_is_svm_capable()) {
         PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
@@ -98,7 +103,21 @@ static void deinit_cpu(void * arg) {
             PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
             break;
     }
+
+    v3_deinit_fp();
+
+}
+
+
+static int in_long_mode()
+{
+    uint32_t high, low;
+
+    v3_get_msr(0xc0000080, &high, &low);  // EFER
+
+    return ((low & 0x500) == 0x500);      // LMA and LME set
 }
+
 
 
 void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
     int i = 0;
@@ -108,6 +127,16 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *op
 
     V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
 
+
+#ifndef __V3_64BIT__
+#error Palacios does not support compilation for a 32 bit host OS!!!!
+#else
+    if (!in_long_mode()) {
+        PrintError(VM_NONE, VCORE_NONE, "Palacios supports execution only in long mode (64 bit).\n");
+        return;
+    }
+#endif
+
     // Set global variables. 
     os_hooks = hooks;
 
@@ -125,6 +154,10 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *op
 
     // Parse host-os defined options into an easily-accessed format.
     v3_parse_options(options);
 
+#ifdef V3_CONFIG_HVM
+    v3_init_hvm();
+#endif
+
     // Memory manager initialization
     v3_init_mem();
 
@@ -134,6 +167,10 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *op
     // Register all shadow paging handlers
     V3_init_shdw_paging();
 
+#ifdef V3_CONFIG_SWAPPING
+    v3_init_swapping();
+#endif
+
     // Initialize the cpu_mapper framework (must be before extensions)
     V3_init_cpu_mapper();
 
@@ -211,6 +248,10 @@ void Shutdown_V3() {
 
     V3_deinit_scheduling();
 
     V3_deinit_cpu_mapper();
+
+#ifdef V3_CONFIG_SWAPPING
+    v3_deinit_swapping();
+#endif
 
     V3_deinit_shdw_paging();
 
@@ -218,6 +259,10 @@ void Shutdown_V3() {
 
     v3_deinit_mem();
 
+#ifdef V3_CONFIG_HVM
+    v3_deinit_hvm();
+#endif
+
     v3_deinit_options();
 
@@ -228,9 +273,42 @@ v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
     return v3_cpu_types[cpu_id];
 }
 
+static int start_core(void * p)
+{
+    struct guest_info * core = (struct guest_info *)p;
+
+    if (v3_scheduler_register_core(core) == -1){
+        PrintError(core->vm_info, core, "Error initializing scheduling in core %d\n", core->vcpu_id);
+    }
+
+    PrintDebug(core->vm_info, core, "virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
+               core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
 
-struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
+    switch (v3_mach_type) {
+#ifdef V3_CONFIG_SVM
+        case V3_SVM_CPU:
+        case V3_SVM_REV3_CPU:
+            return v3_start_svm_guest(core);
+            break;
+#endif
+#if V3_CONFIG_VMX
+        case V3_VMX_CPU:
+        case V3_VMX_EPT_CPU:
+        case V3_VMX_EPT_UG_CPU:
+            return v3_start_vmx_guest(core);
+            break;
+#endif
+        default:
+            PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
+            return -1;
+    }
+    // should not happen
+    return 0;
+}
+
+struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name, unsigned int cpu_mask) {
     struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
+    int vcore_id = 0;
 
     if (vm == NULL) {
         PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
@@ -248,6 +326,11 @@ struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
 
     memset(vm->name, 0, 128);
     strncpy(vm->name, name, 127);
 
+    if(v3_cpu_mapper_register_vm(vm) == -1) {
+
+        PrintError(vm, VCORE_NONE, "Error registering VM with cpu_mapper\n");
+    }
+
     /*
      * Register this VM with the palacios scheduler. It will ask for admission
      * prior to launch.
@@ -257,75 +340,58 @@ struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
         PrintError(vm, VCORE_NONE, "Error registering VM with scheduler\n");
     }
 
-    return vm;
-}
+    if (v3_cpu_mapper_admit_vm(vm, cpu_mask) != 0){
+        PrintError(vm, VCORE_NONE, "Error admitting VM %s for mapping", vm->name);
+    }
 
+    for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
+        struct guest_info * core = &(vm->cores[vcore_id]);
 
+        PrintDebug(vm, VCORE_NONE, "Creating virtual core %u on logical core %u\n",
+                   vcore_id, core->pcpu_id);
 
-static int start_core(void * p)
-{
-    struct guest_info * core = (struct guest_info *)p;
+        sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
 
-    if (v3_scheduler_register_core(core) == -1){
-        PrintError(core->vm_info, core, "Error initializing scheduling in core %d\n", core->vcpu_id);
-    }
+        PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
+                   core->pcpu_id, start_core, core, core->exec_name);
 
-    PrintDebug(core->vm_info, core, "virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
-               core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
+        core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
+
+        if (core->core_thread == NULL) {
+            PrintError(vm, VCORE_NONE, "Thread creation failed\n");
+            v3_stop_vm(vm);
+            return NULL;
+        }
 
-    switch (v3_mach_type) {
-#ifdef V3_CONFIG_SVM
-        case V3_SVM_CPU:
-        case V3_SVM_REV3_CPU:
-            return v3_start_svm_guest(core);
-            break;
-#endif
-#if V3_CONFIG_VMX
-        case V3_VMX_CPU:
-        case V3_VMX_EPT_CPU:
-        case V3_VMX_EPT_UG_CPU:
-            return v3_start_vmx_guest(core);
-            break;
-#endif
-        default:
-            PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
-            return -1;
     }
-    // should not happen
-    return 0;
+
+    return vm;
 }
 
 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
-    uint32_t i,j;
+    uint32_t i;
     uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
     uint32_t avail_cores = 0;
     int vcore_id = 0;
-    extern uint64_t v3_mem_block_size;
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to start nonexistent VM\n");
+        return -1;
+    }
 
     if (vm->run_state != VM_STOPPED) {
         PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
         return -1;
     }
-
-    // Do not run if any core is using shadow paging and we are out of 4 GB bounds
-    for (i=0;i<vm->num_cores;i++) {
-        if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
-            for (j=0;j<vm->mem_map.num_base_regions;j++) {
-                if ((vm->mem_map.base_regions[i].host_addr + v3_mem_block_size) >= 0x100000000ULL) {
-                    PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n", j, i);
-                    PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
-                    PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
-                    PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
-                    return -1;
-                }
-            }
-        }
+#if V3_CONFIG_HVM
+    if (v3_setup_hvm_vm_for_boot(vm)) {
+        PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n");
+        return -1;
     }
-    
+#endif
+
     /// CHECK IF WE ARE MULTICORE ENABLED....
     V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
 
@@ -347,43 +413,30 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
     }
 
     vm->avail_cores = avail_cores;
-    
+
     if (v3_scheduler_admit_vm(vm) != 0){
         PrintError(vm, VCORE_NONE, "Error admitting VM %s for scheduling", vm->name);
     }
 
-    if (v3_cpu_mapper_admit_vm(vm) != 0){
-        PrintError(vm, VCORE_NONE, "Error admitting VM %s for mapping", vm->name);
-    }
-
     vm->run_state = VM_RUNNING;
 
-    if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
-
-        PrintError(vm, VCORE_NONE, "Error registering VM with cpu_mapper\n");
-    }
-
-
     for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
         struct guest_info * core = &(vm->cores[vcore_id]);
 
         PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n", 
                    vcore_id, core->pcpu_id);
-
-        sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
 
-        PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
-                   core->pcpu_id, start_core, core, core->exec_name);
+        if (core->core_run_state==CORE_INVALID) {
+            // launch of a fresh VM
+            core->core_run_state = CORE_STOPPED;
+            // core zero will turn itself on
+        } else {
+            // this is a resume - use whatever its current run_state is
+        }
 
-        core->core_run_state = CORE_STOPPED; // core zero will turn itself on
-        core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
+        V3_START_THREAD(core->core_thread);
 
-        if (core->core_thread == NULL) {
-            PrintError(vm, VCORE_NONE, "Thread launch failed\n");
-            v3_stop_vm(vm);
-            return -1;
-        }
     }
 
     return 0;
@@ -422,6 +475,11 @@ int v3_reset_vm_core(struct guest_info * core, addr_t rip) {
 int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
     struct guest_info * core = NULL;
 
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to move core of nonexistent VM\n");
+        return -1;
+    }
+
     if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
         PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
         return -1;
@@ -489,10 +547,134 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
     return 0;
 }
 
+/* move a memory region to memory with affinity for a specific physical core */
+int v3_move_vm_mem(struct v3_vm_info * vm, void *gpa, int target_cpu) {
+    int old_node;
+    int new_node;
+    struct v3_mem_region *reg;
+    void *new_hpa;
+    int num_pages;
+    void *old_hpa;
+    int i;
+
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to move memory of nonexistent VM\n");
+        return -1;
+    }
+
+    old_node = v3_numa_gpa_to_node(vm,(addr_t)gpa);
+
+    if (old_node<0) {
+        PrintError(vm, VCORE_NONE, "Cannot determine current node of gpa %p\n",gpa);
+        return -1;
+    }
+
+    new_node = v3_numa_cpu_to_node(target_cpu);
+
+    if (new_node<0) {
+        PrintError(vm, VCORE_NONE, "Cannot determine current node of cpu %d\n",target_cpu);
+        return -1;
+    }
+
+    if (new_node==old_node) {
+        PrintDebug(vm, VCORE_NONE, "Affinity is already established - ignoring request\n");
+        return 0;
+    }
+
+    // We are now going to change the universe, so
+    // we'll barrier everyone first
+
+    while (v3_raise_barrier(vm, NULL) == -1);
+
+    // get region
+
+    reg = v3_get_mem_region(vm, V3_MEM_CORE_ANY, (addr_t) gpa);
+
+    if (!reg) {
+        PrintError(vm, VCORE_NONE, "Attempt to migrate non-existent memory\n");
+        goto out_fail;
+    }
+
+    if (!(reg->flags.base) || !(reg->flags.alloced)) {
+        PrintError(vm, VCORE_NONE, "Attempt to migrate invalid region: base=%d alloced=%d\n", reg->flags.base, reg->flags.alloced);
+        goto out_fail;
+    }
+
+    // we now have the allocated base region corresponding to the gpa - and not a copy
+    // we will rewrite this region after moving its contents
+
+    // first, let's double check that we are in fact changing the numa_id...
+
+    if (reg->numa_id==new_node) {
+        PrintDebug(vm, VCORE_NONE, "Affinity for this base region is already established - ignoring...\n");
+        goto out_success;
+    }
+
+    // region uses exclusive addressing [guest_start,guest_end)
+    num_pages = (reg->guest_end-reg->guest_start)/PAGE_SIZE;
+
+    new_hpa = V3_AllocPagesExtended(num_pages,
+                                    PAGE_SIZE_4KB,
+                                    new_node,
+                                    0);  // no constraints given new shadow pager impl
+
+    if (!new_hpa) {
+        PrintError(vm, VCORE_NONE, "Cannot allocate memory for new base region...\n");
+        goto out_fail;
+    }
+
+    // Note, assumes virtual contiguity in the host OS...
+    memcpy(V3_VAddr((void*)new_hpa), V3_VAddr((void*)(reg->host_addr)), num_pages*PAGE_SIZE);
+
+    old_hpa = (void*)(reg->host_addr);
+    old_node = (int)(reg->numa_id);
+
+    reg->host_addr = (addr_t)new_hpa;
+    reg->numa_id = v3_numa_hpa_to_node((addr_t)new_hpa);
+
+    // flush all page tables / kill all humans
+
+    for (i=0;i<vm->num_cores;i++) {
+        if (vm->cores[i].shdw_pg_mode==SHADOW_PAGING) {
+            v3_invalidate_shadow_pts(&(vm->cores[i]));
+        } else if (vm->cores[i].shdw_pg_mode==NESTED_PAGING) {
+            // nested invalidator uses inclusive addressing [start,end], not [start,end)
+            v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1,NULL,NULL);
+        } else {
+            PrintError(vm, VCORE_NONE, "Cannot determine how to invalidate paging structures! Reverting to previous region.\n");
+            // We'll restore things...
+            reg->host_addr = (addr_t) old_hpa;
+            reg->numa_id = old_node;
+            V3_FreePages(new_hpa,num_pages);
+            goto out_fail;
+        }
+    }
+
+    // Now the old region can go away...
+    V3_FreePages(old_hpa,num_pages);
+
+    PrintDebug(vm, VCORE_NONE, "Migration of memory complete - new region is %p to %p\n",
+               (void*)(reg->host_addr),(void*)(reg->host_addr+num_pages*PAGE_SIZE-1));
+
+ out_success:
+    v3_lower_barrier(vm);
+    return 0;
+
+
+ out_fail:
+    v3_lower_barrier(vm);
+    return -1;
+}
 
 
 int v3_stop_vm(struct v3_vm_info * vm) {
 
+    struct guest_info * running_core;
+
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to stop nonexistent VM\n");
+        return -1;
+    }
+
     if ((vm->run_state != VM_RUNNING) && 
         (vm->run_state != VM_SIMULATING)) {
         PrintError(vm, VCORE_NONE, "Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
@@ -514,6 +696,7 @@ int v3_stop_vm(struct v3_vm_info * vm) {
 
         for (i = 0; i < vm->num_cores; i++) {
             if (vm->cores[i].core_run_state != CORE_STOPPED) {
+                running_core = &vm->cores[i];
                 still_running = 1;
             }
         }
@@ -522,7 +705,7 @@ int v3_stop_vm(struct v3_vm_info * vm) {
             break;
         }
 
-        v3_yield(NULL,-1);
+        v3_scheduler_stop_core(running_core);
     }
 
     V3_Print(vm, VCORE_NONE, "VM stopped. Returning\n");
@@ -533,6 +716,11 @@ int v3_stop_vm(struct v3_vm_info * vm) {
 
 
 int v3_pause_vm(struct v3_vm_info * vm) {
 
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to pause nonexistent VM\n");
+        return -1;
+    }
+
     if (vm->run_state != VM_RUNNING) {
         PrintError(vm, VCORE_NONE, "Tried to pause a VM that was not running\n");
         return -1;
@@ -548,6 +736,11 @@ int v3_pause_vm(struct v3_vm_info * vm) {
 
 
 int v3_continue_vm(struct v3_vm_info * vm) {
 
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to continue nonexistent VM\n");
+        return -1;
+    }
+
     if (vm->run_state != VM_PAUSED) {
         PrintError(vm, VCORE_NONE, "Tried to continue a VM that was not paused\n");
         return -1;
@@ -570,6 +763,7 @@ static int sim_callback(struct guest_info * core, void * private_data) {
     V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
 
     while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
+        // We spin here if there is no one to yield to
         v3_yield(NULL,-1);
     }
 
@@ -586,6 +780,11 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
     uint64_t cycles = 0;
     uint64_t cpu_khz = V3_CPU_KHZ();
 
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to simulate nonexistent VM\n");
+        return -1;
+    }
+
     if (vm->run_state != VM_PAUSED) {
         PrintError(vm, VCORE_NONE, "VM must be paused before simulation begins\n");
         return -1;
@@ -640,7 +839,8 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) {
         if (all_blocked == 1) {
             break;
         }
-
+
+        // Intentionally spin if there is no one to yield to
         v3_yield(NULL,-1);
     }
 
@@ -667,14 +867,23 @@ int v3_get_state_vm(struct v3_vm_info *vm,
                     struct v3_vm_mem_state *mem)
 {
     uint32_t i;
-    uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
-    uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
+    uint32_t numcores;
+    uint32_t numregions;
     extern uint64_t v3_mem_block_size;
 
+    if (!vm || !base || !core || !mem) {
+        PrintError(VM_NONE, VCORE_NONE, "Invalid request to v3_get_state_vm\n");
+        return -1;
+    }
+
+    numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
+    numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
+
     switch (vm->run_state) { 
         case VM_INVALID: base->state = V3_VM_INVALID; break;
         case VM_RUNNING: base->state = V3_VM_RUNNING; break;
         case VM_STOPPED: base->state = V3_VM_STOPPED; break;
+        case VM_RESETTING: base->state = V3_VM_RESETTING; break;
         case VM_PAUSED: base->state = V3_VM_PAUSED; break;
         case VM_ERROR: base->state = V3_VM_ERROR; break;
         case VM_SIMULATING: base->state = V3_VM_SIMULATING; break;
@@ -715,9 +924,17 @@ int v3_get_state_vm(struct v3_vm_info *vm,
 
     core->num_vcores=numcores;
 
-    for (i=0;i<vm->mem_map.num_base_regions;i++) {
+    for (i=0;i<numregions;i++) {
         mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
         mem->region[i].size = v3_mem_block_size;
+#ifdef V3_CONFIG_SWAPPING
+        mem->region[i].swapped = vm->mem_map.base_regions[i].flags.swapped;
+        mem->region[i].pinned = vm->mem_map.base_regions[i].flags.pinned;
+#else
+        mem->region[i].swapped = 0;
+        mem->region[i].pinned = 0;
+#endif
+
     }
 
     mem->num_regions=numregions;
@@ -730,22 +947,38 @@ int v3_get_state_vm(struct v3_vm_info *vm,
 
 #include 
 
 int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
-    return v3_chkpt_save_vm(vm, store, url, opts);
+    if (!vm || !store || !url) {
+        PrintError(VM_NONE, VCORE_NONE, "Incorrect arguments for v3_save_vm\n");
+        return -1;
+    }
+    return v3_chkpt_save_vm(vm, store, url, opts);
 }
 
 
 int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
-    return v3_chkpt_load_vm(vm, store, url, opts);
+    if (!vm || !store || !url) {
+        PrintError(VM_NONE, VCORE_NONE, "Incorrect arguments for v3_load_vm\n");
+        return -1;
+    }
+    return v3_chkpt_load_vm(vm, store, url, opts);
 }
 
 #ifdef V3_CONFIG_LIVE_MIGRATION
 int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
-    return v3_chkpt_send_vm(vm, store, url, opts);
+    if (!vm || !store || !url) {
+        PrintError(VM_NONE, VCORE_NONE, "Incorrect arguments for v3_send_vm\n");
+        return -1;
+    }
+    return v3_chkpt_send_vm(vm, store, url, opts);
 }
 
 
 int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
-    return v3_chkpt_receive_vm(vm, store, url, opts);
+    if (!vm || !store || !url) {
+        PrintError(VM_NONE, VCORE_NONE, "Incorrect arguments for v3_receive_vm\n");
+        return -1;
+    }
+    return v3_chkpt_receive_vm(vm, store, url, opts);
 }
 #endif
 
@@ -756,6 +989,11 @@ int v3_free_vm(struct v3_vm_info * vm) {
     int i = 0;
     // deinitialize guest (free memory, etc...)
 
+    if (!vm) {
+        PrintError(VM_NONE, VCORE_NONE, "Asked to free nonexistent VM\n");
+        return -1;
+    }
+
     if ((vm->run_state != VM_STOPPED) && 
         (vm->run_state != VM_ERROR)) {
         PrintError(vm, VCORE_NONE, "Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
@@ -766,10 +1004,12 @@ int v3_free_vm(struct v3_vm_info * vm) {
 
     // free cores
     for (i = 0; i < vm->num_cores; i++) {
+        v3_scheduler_free_core(&(vm->cores[i]));
         v3_free_core(&(vm->cores[i]));
     }
 
     // free vm
+    v3_scheduler_free_vm(vm);
     v3_free_vm_internal(vm);
 
     v3_free_config(vm);
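
Below is a minimal, hypothetical host-side sketch (not part of the patch above) of how an embedding OS might drive the reworked lifecycle: after this change, v3_create_vm() takes the cpu_mask, registers the VM with the cpu_mapper and scheduler, and creates the (stopped) core threads, while v3_start_vm() only performs admission and starts the already-created threads. The wrapper name host_launch_guest and its arguments are illustrative assumptions.

    /* Hypothetical embedding-OS glue -- illustration only, not from this commit. */
    #include <palacios/vmm.h>

    static int host_launch_guest(void * cfg_image, void * host_priv, unsigned int cpu_mask)
    {
        struct v3_vm_info * vm;

        /* Create: parse the config, map cores, and create (stopped) core threads. */
        vm = v3_create_vm(cfg_image, host_priv, "guest-0", cpu_mask);

        if (vm == NULL) {
            return -1;    /* configuration or core-thread creation failed */
        }

        /* Start: admission control, then V3_START_THREAD() on each core thread. */
        if (v3_start_vm(vm, cpu_mask) < 0) {
            v3_free_vm(vm);    /* VM is still VM_STOPPED on failure, so it can be freed */
            return -1;
        }

        return 0;
    }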