X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm.c;h=0a281ab684c698a6988e87f37d0b7a925f6de44b;hb=6b9abb54ebafd8266f1711b803ccb027675a465f;hp=bbb5b1b175d92910bc66a07cdce32df81a011ea0;hpb=13843de52d67d647f8ef05f736dc8f7d8be6adb3;p=palacios.git diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index bbb5b1b..0a281ab 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -26,7 +27,11 @@ #include #include #include - +#include +#include +#include +#include +#include #ifdef V3_CONFIG_SVM #include @@ -48,26 +53,27 @@ int v3_dbg_enable = 0; - static void init_cpu(void * arg) { uint32_t cpu_id = (uint32_t)(addr_t)arg; + v3_init_fp(); + #ifdef V3_CONFIG_SVM if (v3_is_svm_capable()) { - PrintDebug("Machine is SVM Capable\n"); + PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n"); v3_init_svm_cpu(cpu_id); } else #endif #ifdef V3_CONFIG_VMX if (v3_is_vmx_capable()) { - PrintDebug("Machine is VMX Capable\n"); + PrintDebug(VM_NONE, VCORE_NONE, "Machine is VMX Capable\n"); v3_init_vmx_cpu(cpu_id); } else #endif { - PrintError("CPU has no virtualization Extensions\n"); + PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n"); } } @@ -80,7 +86,7 @@ static void deinit_cpu(void * arg) { #ifdef V3_CONFIG_SVM case V3_SVM_CPU: case V3_SVM_REV3_CPU: - PrintDebug("Deinitializing SVM CPU %d\n", cpu_id); + PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing SVM CPU %d\n", cpu_id); v3_deinit_svm_cpu(cpu_id); break; #endif @@ -88,28 +94,56 @@ static void deinit_cpu(void * arg) { case V3_VMX_CPU: case V3_VMX_EPT_CPU: case V3_VMX_EPT_UG_CPU: - PrintDebug("Deinitializing VMX CPU %d\n", cpu_id); + PrintDebug(VM_NONE, VCORE_NONE, "Deinitializing VMX CPU %d\n", cpu_id); v3_deinit_vmx_cpu(cpu_id); break; #endif case V3_INVALID_CPU: default: - PrintError("CPU has no virtualization Extensions\n"); + PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n"); break; } + + v3_deinit_fp(); + } -void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { +static int in_long_mode() +{ + uint32_t high, low; + + v3_get_msr(0xc0000080,&high,&low); // EFER + + return ((low & 0x500)== 0x500); // LMA and LME set +} + + +void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) { int i = 0; int minor = 0; int major = 0; - V3_Print("V3 Print statement to fix a Kitten page fault bug\n"); + V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n"); + + + +#ifndef __V3_64BIT__ +#error Palacios does not support compilation for a 32 bit host OS!!!! +#else + if (!in_long_mode()) { + PrintError(VM_NONE,VCORE_NONE,"Palacios supports execution only in long mode (64 bit).\n"); + return; + } +#endif // Set global variables. os_hooks = hooks; + if (num_cpus>V3_CONFIG_MAX_CPUS) { + PrintError(VM_NONE,VCORE_NONE, "Requesting as many as %d cpus, but Palacios is compiled for a maximum of %d. Only the first %d cpus will be considered\n", num_cpus, V3_CONFIG_MAX_CPUS, V3_CONFIG_MAX_CPUS); + } + // Determine the global machine type v3_mach_type = V3_INVALID_CPU; @@ -117,15 +151,45 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { v3_cpu_types[i] = V3_INVALID_CPU; } + // Parse host-os defined options into an easily-accessed format. 
+ v3_parse_options(options); + +#ifdef V3_CONFIG_MULTIBOOT + v3_init_multiboot(); +#endif + +#ifdef V3_CONFIG_HVM + v3_init_hvm(); +#endif + + // Memory manager initialization + v3_init_mem(); + // Register all the possible device types V3_init_devices(); // Register all shadow paging handlers V3_init_shdw_paging(); +#ifdef V3_CONFIG_SWAPPING + v3_init_swapping(); +#endif + + // Initialize the cpu_mapper framework (must be before extensions) + V3_init_cpu_mapper(); + + // Initialize the scheduler framework (must be before extensions) + V3_init_scheduling(); + // Register all extensions V3_init_extensions(); + // Enabling cpu_mapper + V3_enable_cpu_mapper(); + + // Enabling scheduler + V3_enable_scheduler(); + #ifdef V3_CONFIG_SYMMOD V3_init_symmod(); @@ -137,12 +201,12 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { if ((hooks) && (hooks->call_on_cpu)) { - for (i = 0; i < num_cpus; i++) { + for (i = 0; i < num_cpus && i < V3_CONFIG_MAX_CPUS; i++) { major = i / 8; minor = i % 8; if ((cpu_mask == NULL) || (*(cpu_mask + major) & (0x1 << minor))) { - V3_Print("Initializing VMM extensions on cpu %d\n", i); + V3_Print(VM_NONE, VCORE_NONE, "Initializing VMM extensions on cpu %d\n", i); hooks->call_on_cpu(i, &init_cpu, (void *)(addr_t)i); if (v3_mach_type == V3_INVALID_CPU) { @@ -158,19 +222,9 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { void Shutdown_V3() { int i; - V3_deinit_devices(); - V3_deinit_shdw_paging(); - - V3_deinit_extensions(); - -#ifdef V3_CONFIG_SYMMOD - V3_deinit_symmod(); -#endif - -#ifdef V3_CONFIG_CHECKPOINT - V3_deinit_checkpoint(); -#endif + // Reverse order of Init_V3 + // bring down CPUs if ((os_hooks) && (os_hooks->call_on_cpu)) { for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) { @@ -181,45 +235,61 @@ void Shutdown_V3() { } } -} +#ifdef V3_CONFIG_CHECKPOINT + V3_deinit_checkpoint(); +#endif +#ifdef V3_CONFIG_SYMMOD + V3_deinit_symmod(); +#endif -v3_cpu_arch_t v3_get_cpu_type(int cpu_id) { - return v3_cpu_types[cpu_id]; -} + V3_disable_scheduler(); + V3_disable_cpu_mapper(); -struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) { - struct v3_vm_info * vm = v3_config_guest(cfg, priv_data); + V3_deinit_extensions(); - if (vm == NULL) { - PrintError("Could not configure guest\n"); - return NULL; - } + V3_deinit_scheduling(); + + V3_deinit_cpu_mapper(); - V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); +#ifdef V3_CONFIG_SWAPPING + v3_deinit_swapping(); +#endif + + V3_deinit_shdw_paging(); + + V3_deinit_devices(); - if (name == NULL) { - name = "[V3_VM]"; - } else if (strlen(name) >= 128) { - PrintError("VM name is too long. 
Will be truncated to 128 chars.\n"); - } + v3_deinit_mem(); + +#ifdef V3_CONFIG_HVM + v3_deinit_hvm(); +#endif - memset(vm->name, 0, 128); - strncpy(vm->name, name, 127); +#ifdef V3_CONFIG_MULTIBOOT + v3_deinit_multiboot(); +#endif - return vm; -} + v3_deinit_options(); + +} +v3_cpu_arch_t v3_get_cpu_type(int cpu_id) { + return v3_cpu_types[cpu_id]; +} static int start_core(void * p) { struct guest_info * core = (struct guest_info *)p; + if (v3_scheduler_register_core(core) == -1){ + PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id); + } - PrintDebug("virtual core %u (on logical core %u): in start_core (RIP=%p)\n", + PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n", core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip); switch (v3_mach_type) { @@ -237,54 +307,113 @@ static int start_core(void * p) break; #endif default: - PrintError("Attempting to enter a guest on an invalid CPU\n"); + PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n"); return -1; } // should not happen return 0; } +struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name, unsigned int cpu_mask) { + struct v3_vm_info * vm = v3_config_guest(cfg, priv_data); + int vcore_id = 0; -// For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type... -#define MAX_CORES 32 + if (vm == NULL) { + PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n"); + return NULL; + } + V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); + + if (name == NULL) { + name = "[V3_VM]"; + } else if (strlen(name) >= 128) { + PrintError(vm, VCORE_NONE,"VM name is too long. Will be truncated to 128 chars.\n"); + } + + memset(vm->name, 0, 128); + strncpy(vm->name, name, 127); + + if(v3_cpu_mapper_register_vm(vm) == -1) { + + PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n"); + } + + /* + * Register this VM with the palacios scheduler. It will ask for admission + * prior to launch. 
+ */ + if(v3_scheduler_register_vm(vm) == -1) { + + PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n"); + } + + if (v3_cpu_mapper_admit_vm(vm,cpu_mask) != 0){ + PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name); + } + + for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) { + + struct guest_info * core = &(vm->cores[vcore_id]); + + PrintDebug(vm, VCORE_NONE, "Creating virtual core %u on logical core %u\n", + vcore_id, core->pcpu_id); + + sprintf(core->exec_name, "%s-%u", vm->name, vcore_id); + + PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n", + core->pcpu_id, start_core, core, core->exec_name); + + core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name); + + if (core->core_thread == NULL) { + PrintError(vm, VCORE_NONE, "Thread creation failed\n"); + v3_stop_vm(vm); + return NULL; + } + + } + return vm; +} int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) { + uint32_t i; uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier uint32_t avail_cores = 0; int vcore_id = 0; + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to start nonexistent VM\n"); + return -1; + } if (vm->run_state != VM_STOPPED) { - PrintError("VM has already been launched (state=%d)\n", (int)vm->run_state); + PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state); return -1; } - - // Do not run if any core is using shadow paging and we are out of 4 GB bounds - for (i=0;inum_cores;i++) { - if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) { - if ((vm->mem_map.base_region.host_addr + vm->mem_size ) >= 0x100000000ULL) { - PrintError("Base memory region exceeds 4 GB boundary with shadow paging enabled on core %d.\n",i); - PrintError("Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n"); - PrintError("If you would like to proceed anyway, remove this check and recompile Palacios.\n"); - PrintError("Alternatively, change this VM to use nested paging.\n"); - return -1; - } - } +#if V3_CONFIG_MULTIBOOT + if (v3_setup_multiboot_vm_for_boot(vm)) { + PrintError(vm, VCORE_NONE, "Multiboot setup for boot failed\n"); + return -1; } - - +#endif +#if V3_CONFIG_HVM + if (v3_setup_hvm_vm_for_boot(vm)) { + PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n"); + return -1; + } +#endif /// CHECK IF WE ARE MULTICORE ENABLED.... - V3_Print("V3 -- Starting VM (%u cores)\n", vm->num_cores); - V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); + V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores); + V3_Print(vm, VCORE_NONE, "CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); // Check that enough cores are present in the mask to handle vcores - for (i = 0; i < MAX_CORES; i++) { + for (i = 0; i < V3_CONFIG_MAX_CPUS; i++) { int major = i / 8; int minor = i % 8; @@ -297,80 +426,33 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) { } } + vm->avail_cores = avail_cores; - if (vm->num_cores > avail_cores) { - PrintError("Attempted to start a VM with too many cores (vm->num_cores = %d, avail_cores = %d, MAX=%d)\n", - vm->num_cores, avail_cores, MAX_CORES); - return -1; + if (v3_scheduler_admit_vm(vm) != 0){ + PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name); } vm->run_state = VM_RUNNING; - // Spawn off threads for each core. - // We work backwards, so that core 0 is always started last. 
- for (i = 0, vcore_id = vm->num_cores - 1; (i < MAX_CORES) && (vcore_id >= 0); i++) { - int major = 0; - int minor = 0; - struct guest_info * core = &(vm->cores[vcore_id]); - char * specified_cpu = v3_cfg_val(core->core_cfg_data, "target_cpu"); - uint32_t core_idx = 0; - - if (specified_cpu != NULL) { - core_idx = atoi(specified_cpu); - - if ((core_idx < 0) || (core_idx >= MAX_CORES)) { - PrintError("Target CPU out of bounds (%d) (MAX_CORES=%d)\n", core_idx, MAX_CORES); - } - - i--; // We reset the logical core idx. Not strictly necessary I guess... - } else { - core_idx = i; - } - - major = core_idx / 8; - minor = core_idx % 8; - - if ((core_mask[major] & (0x1 << minor)) == 0) { - PrintError("Logical CPU %d not available for virtual core %d; not started\n", - core_idx, vcore_id); - - if (specified_cpu != NULL) { - PrintError("CPU was specified explicitly (%d). HARD ERROR\n", core_idx); - v3_stop_vm(vm); - return -1; - } - - continue; - } - - PrintDebug("Starting virtual core %u on logical core %u\n", - vcore_id, core_idx); - - sprintf(core->exec_name, "%s-%u", vm->name, vcore_id); + for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) { - PrintDebug("run: core=%u, func=0x%p, arg=0x%p, name=%s\n", - core_idx, start_core, core, core->exec_name); + struct guest_info * core = &(vm->cores[vcore_id]); - core->core_run_state = CORE_STOPPED; // core zero will turn itself on - core->pcpu_id = core_idx; - core->core_thread = V3_CREATE_THREAD_ON_CPU(core_idx, start_core, core, core->exec_name); + PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n", + vcore_id, core->pcpu_id); - if (core->core_thread == NULL) { - PrintError("Thread launch failed\n"); - v3_stop_vm(vm); - return -1; + if (core->core_run_state==CORE_INVALID) { + // launch of a fresh VM + core->core_run_state = CORE_STOPPED; + // core zero will turn itself on + } else { + // this is a resume - use whatever its current run_state is } - vcore_id--; - } + V3_START_THREAD(core->core_thread); - if (vcore_id >= 0) { - PrintError("Error starting VM: Not enough available CPU cores\n"); - v3_stop_vm(vm); - return -1; } - return 0; } @@ -382,19 +464,19 @@ int v3_reset_vm_core(struct guest_info * core, addr_t rip) { #ifdef V3_CONFIG_SVM case V3_SVM_CPU: case V3_SVM_REV3_CPU: - PrintDebug("Resetting SVM Guest CPU %d\n", core->vcpu_id); + PrintDebug(core->vm_info, core, "Resetting SVM Guest CPU %d\n", core->vcpu_id); return v3_reset_svm_vm_core(core, rip); #endif #ifdef V3_CONFIG_VMX case V3_VMX_CPU: case V3_VMX_EPT_CPU: case V3_VMX_EPT_UG_CPU: - PrintDebug("Resetting VMX Guest CPU %d\n", core->vcpu_id); + PrintDebug(core->vm_info, core, "Resetting VMX Guest CPU %d\n", core->vcpu_id); return v3_reset_vmx_vm_core(core, rip); #endif case V3_INVALID_CPU: default: - PrintError("CPU has no virtualization Extensions\n"); + PrintError(core->vm_info, core, "CPU has no virtualization Extensions\n"); break; } @@ -402,37 +484,140 @@ int v3_reset_vm_core(struct guest_info * core, addr_t rip) { } +// resets the whole VM (non-HVM) or the ROS (HVM) +int v3_reset_vm(struct v3_vm_info *vm) +{ +#ifdef V3_CONFIG_HVM + if (vm->hvm_state.is_hvm) { + return v3_reset_vm_extended(vm,V3_VM_RESET_ROS,0); + } else { + return v3_reset_vm_extended(vm,V3_VM_RESET_ALL,0); + } +#else + return v3_reset_vm_extended(vm,V3_VM_RESET_ALL,0); +#endif +} + +int v3_reset_vm_extended(struct v3_vm_info *vm, v3_vm_reset_type t, void *data) +{ + uint32_t start, end, i; + uint32_t newcount; + + if (vm->run_state != VM_RUNNING) { + PrintError(vm,VCORE_NONE,"Attempt to 
reset VM in state %d (must be in running state)\n",vm->run_state); + return -1; + } + + + switch (t) { + case V3_VM_RESET_ALL: +#ifdef V3_CONFIG_HVM + if (vm->hvm_state.is_hvm) { + PrintError(vm,VCORE_NONE,"Attempt to do ALL reset of HVM (not allowed)\n"); + return -1; + } +#endif + start=0; end=vm->num_cores-1; + break; +#ifdef V3_CONFIG_HVM + case V3_VM_RESET_HRT: + case V3_VM_RESET_ROS: + if (vm->hvm_state.is_hvm) { + if (t==V3_VM_RESET_HRT) { + start = vm->hvm_state.first_hrt_core; + end = vm->num_cores-1; + } else { + start = 0; + end = vm->hvm_state.first_hrt_core-1; + } + } else { + PrintError(vm,VCORE_NONE,"This is not an HVM and so HVM-specific resets do not apply\n"); + return -1; + } +#endif + break; + case V3_VM_RESET_CORE_RANGE: + start = ((uint32_t*)data)[0]; + end = ((uint32_t*)data)[1]; + break; + default: + PrintError(vm,VCORE_NONE,"Unsupported reset type %d for this VM\n",t); + return -1; + break; + } + + PrintDebug(vm,VCORE_NONE,"Resetting cores %d through %d\n",start,end); + + newcount = end-start+1; + + for (i=start;i<=end;i++) { + if (!(vm->cores[i].core_run_state == CORE_RUNNING || vm->cores[i].core_run_state == CORE_STOPPED)) { + PrintError(vm,VCORE_NONE,"Cannot reset VM as core %u is in state %d (must be running or stopped)\n",i,vm->cores[i].core_run_state); + return -1; + } + } + + + // This had better be the only thread using the barrier at this point... + v3_init_counting_barrier(&vm->reset_barrier,newcount); + + // OK, I am the reseter, tell the relevant cores what to do + // each will atomically decrement the reset countdown and then + // spin waiting for it to hit zero. + + for (i=start;i<=end;i++) { + vm->cores[i].core_run_state = CORE_RESETTING; + // force exit of core + v3_interrupt_cpu(vm, vm->cores[i].pcpu_id, 0); + } + + // we don't wait for reset to finish + // because reset could have been initiated by a core + + return 0; +} + /* move a virtual core to different physical core */ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) { struct guest_info * core = NULL; + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to move core of nonexistent VM\n"); + return -1; + } + if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) { - PrintError("Attempted to migrate invalid virtual core (%d)\n", vcore_id); + PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id); return -1; } core = &(vm->cores[vcore_id]); if (target_cpu == core->pcpu_id) { - PrintError("Attempted to migrate to local core (%d)\n", target_cpu); + PrintError(vm, core, "Attempted to migrate to local core (%d)\n", target_cpu); // well that was pointless return 0; } if (core->core_thread == NULL) { - PrintError("Attempted to migrate a core without a valid thread context\n"); + PrintError(vm, core, "Attempted to migrate a core without a valid thread context\n"); return -1; } while (v3_raise_barrier(vm, NULL) == -1); - V3_Print("Performing Migration from %d to %d\n", core->pcpu_id, target_cpu); + V3_Print(vm, core, "Performing Migration from %d to %d\n", core->pcpu_id, target_cpu); // Double check that we weren't preemptively migrated if (target_cpu != core->pcpu_id) { - V3_Print("Moving Core\n"); + V3_Print(vm, core, "Moving Core\n"); + + if(v3_cpu_mapper_admit_core(vm, vcore_id, target_cpu) == -1){ + PrintError(vm, core, "Core %d can not be admitted in cpu %d\n",vcore_id, target_cpu); + return -1; + } #ifdef V3_CONFIG_VMX @@ -440,7 +625,7 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) { case V3_VMX_CPU: case 
V3_VMX_EPT_CPU: case V3_VMX_EPT_UG_CPU: - PrintDebug("Flushing VMX Guest CPU %d\n", core->vcpu_id); + PrintDebug(vm, core, "Flushing VMX Guest CPU %d\n", core->vcpu_id); V3_Call_On_CPU(core->pcpu_id, (void (*)(void *))v3_flush_vmx_vm_core, (void *)core); break; default: @@ -449,7 +634,7 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) { #endif if (V3_MOVE_THREAD_TO_CPU(target_cpu, core->core_thread) != 0) { - PrintError("Failed to move Vcore %d to CPU %d\n", + PrintError(vm, core, "Failed to move Vcore %d to CPU %d\n", core->vcpu_id, target_cpu); v3_lower_barrier(vm); return -1; @@ -461,7 +646,7 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) { */ core->pcpu_id = target_cpu; - V3_Print("core now at %d\n", core->pcpu_id); + V3_Print(vm, core, "core now at %d\n", core->pcpu_id); } v3_lower_barrier(vm); @@ -469,13 +654,137 @@ int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) { return 0; } +/* move a memory region to memory with affinity for a specific physical core */ +int v3_move_vm_mem(struct v3_vm_info * vm, void *gpa, int target_cpu) { + int old_node; + int new_node; + struct v3_mem_region *reg; + void *new_hpa; + int num_pages; + void *old_hpa; + int i; + + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to move memory of nonexistent VM\n"); + return -1; + } + + old_node = v3_numa_gpa_to_node(vm,(addr_t)gpa); + + if (old_node<0) { + PrintError(vm, VCORE_NONE, "Cannot determine current node of gpa %p\n",gpa); + return -1; + } + + new_node = v3_numa_cpu_to_node(target_cpu); + + if (new_node<0) { + PrintError(vm, VCORE_NONE, "Cannot determine current node of cpu %d\n",target_cpu); + return -1; + } + + if (new_node==old_node) { + PrintDebug(vm, VCORE_NONE, "Affinity is already established - ignoring request\n"); + return 0; + } + + // We are now going to change the universe, so + // we'll barrier everyone first + while (v3_raise_barrier(vm, NULL) == -1); + + // get region + + reg = v3_get_mem_region(vm, V3_MEM_CORE_ANY, (addr_t) gpa); + + if (!reg) { + PrintError(vm, VCORE_NONE, "Attempt to migrate non-existent memory\n"); + goto out_fail; + } + + if (!(reg->flags.base) || !(reg->flags.alloced)) { + PrintError(vm, VCORE_NONE, "Attempt to migrate invalid region: base=%d alloced=%d\n", reg->flags.base, reg->flags.alloced); + goto out_fail; + } + + // we now have the allocated base region corresponding to - and not a copy + // we will rewrite this region after moving its contents + + // first, let's double check that we are in fact changing the numa_id... + + if (reg->numa_id==new_node) { + PrintDebug(vm, VCORE_NONE, "Affinity for this base region is already established - ignoring...\n"); + goto out_success; + } + + // region uses exclusive addressing [guest_start,guest_end) + num_pages = (reg->guest_end-reg->guest_start)/PAGE_SIZE; + + new_hpa = V3_AllocPagesExtended(num_pages, + PAGE_SIZE_4KB, + new_node, + 0, 0); // no constraints given new shadow pager impl + + if (!new_hpa) { + PrintError(vm, VCORE_NONE, "Cannot allocate memory for new base region...\n"); + goto out_fail; + } + + // Note, assumes virtual contiguity in the host OS... 
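+ // Copy the base region's contents into the pages just allocated on the target node;
+ // the region's host_addr/numa_id are rewritten below and stale shadow/nested mappings
+ // are flushed on every core before the old pages are released.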
+ memcpy(V3_VAddr((void*)new_hpa), V3_VAddr((void*)(reg->host_addr)), num_pages*PAGE_SIZE); + + old_hpa = (void*)(reg->host_addr); + old_node = (int)(reg->numa_id); + + reg->host_addr = (addr_t)new_hpa; + reg->numa_id = v3_numa_hpa_to_node((addr_t)new_hpa); + + // flush all page tables / kill all humans + + for (i=0;inum_cores;i++) { + if (vm->cores[i].shdw_pg_mode==SHADOW_PAGING) { + v3_invalidate_shadow_pts(&(vm->cores[i])); + } else if (vm->cores[i].shdw_pg_mode==NESTED_PAGING) { + // nested invalidator uses inclusive addressing [start,end], not [start,end) + v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1,NULL,NULL); + } else { + PrintError(vm,VCORE_NONE, "Cannot determine how to invalidate paging structures! Reverting to previous region.\n"); + // We'll restore things... + reg->host_addr = (addr_t) old_hpa; + reg->numa_id = old_node; + V3_FreePages(new_hpa,num_pages); + goto out_fail; + } + } + + // Now the old region can go away... + V3_FreePages(old_hpa,num_pages); + + PrintDebug(vm,VCORE_NONE,"Migration of memory complete - new region is %p to %p\n", + (void*)(reg->host_addr),(void*)(reg->host_addr+num_pages*PAGE_SIZE-1)); + + out_success: + v3_lower_barrier(vm); + return 0; + + + out_fail: + v3_lower_barrier(vm); + return -1; +} int v3_stop_vm(struct v3_vm_info * vm) { + struct guest_info * running_core; + + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to stop nonexistent VM\n"); + return -1; + } + if ((vm->run_state != VM_RUNNING) && (vm->run_state != VM_SIMULATING)) { - PrintError("Tried to stop VM in invalid runstate (%d)\n", vm->run_state); + PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state); return -1; } @@ -494,6 +803,7 @@ int v3_stop_vm(struct v3_vm_info * vm) { for (i = 0; i < vm->num_cores; i++) { if (vm->cores[i].core_run_state != CORE_STOPPED) { + running_core = &vm->cores[i]; still_running = 1; } } @@ -502,10 +812,10 @@ int v3_stop_vm(struct v3_vm_info * vm) { break; } - v3_yield(NULL,-1); + v3_scheduler_stop_core(running_core); } - V3_Print("VM stopped. Returning\n"); + V3_Print(vm, VCORE_NONE,"VM stopped. 
Returning\n"); return 0; } @@ -513,8 +823,13 @@ int v3_stop_vm(struct v3_vm_info * vm) { int v3_pause_vm(struct v3_vm_info * vm) { + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to pause nonexistent VM\n"); + return -1; + } + if (vm->run_state != VM_RUNNING) { - PrintError("Tried to pause a VM that was not running\n"); + PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n"); return -1; } @@ -528,8 +843,13 @@ int v3_pause_vm(struct v3_vm_info * vm) { int v3_continue_vm(struct v3_vm_info * vm) { + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to continue nonexistent VM\n"); + return -1; + } + if (vm->run_state != VM_PAUSED) { - PrintError("Tried to continue a VM that was not paused\n"); + PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n"); return -1; } @@ -547,9 +867,10 @@ static int sim_callback(struct guest_info * core, void * private_data) { v3_bitmap_set(timeout_map, core->vcpu_id); - V3_Print("Simulation callback activated (guest_rip=%p)\n", (void *)core->rip); + V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip); while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) { + // We spin here if there is noone to yield to v3_yield(NULL,-1); } @@ -566,8 +887,13 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) { uint64_t cycles = 0; uint64_t cpu_khz = V3_CPU_KHZ(); + if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to simulate nonexistent VM\n"); + return -1; + } + if (vm->run_state != VM_PAUSED) { - PrintError("VM must be paused before simulation begins\n"); + PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n"); return -1; } @@ -585,18 +911,18 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) { - V3_Print("Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz); + V3_Print(vm, VCORE_NONE,"Simulating %u msecs (%llu cycles) [CPU_KHZ=%llu]\n", msecs, cycles, cpu_khz); // set timeout for (i = 0; i < vm->num_cores; i++) { if (v3_add_core_timeout(&(vm->cores[i]), cycles, sim_callback, &timeout_map) == -1) { - PrintError("Could not register simulation timeout for core %d\n", i); + PrintError(vm, VCORE_NONE,"Could not register simulation timeout for core %d\n", i); return -1; } } - V3_Print("timeouts set on all cores\n "); + V3_Print(vm, VCORE_NONE,"timeouts set on all cores\n "); // Run the simulation @@ -605,7 +931,7 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) { v3_lower_barrier(vm); - V3_Print("Barrier lowered: We are now Simulating!!\n"); + V3_Print(vm, VCORE_NONE,"Barrier lowered: We are now Simulating!!\n"); // block until simulation is complete while (all_blocked == 0) { @@ -620,12 +946,13 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) { if (all_blocked == 1) { break; } - + + // Intentionally spin if there is no one to yield to v3_yield(NULL,-1); } - V3_Print("Simulation is complete\n"); + V3_Print(vm, VCORE_NONE,"Simulation is complete\n"); // Simulation is complete // Reset back to PAUSED state @@ -641,82 +968,175 @@ int v3_simulate_vm(struct v3_vm_info * vm, unsigned int msecs) { } -int v3_get_state_vm(struct v3_vm_info *vm, struct v3_vm_state *s) +int v3_get_state_vm(struct v3_vm_info *vm, + struct v3_vm_base_state *base, + struct v3_vm_core_state *core, + struct v3_vm_mem_state *mem) { - uint32_t i; - uint32_t numcores = s->num_vcores > vm->num_cores ? 
vm->num_cores : s->num_vcores; - - switch (vm->run_state) { - case VM_INVALID: s->state = V3_VM_INVALID; break; - case VM_RUNNING: s->state = V3_VM_RUNNING; break; - case VM_STOPPED: s->state = V3_VM_STOPPED; break; - case VM_PAUSED: s->state = V3_VM_PAUSED; break; - case VM_ERROR: s->state = V3_VM_ERROR; break; - case VM_SIMULATING: s->state = V3_VM_SIMULATING; break; - default: s->state = V3_VM_UNKNOWN; break; - } + uint32_t i; + uint32_t numcores; + uint32_t numregions; + extern uint64_t v3_mem_block_size; + void *cur_gpa; - s->mem_base_paddr = (void*)(vm->mem_map.base_region.host_addr); - s->mem_size = vm->mem_size; - - s->num_vcores = numcores; - - for (i=0;icores[i].core_run_state) { - case CORE_INVALID: s->vcore[i].state = V3_VCORE_INVALID; break; - case CORE_RUNNING: s->vcore[i].state = V3_VCORE_RUNNING; break; - case CORE_STOPPED: s->vcore[i].state = V3_VCORE_STOPPED; break; - default: s->vcore[i].state = V3_VCORE_UNKNOWN; break; - } - switch (vm->cores[i].cpu_mode) { - case REAL: s->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break; - case PROTECTED: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break; - case PROTECTED_PAE: s->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break; - case LONG: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break; - case LONG_32_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break; - case LONG_16_COMPAT: s->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break; - default: s->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break; - } - switch (vm->cores[i].shdw_pg_mode) { - case SHADOW_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break; - case NESTED_PAGING: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break; - default: s->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break; - } - switch (vm->cores[i].mem_mode) { - case PHYSICAL_MEM: s->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break; - case VIRTUAL_MEM: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break; - default: s->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break; - } - - s->vcore[i].pcore=vm->cores[i].pcpu_id; - s->vcore[i].last_rip=(void*)(vm->cores[i].rip); - s->vcore[i].num_exits=vm->cores[i].num_exits; - } + if (!vm || !base || !core || !mem) { + PrintError(VM_NONE, VCORE_NONE, "Invalid request to v3_get_state_vm\n"); + return -1; + } + + numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores; + numregions = mem->num_regions > vm->mem_map.num_base_regions ? 
vm->mem_map.num_base_regions : mem->num_regions; + + switch (vm->run_state) { + case VM_INVALID: base->state = V3_VM_INVALID; break; + case VM_RUNNING: base->state = V3_VM_RUNNING; break; + case VM_STOPPED: base->state = V3_VM_STOPPED; break; + case VM_PAUSED: base->state = V3_VM_PAUSED; break; + case VM_ERROR: base->state = V3_VM_ERROR; break; + case VM_SIMULATING: base->state = V3_VM_SIMULATING; break; + case VM_RESETTING: base->state = V3_VM_RESETTING; break; + default: base->state = V3_VM_UNKNOWN; break; + } + + base->vm_type = V3_VM_GENERAL; + +#ifdef V3_CONFIG_HVM + if (vm->hvm_state.is_hvm) { + base->vm_type = V3_VM_HVM; + } +#endif + + for (i=0;icores[i].core_run_state) { + case CORE_INVALID: core->vcore[i].state = V3_VCORE_INVALID; break; + case CORE_RUNNING: core->vcore[i].state = V3_VCORE_RUNNING; break; + case CORE_STOPPED: core->vcore[i].state = V3_VCORE_STOPPED; break; + case CORE_RESETTING: core->vcore[i].state = V3_VCORE_RESETTING; break; + default: core->vcore[i].state = V3_VCORE_UNKNOWN; break; + } + switch (vm->cores[i].cpu_mode) { + case REAL: core->vcore[i].cpu_mode = V3_VCORE_CPU_REAL; break; + case PROTECTED: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED; break; + case PROTECTED_PAE: core->vcore[i].cpu_mode = V3_VCORE_CPU_PROTECTED_PAE; break; + case LONG: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG; break; + case LONG_32_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_32_COMPAT; break; + case LONG_16_COMPAT: core->vcore[i].cpu_mode = V3_VCORE_CPU_LONG_16_COMPAT; break; + default: core->vcore[i].cpu_mode = V3_VCORE_CPU_UNKNOWN; break; + } + switch (vm->cores[i].shdw_pg_mode) { + case SHADOW_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_SHADOW; break; + case NESTED_PAGING: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_NESTED; break; + default: core->vcore[i].mem_state = V3_VCORE_MEM_STATE_UNKNOWN; break; + } + switch (vm->cores[i].mem_mode) { + case PHYSICAL_MEM: core->vcore[i].mem_mode = V3_VCORE_MEM_MODE_PHYSICAL; break; + case VIRTUAL_MEM: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_VIRTUAL; break; + default: core->vcore[i].mem_mode=V3_VCORE_MEM_MODE_UNKNOWN; break; + } + + core->vcore[i].vcore_type = V3_VCORE_GENERAL; + +#ifdef V3_CONFIG_HVM + if (vm->hvm_state.is_hvm) { + if (v3_is_hvm_ros_core(&vm->cores[i])) { + core->vcore[i].vcore_type = V3_VCORE_ROS; + } else { + core->vcore[i].vcore_type = V3_VCORE_HRT; + } + } +#endif + + core->vcore[i].pcore=vm->cores[i].pcpu_id; + core->vcore[i].last_rip=(void*)(vm->cores[i].rip); + core->vcore[i].num_exits=vm->cores[i].num_exits; + } + + core->num_vcores=numcores; + + cur_gpa=0; + + for (i=0;iregion[i].guest_paddr = cur_gpa; + mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr); + mem->region[i].size = v3_mem_block_size; +#ifdef V3_CONFIG_SWAPPING + mem->region[i].swapped = vm->mem_map.base_regions[i].flags.swapped; + mem->region[i].pinned = vm->mem_map.base_regions[i].flags.pinned; +#else + mem->region[i].swapped = 0; + mem->region[i].pinned = 0; +#endif + + cur_gpa += mem->region[i].size; + } + + mem->num_regions=numregions; + + + mem->mem_size=vm->mem_size; + mem->ros_mem_size=vm->mem_size; + +#ifdef V3_CONFIG_HVM + if (vm->hvm_state.is_hvm) { + mem->ros_mem_size=v3_get_hvm_ros_memsize(vm); + } +#endif - return 0; + return 0; +} + +int v3_get_state_sizes_vm(struct v3_vm_info *vm, + unsigned long long *num_vcores, + unsigned long long *num_regions) +{ + if (!vm || !num_vcores || !num_regions) { + PrintError(VM_NONE, VCORE_NONE, "Invalid request to v3_get_state_sizes\n"); + return 
-1; + } + + *num_vcores = vm->num_cores; + *num_regions = vm->mem_map.num_base_regions; + + return 0; } #ifdef V3_CONFIG_CHECKPOINT #include -int v3_save_vm(struct v3_vm_info * vm, char * store, char * url) { - return v3_chkpt_save_vm(vm, store, url); +int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) { + if (!vm || !store || !url) { + PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_save_vm\n"); + return -1; + } + return v3_chkpt_save_vm(vm, store, url, opts); } -int v3_load_vm(struct v3_vm_info * vm, char * store, char * url) { - return v3_chkpt_load_vm(vm, store, url); +int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) { + if (!vm || !store || !url) { + PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_load_vm\n"); + return -1; + } + return v3_chkpt_load_vm(vm, store, url, opts); } #ifdef V3_CONFIG_LIVE_MIGRATION -int v3_send_vm(struct v3_vm_info * vm, char * store, char * url) { - return v3_chkpt_send_vm(vm, store, url); +int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) { + if (!vm || !store || !url) { + PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_send_vm\n"); + return -1; + } + return v3_chkpt_send_vm(vm, store, url, opts); } -int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url) { - return v3_chkpt_receive_vm(vm, store, url); +int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) { + if (!vm || !store || !url) { + PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_receive_vm\n"); + return -1; + } + return v3_chkpt_receive_vm(vm, store, url, opts); } #endif @@ -727,9 +1147,14 @@ int v3_free_vm(struct v3_vm_info * vm) { int i = 0; // deinitialize guest (free memory, etc...) 
+ if (!vm) { + PrintError(VM_NONE, VCORE_NONE, "Asked to free nonexistent VM\n"); + return -1; + } + if ((vm->run_state != VM_STOPPED) && (vm->run_state != VM_ERROR)) { - PrintError("Tried to Free VM in invalid runstate (%d)\n", vm->run_state); + PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state); return -1; } @@ -737,10 +1162,12 @@ int v3_free_vm(struct v3_vm_info * vm) { // free cores for (i = 0; i < vm->num_cores; i++) { + v3_scheduler_free_core(&(vm->cores[i])); v3_free_core(&(vm->cores[i])); } // free vm + v3_scheduler_free_vm(vm); v3_free_vm_internal(vm); v3_free_config(vm); @@ -780,53 +1207,6 @@ v3_cpu_mode_t v3_get_host_cpu_mode() { #endif - - - - -void v3_yield_cond(struct guest_info * info, int usec) { - uint64_t cur_cycle; - cur_cycle = v3_get_host_time(&info->time_state); - - if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) { - //PrintDebug("Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", - // (void *)cur_cycle, (void *)info->yield_start_cycle, - // (void *)info->yield_cycle_period); - - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } -} - - -/* - * unconditional cpu yield - * if the yielding thread is a guest context, the guest quantum is reset on resumption - * Non guest context threads should call this function with a NULL argument - * - * usec <0 => the non-timed yield is used - * usec >=0 => the timed yield is used, which also usually implies interruptible - */ -void v3_yield(struct guest_info * info, int usec) { - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - if (info) { - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } -} - - - - void v3_print_cond(const char * fmt, ...) { if (v3_dbg_enable == 1) { char buf[2048]; @@ -836,7 +1216,7 @@ void v3_print_cond(const char * fmt, ...) { vsnprintf(buf, 2048, fmt, ap); va_end(ap); - V3_Print("%s", buf); + V3_Print(VM_NONE, VCORE_NONE,"%s", buf); } } @@ -868,7 +1248,26 @@ int v3_vm_enter(struct guest_info * info) { break; #endif default: - PrintError("Attemping to enter a guest on an invalid CPU\n"); + PrintError(info->vm_info, info, "Attemping to enter a guest on an invalid CPU\n"); return -1; } } + + +void *v3_get_host_vm(struct v3_vm_info *x) +{ + if (x) { + return x->host_priv_data; + } else { + return 0; + } +} + +int v3_get_vcore(struct guest_info *x) +{ + if (x) { + return x->vcpu_id; + } else { + return -1; + } +}
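The functions touched above form the embedder-facing lifecycle of a Palacios VM. As a rough orientation aid, here is a minimal sketch of how a host-OS glue layer might drive that lifecycle after this change (note the new options argument to Init_V3 and the cpu_mask argument to v3_create_vm). The hook structure contents, the header path, the configuration blob, the option string, and the launch_example wrapper are placeholders assumed for illustration; they are not defined by this diff.

/* Hypothetical embedding sketch - not part of this patch.
 * Assumes 'hooks' has been fully populated by the host OS and that
 * 'cfg_blob' is a guest configuration image that v3_config_guest()
 * understands.  Offers the first 8 physical CPUs to Palacios.      */

#include <palacios/vmm.h>            /* assumed public header path */

static void launch_example(struct v3_os_hooks *hooks, void *cfg_blob)
{
    char cpu_mask = 0xff;            /* one bit per physical CPU (byte 0 = CPUs 0-7) */
    struct v3_vm_info *vm;

    /* Global init: per-CPU SVM/VMX bring-up, memory manager, cpu_mapper,
     * scheduler, and extensions.  The options string goes to
     * v3_parse_options(); its syntax is not shown in this diff, and an
     * empty string is assumed to be accepted.                          */
    Init_V3(hooks, &cpu_mask, 8, "");

    /* Configure the guest, register it with the cpu_mapper and scheduler,
     * and create (but do not yet start) one thread per virtual core.    */
    vm = v3_create_vm(cfg_blob, NULL /* host private data */, "example-vm", 0xff);
    if (!vm) {
        return;
    }

    /* Scheduler admission happens here; each core thread is started and
     * core 0 then turns itself on (see v3_start_vm above).              */
    if (v3_start_vm(vm, 0xff) == -1) {
        v3_free_vm(vm);
        return;
    }

    /* ... guest runs ... */

    v3_stop_vm(vm);      /* waits until every core reaches CORE_STOPPED       */
    v3_free_vm(vm);      /* only legal once the VM is VM_STOPPED or VM_ERROR  */

    Shutdown_V3();       /* tears everything down in reverse order of Init_V3 */
}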