#include <palacios/vmm_timeout.h>
#include <palacios/vmm_options.h>
#include <palacios/vmm_cpu_mapper.h>
+#include <palacios/vmm_direct_paging.h>
+#include <interfaces/vmm_numa.h>
+#include <interfaces/vmm_file.h>
#ifdef V3_CONFIG_SVM
#include <palacios/svm.h>
static void init_cpu(void * arg) {
uint32_t cpu_id = (uint32_t)(addr_t)arg;
+ v3_init_fp();
+
#ifdef V3_CONFIG_SVM
if (v3_is_svm_capable()) {
PrintDebug(VM_NONE, VCORE_NONE, "Machine is SVM Capable\n");
PrintError(VM_NONE, VCORE_NONE, "CPU has no virtualization Extensions\n");
break;
}
+
+ v3_deinit_fp();
+
+}
+
+
+/*
+ * Report whether the host CPU is currently executing in long (64 bit) mode.
+ *
+ * Reads the EFER MSR (0xc0000080) through v3_get_msr and inspects the value
+ * returned in the `low` out-parameter. Returns nonzero only when both bits
+ * selected by mask 0x500 (bits 8 and 10, the LME and LMA flags) are set,
+ * and 0 otherwise. The `high` out-parameter is required by the call but is
+ * not examined.
+ */
+static int in_long_mode(void)
+{
+    const uint32_t efer_msr = 0xc0000080;    // EFER
+    const uint32_t efer_lme_lma = 0x500;     // LMA and LME set
+    uint32_t high, low;
+
+    v3_get_msr(efer_msr, &high, &low);
+
+    // Both flags must be present; either one alone is not long mode.
+    return ((low & efer_lme_lma) == efer_lme_lma);
}
+
void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *options) {
int i = 0;
V3_Print(VM_NONE, VCORE_NONE, "V3 Print statement to fix a Kitten page fault bug\n");
+
+#ifndef __V3_64BIT__
+#error Palacios does not support compilation for a 32 bit host OS!!!!
+#else
+ if (!in_long_mode()) {
+ PrintError(VM_NONE,VCORE_NONE,"Palacios supports execution only in long mode (64 bit).\n");
+ return;
+ }
+#endif
+
// Set global variables.
os_hooks = hooks;
// Parse host-os defined options into an easily-accessed format.
v3_parse_options(options);
+#ifdef V3_CONFIG_HVM
+ v3_init_hvm();
+#endif
+
// Memory manager initialization
v3_init_mem();
// Register all shadow paging handlers
V3_init_shdw_paging();
+#ifdef V3_CONFIG_SWAPPING
+ v3_init_swapping();
+#endif
+
// Initialize the cpu_mapper framework (must be before extensions)
V3_init_cpu_mapper();
V3_deinit_scheduling();
V3_deinit_cpu_mapper();
+
+#ifdef V3_CONFIG_SWAPPING
+ v3_deinit_swapping();
+#endif
V3_deinit_shdw_paging();
v3_deinit_mem();
+#ifdef V3_CONFIG_HVM
+ v3_deinit_hvm();
+#endif
+
v3_deinit_options();
return v3_cpu_types[cpu_id];
}
+/*
+ * Thread entry point for one virtual core.
+ *
+ * p is cast to the struct guest_info of the core to run. The core is first
+ * registered with the scheduler; a registration failure is logged but does
+ * not stop the launch. Control then passes to the SVM or VMX guest entry
+ * routine selected by v3_mach_type, and that routine's result is returned.
+ * Returns -1 when v3_mach_type matches none of the compiled-in cases.
+ */
+static int start_core(void * p)
+{
+ struct guest_info * core = (struct guest_info *)p;
+
+ // A registration failure is only reported; execution continues regardless.
+ if (v3_scheduler_register_core(core) == -1){
+ PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
+ }
+
+ PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
+ core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
-struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
+ // Dispatch on the detected machine type; each case returns directly, so
+ // the break statements after the returns are never reached.
+ switch (v3_mach_type) {
+#ifdef V3_CONFIG_SVM
+ case V3_SVM_CPU:
+ case V3_SVM_REV3_CPU:
+ return v3_start_svm_guest(core);
+ break;
+#endif
+ // NOTE(review): VMX is guarded with #if while SVM above uses #ifdef -
+ // confirm the difference is intended.
+#if V3_CONFIG_VMX
+ case V3_VMX_CPU:
+ case V3_VMX_EPT_CPU:
+ case V3_VMX_EPT_UG_CPU:
+ return v3_start_vmx_guest(core);
+ break;
+#endif
+ default:
+ PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
+ return -1;
+ }
+ // should not happen: every switch path above returns
+ return 0;
+}
+
+struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name, unsigned int cpu_mask) {
struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
+ int vcore_id = 0;
if (vm == NULL) {
PrintError(VM_NONE, VCORE_NONE, "Could not configure guest\n");
memset(vm->name, 0, 128);
strncpy(vm->name, name, 127);
+ if(v3_cpu_mapper_register_vm(vm) == -1) {
+
+ PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
+ }
+
/*
* Register this VM with the palacios scheduler. It will ask for admission
* prior to launch.
PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
}
- return vm;
-}
+ if (v3_cpu_mapper_admit_vm(vm,cpu_mask) != 0){
+ PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
+ }
+ for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
+ struct guest_info * core = &(vm->cores[vcore_id]);
+ PrintDebug(vm, VCORE_NONE, "Creating virtual core %u on logical core %u\n",
+ vcore_id, core->pcpu_id);
-static int start_core(void * p)
-{
- struct guest_info * core = (struct guest_info *)p;
+ sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
- if (v3_scheduler_register_core(core) == -1){
- PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
- }
+ PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
+ core->pcpu_id, start_core, core, core->exec_name);
- PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n",
- core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
+ core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
+
+ if (core->core_thread == NULL) {
+ PrintError(vm, VCORE_NONE, "Thread creation failed\n");
+ v3_stop_vm(vm);
+ return NULL;
+ }
- switch (v3_mach_type) {
-#ifdef V3_CONFIG_SVM
- case V3_SVM_CPU:
- case V3_SVM_REV3_CPU:
- return v3_start_svm_guest(core);
- break;
-#endif
-#if V3_CONFIG_VMX
- case V3_VMX_CPU:
- case V3_VMX_EPT_CPU:
- case V3_VMX_EPT_UG_CPU:
- return v3_start_vmx_guest(core);
- break;
-#endif
- default:
- PrintError(core->vm_info, core, "Attempting to enter a guest on an invalid CPU\n");
- return -1;
}
- // should not happen
- return 0;
+ return vm;
}
int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
- uint32_t i,j;
+ uint32_t i;
uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
uint32_t avail_cores = 0;
int vcore_id = 0;
- extern uint64_t v3_mem_block_size;
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to start nonexistent VM\n");
+ return -1;
+ }
if (vm->run_state != VM_STOPPED) {
PrintError(vm, VCORE_NONE, "VM has already been launched (state=%d)\n", (int)vm->run_state);
return -1;
}
-
- // Do not run if any core is using shadow paging and we are out of 4 GB bounds
- for (i=0;i<vm->num_cores;i++) {
- if (vm->cores[i].shdw_pg_mode == SHADOW_PAGING) {
- for (j=0;j<vm->mem_map.num_base_regions;j++) {
- if ((vm->mem_map.base_regions[i].host_addr + v3_mem_block_size) >= 0x100000000ULL) {
- PrintError(vm, VCORE_NONE, "Base memory region %d exceeds 4 GB boundary with shadow paging enabled on core %d.\n",j, i);
- PrintError(vm, VCORE_NONE, "Any use of non-64 bit mode in the guest is likely to fail in this configuration.\n");
- PrintError(vm, VCORE_NONE, "If you would like to proceed anyway, remove this check and recompile Palacios.\n");
- PrintError(vm, VCORE_NONE, "Alternatively, change this VM to use nested paging.\n");
- return -1;
- }
- }
- }
+#if V3_CONFIG_HVM
+ if (v3_setup_hvm_vm_for_boot(vm)) {
+ PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n");
+ return -1;
}
-
+#endif
+
/// CHECK IF WE ARE MULTICORE ENABLED....
V3_Print(vm, VCORE_NONE, "V3 -- Starting VM (%u cores)\n", vm->num_cores);
}
vm->avail_cores = avail_cores;
-
+
if (v3_scheduler_admit_vm(vm) != 0){
PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
}
- if (v3_cpu_mapper_admit_vm(vm) != 0){
- PrintError(vm, VCORE_NONE,"Error admitting VM %s for mapping", vm->name);
- }
-
vm->run_state = VM_RUNNING;
- if(v3_cpu_mapper_register_vm(vm,cpu_mask) == -1) {
-
- PrintError(vm, VCORE_NONE,"Error registering VM with cpu_mapper\n");
- }
-
-
for (vcore_id = 0; vcore_id < vm->num_cores; vcore_id++) {
struct guest_info * core = &(vm->cores[vcore_id]);
PrintDebug(vm, VCORE_NONE, "Starting virtual core %u on logical core %u\n",
vcore_id, core->pcpu_id);
-
- sprintf(core->exec_name, "%s-%u", vm->name, vcore_id);
- PrintDebug(vm, VCORE_NONE, "run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
- core->pcpu_id, start_core, core, core->exec_name);
+ if (core->core_run_state==CORE_INVALID) {
+ // launch of a fresh VM
+ core->core_run_state = CORE_STOPPED;
+ // core zero will turn itself on
+ } else {
+ // this is a resume - use whatever its current run_state is
+ }
- core->core_run_state = CORE_STOPPED; // core zero will turn itself on
- core->core_thread = V3_CREATE_THREAD_ON_CPU(core->pcpu_id, start_core, core, core->exec_name);
+ V3_START_THREAD(core->core_thread);
- if (core->core_thread == NULL) {
- PrintError(vm, VCORE_NONE, "Thread launch failed\n");
- v3_stop_vm(vm);
- return -1;
- }
}
return 0;
int v3_move_vm_core(struct v3_vm_info * vm, int vcore_id, int target_cpu) {
struct guest_info * core = NULL;
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to move core of nonexistent VM\n");
+ return -1;
+ }
+
if ((vcore_id < 0) || (vcore_id >= vm->num_cores)) {
PrintError(vm, VCORE_NONE, "Attempted to migrate invalid virtual core (%d)\n", vcore_id);
return -1;
return 0;
}
+/*
+ * Move the base memory region containing guest physical address gpa to host
+ * memory with affinity for physical core target_cpu.
+ *
+ * The current node of gpa (v3_numa_gpa_to_node) and the node of target_cpu
+ * (v3_numa_cpu_to_node) are looked up first; if they match, the call
+ * returns 0 without changing anything. Otherwise a barrier is raised on the
+ * VM, new pages are allocated on the target node, the region contents are
+ * copied, the region is repointed at the new pages, the paging structures
+ * of every core are invalidated, and the old pages are freed. The barrier
+ * is lowered on every exit path taken after it was raised.
+ *
+ * Returns 0 on success (including both "already established" cases) and -1
+ * on any failure.
+ */
+int v3_move_vm_mem(struct v3_vm_info * vm, void *gpa, int target_cpu) {
+ int old_node;
+ int new_node;
+ struct v3_mem_region *reg;
+ void *new_hpa;
+ int num_pages;
+ void *old_hpa;
+ int i;
+
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to move memory of nonexistent VM\n");
+ return -1;
+ }
+
+ old_node = v3_numa_gpa_to_node(vm,(addr_t)gpa);
+
+ if (old_node<0) {
+ PrintError(vm, VCORE_NONE, "Cannot determine current node of gpa %p\n",gpa);
+ return -1;
+ }
+
+ new_node = v3_numa_cpu_to_node(target_cpu);
+
+ if (new_node<0) {
+ PrintError(vm, VCORE_NONE, "Cannot determine current node of cpu %d\n",target_cpu);
+ return -1;
+ }
+
+ // Nothing to do when the gpa already lives on the target node; this
+ // early exit happens before the barrier is raised.
+ if (new_node==old_node) {
+ PrintDebug(vm, VCORE_NONE, "Affinity is already established - ignoring request\n");
+ return 0;
+ }
+
+ // We are now going to change the universe, so
+ // we'll barrier everyone first
+ // (retry until v3_raise_barrier stops returning -1)
+
+ while (v3_raise_barrier(vm, NULL) == -1);
+
+ // get region
+
+ reg = v3_get_mem_region(vm, V3_MEM_CORE_ANY, (addr_t) gpa);
+
+ if (!reg) {
+ PrintError(vm, VCORE_NONE, "Attempt to migrate non-existent memory\n");
+ goto out_fail;
+ }
+
+ // Only regions flagged both base and alloced may be migrated.
+ if (!(reg->flags.base) || !(reg->flags.alloced)) {
+ PrintError(vm, VCORE_NONE, "Attempt to migrate invalid region: base=%d alloced=%d\n", reg->flags.base, reg->flags.alloced);
+ goto out_fail;
+ }
+
+ // we now have the allocated base region corresponding to gpa - and not a copy
+ // we will rewrite this region after moving its contents
+
+ // first, let's double check that we are in fact changing the numa_id...
+
+ if (reg->numa_id==new_node) {
+ PrintDebug(vm, VCORE_NONE, "Affinity for this base region is already established - ignoring...\n");
+ goto out_success;
+ }
+
+ // region uses exclusive addressing [guest_start,guest_end)
+ // NOTE(review): the page count divides by PAGE_SIZE while the allocation
+ // below passes PAGE_SIZE_4KB - presumably the same value; confirm.
+ num_pages = (reg->guest_end-reg->guest_start)/PAGE_SIZE;
+ new_hpa = V3_AllocPagesExtended(num_pages,
+ PAGE_SIZE_4KB,
+ new_node,
+ 0); // no constraints given new shadow pager impl
+
+ if (!new_hpa) {
+ PrintError(vm, VCORE_NONE, "Cannot allocate memory for new base region...\n");
+ goto out_fail;
+ }
+
+ // Note, assumes virtual contiguity in the host OS...
+ memcpy(V3_VAddr((void*)new_hpa), V3_VAddr((void*)(reg->host_addr)), num_pages*PAGE_SIZE);
+
+ // Remember the previous placement so it can be restored or freed below.
+ old_hpa = (void*)(reg->host_addr);
+ old_node = (int)(reg->numa_id);
+
+ // Repoint the region; the node id is taken from where the new pages
+ // are reported to live rather than from new_node.
+ reg->host_addr = (addr_t)new_hpa;
+ reg->numa_id = v3_numa_hpa_to_node((addr_t)new_hpa);
+
+ // invalidate the paging structures of every core now that the region
+ // points at new_hpa
+
+ for (i=0;i<vm->num_cores;i++) {
+ if (vm->cores[i].shdw_pg_mode==SHADOW_PAGING) {
+ v3_invalidate_shadow_pts(&(vm->cores[i]));
+ } else if (vm->cores[i].shdw_pg_mode==NESTED_PAGING) {
+ // nested invalidator uses inclusive addressing [start,end], not [start,end)
+ v3_invalidate_nested_addr_range(&(vm->cores[i]),reg->guest_start,reg->guest_end-1,NULL,NULL);
+ } else {
+ PrintError(vm,VCORE_NONE, "Cannot determine how to invalidate paging structures! Reverting to previous region.\n");
+ // We'll restore things...
+ // (the old pages have not been freed yet, so the region can simply
+ // be pointed back at them and the new allocation released)
+ reg->host_addr = (addr_t) old_hpa;
+ reg->numa_id = old_node;
+ V3_FreePages(new_hpa,num_pages);
+ goto out_fail;
+ }
+ }
+
+ // Now the old region can go away...
+ V3_FreePages(old_hpa,num_pages);
+
+ PrintDebug(vm,VCORE_NONE,"Migration of memory complete - new region is %p to %p\n",
+ (void*)(reg->host_addr),(void*)(reg->host_addr+num_pages*PAGE_SIZE-1));
+
+ // Both exits below release the barrier raised above.
+ out_success:
+ v3_lower_barrier(vm);
+ return 0;
+
+
+ out_fail:
+ v3_lower_barrier(vm);
+ return -1;
+}
int v3_stop_vm(struct v3_vm_info * vm) {
+ struct guest_info * running_core;
+
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to stop nonexistent VM\n");
+ return -1;
+ }
+
if ((vm->run_state != VM_RUNNING) &&
(vm->run_state != VM_SIMULATING)) {
PrintError(vm, VCORE_NONE,"Tried to stop VM in invalid runstate (%d)\n", vm->run_state);
for (i = 0; i < vm->num_cores; i++) {
if (vm->cores[i].core_run_state != CORE_STOPPED) {
+ running_core = &vm->cores[i];
still_running = 1;
}
}
break;
}
- v3_yield(NULL,-1);
+ v3_scheduler_stop_core(running_core);
}
V3_Print(vm, VCORE_NONE,"VM stopped. Returning\n");
int v3_pause_vm(struct v3_vm_info * vm) {
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to pause nonexistent VM\n");
+ return -1;
+ }
+
if (vm->run_state != VM_RUNNING) {
PrintError(vm, VCORE_NONE,"Tried to pause a VM that was not running\n");
return -1;
int v3_continue_vm(struct v3_vm_info * vm) {
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to continue nonexistent VM\n");
+ return -1;
+ }
+
if (vm->run_state != VM_PAUSED) {
PrintError(vm, VCORE_NONE,"Tried to continue a VM that was not paused\n");
return -1;
V3_Print(core->vm_info, core, "Simulation callback activated (guest_rip=%p)\n", (void *)core->rip);
while (v3_bitmap_check(timeout_map, core->vcpu_id) == 1) {
+ // We spin here if there is no one to yield to
v3_yield(NULL,-1);
}
uint64_t cycles = 0;
uint64_t cpu_khz = V3_CPU_KHZ();
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to simulate nonexistent VM\n");
+ return -1;
+ }
+
if (vm->run_state != VM_PAUSED) {
PrintError(vm, VCORE_NONE,"VM must be paused before simulation begins\n");
return -1;
if (all_blocked == 1) {
break;
}
-
+
+ // Intentionally spin if there is no one to yield to
v3_yield(NULL,-1);
}
struct v3_vm_mem_state *mem)
{
uint32_t i;
- uint32_t numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
- uint32_t numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
+ uint32_t numcores;
+ uint32_t numregions;
extern uint64_t v3_mem_block_size;
+ if (!vm || !base || !core || !mem) {
+ PrintError(VM_NONE, VCORE_NONE, "Invalid rquest to v3_get_state_vm\n");
+ return -1;
+ }
+
+ numcores = core->num_vcores > vm->num_cores ? vm->num_cores : core->num_vcores;
+ numregions = mem->num_regions > vm->mem_map.num_base_regions ? vm->mem_map.num_base_regions : mem->num_regions;
+
switch (vm->run_state) {
case VM_INVALID: base->state = V3_VM_INVALID; break;
case VM_RUNNING: base->state = V3_VM_RUNNING; break;
core->num_vcores=numcores;
- for (i=0;i<vm->mem_map.num_base_regions;i++) {
+ for (i=0;i<numregions;i++) {
mem->region[i].host_paddr = (void*)(vm->mem_map.base_regions[i].host_addr);
mem->region[i].size = v3_mem_block_size;
+#ifdef V3_CONFIG_SWAPPING
+ mem->region[i].swapped = vm->mem_map.base_regions[i].flags.swapped;
+ mem->region[i].pinned = vm->mem_map.base_regions[i].flags.pinned;
+#else
+ mem->region[i].swapped = 0;
+ mem->region[i].pinned = 0;
+#endif
+
}
mem->num_regions=numregions;
#include <palacios/vmm_checkpoint.h>
int v3_save_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
- return v3_chkpt_save_vm(vm, store, url, opts);
+ if (!vm || !store || !url) {
+ PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_save_vm\n");
+ return -1;
+ }
+ return v3_chkpt_save_vm(vm, store, url, opts);
}
int v3_load_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
- return v3_chkpt_load_vm(vm, store, url, opts);
+ if (!vm || !store || !url) {
+ PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_load_vm\n");
+ return -1;
+ }
+ return v3_chkpt_load_vm(vm, store, url, opts);
}
#ifdef V3_CONFIG_LIVE_MIGRATION
int v3_send_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
- return v3_chkpt_send_vm(vm, store, url, opts);
+ if (!vm || !store || !url) {
+ PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_send_vm\n");
+ return -1;
+ }
+ return v3_chkpt_send_vm(vm, store, url, opts);
}
int v3_receive_vm(struct v3_vm_info * vm, char * store, char * url, v3_chkpt_options_t opts) {
- return v3_chkpt_receive_vm(vm, store, url, opts);
+ if (!vm || !store || !url) {
+ PrintError(VM_NONE,VCORE_NONE, "Incorrect arguemnts for v3_receive_vm\n");
+ return -1;
+ }
+ return v3_chkpt_receive_vm(vm, store, url, opts);
}
#endif
int i = 0;
// deinitialize guest (free memory, etc...)
+ if (!vm) {
+ PrintError(VM_NONE, VCORE_NONE, "Asked to free nonexistent VM\n");
+ return -1;
+ }
+
if ((vm->run_state != VM_STOPPED) &&
(vm->run_state != VM_ERROR)) {
PrintError(vm, VCORE_NONE,"Tried to Free VM in invalid runstate (%d)\n", vm->run_state);
// free cores
for (i = 0; i < vm->num_cores; i++) {
+ v3_scheduler_free_core(&(vm->cores[i]));
v3_free_core(&(vm->cores[i]));
}
// free vm
+ v3_scheduler_free_vm(vm);
v3_free_vm_internal(vm);
v3_free_config(vm);