X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm_fw_cfg.c;h=80af5a4268f41f698f8c5d7ae5cd75a01f68d755;hb=a5d2c00cc461b4a60a1360a2a0bba55cef467bab;hp=9550db745b9a143e4dc0923a3ee1c3d2c2293c6f;hpb=2311ec427a582889e4b62ffc8cbd2249a8ade07f;p=palacios.git diff --git a/palacios/src/palacios/vmm_fw_cfg.c b/palacios/src/palacios/vmm_fw_cfg.c index 9550db7..80af5a4 100644 --- a/palacios/src/palacios/vmm_fw_cfg.c +++ b/palacios/src/palacios/vmm_fw_cfg.c @@ -25,6 +25,64 @@ #include +/* + This subsystem of Palacios interacts with the SEABIOS in order to + create highly customized configurations for the guest. Currently, + the primary purpose of such configuration is to pass a NUMA configuration + to the guest via ACPI. Currently, we are able to create NUMA domains, + map regions of guest physical addresses to them, and map vcores to them. + Additionally, these virtual NUMA domains are then mapped to physical + (host) NUMA domains. Other elements of Palacios handle vcore to + physical core mapping, as well as guest memory allocation such that + the needed physical NUMA domain mapping is correct. + + The following describes how the XML configuration of a virtual NUMA guest + works. + + (How many numa domains the guest will see) + (guest physical addresses x to y-1 are numa domain i and + numa domain i is mapped to host numa domain j) + + ... + + + For example, a 4 virtual domain guest mapped to a 2 domain host: + + + + + + + + + You also need to map the virtual cores to the domains, which is + done with the tag. This usually also indicates which physical core + the virtual core maps to, so that the NUMA topology the guest sees has + performance characteristics that make sense. + + (How many virtual cores we have) + (vcore 0 maps to virtual numa zone i and pcore q) + (vcore 1 maps to virtual numa zone j and pcore r) + ... 
+ + + For example, here are 8 virtual cores mapped across our numa domains, pairwise + + + + + + + + + + + + +*/ + + + #define FW_CFG_CTL_PORT 0x510 #define FW_CFG_DATA_PORT 0x511 @@ -93,7 +151,10 @@ struct e820_table { */ -static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len) +// +// Internal version assumes data is allocated +// +static int fw_cfg_add_bytes_internal(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len) { int arch = !!(key & FW_CFG_ARCH_LOCAL); // JRL: Well this is demented... Its basically generating a 1 or 0 from a mask operation @@ -110,13 +171,36 @@ static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, ui return 1; } +// +// General purpose version copies len bytes of data into a V3_Malloc'd temp +// and passes that copy to the internal version; returns 0 if the allocation fails +// +static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len) +{ + // must make a copy of the data so that the deinit function will work correctly... 
+ + uint8_t * copy = NULL; + + copy = V3_Malloc(len); + if (!copy) { + PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n"); + return 0; + } + memcpy(copy,data,len); + return fw_cfg_add_bytes_internal(cfg_state, key, copy, len); +} + static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value) { uint16_t * copy = NULL; copy = V3_Malloc(sizeof(uint16_t)); + if (!copy) { + PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n"); + return 0; + } *copy = value; - return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t)); + return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t)); } static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value) @@ -124,8 +208,12 @@ static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint uint32_t * copy = NULL; copy = V3_Malloc(sizeof(uint32_t)); + if (!copy) { + PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n"); + return 0; + } *copy = value; - return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t)); + return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t)); } static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value) @@ -133,8 +221,12 @@ static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint uint64_t * copy = NULL; copy = V3_Malloc(sizeof(uint64_t)); + if (!copy) { + PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n"); + return 0; + } *copy = value; - return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t)); + return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t)); } static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { @@ -142,7 +234,7 @@ static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, } static int 
fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { - V3_ASSERT(length == 2); + V3_ASSERT(core->vm_info, core, length == 2); struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data; uint16_t key = *(uint16_t *)src; @@ -163,7 +255,7 @@ static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { - V3_ASSERT(length == 1); + V3_ASSERT(core->vm_info, core, length == 1); struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data; int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL); @@ -185,7 +277,7 @@ static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, } static int fw_cfg_data_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { - V3_ASSERT(length == 1); + V3_ASSERT(core->vm_info, core, length == 1); struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data; int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL); @@ -212,14 +304,14 @@ static struct e820_table * e820_populate(struct v3_vm_info * vm) { int i = 0; if (vm->mem_map.e820_count > E820_MAX_COUNT) { - PrintError("Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT); + PrintError(vm, VCORE_NONE,"Too much E820 table entries! 
(max is %d)\n", E820_MAX_COUNT); return NULL; } e820 = V3_Malloc(sizeof(struct e820_table)); if (e820 == NULL) { - PrintError("Out of memory!\n"); + PrintError(vm, VCORE_NONE, "Out of memory!\n"); return NULL; } @@ -236,19 +328,49 @@ static struct e820_table * e820_populate(struct v3_vm_info * vm) { } */ +void v3_fw_cfg_deinit(struct v3_vm_info *vm) { + struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state); + int i, j; + /* Free every registered entry buffer, then unhook both FW_CFG I/O ports */ + for (i = 0; i < 2; ++i) { + for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) { + if (cfg_state->entries[i][j].data != NULL) { V3_Free(cfg_state->entries[i][j].data); } + cfg_state->entries[i][j].data = NULL; /* no dangling pointer, so a repeated deinit cannot double-free */ + } + } + + v3_unhook_io_port(vm, FW_CFG_CTL_PORT); + v3_unhook_io_port(vm, FW_CFG_DATA_PORT); + +} + int v3_fw_cfg_init(struct v3_vm_info * vm) { struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state); int ret = 0; + uint64_t mem_size = vm->mem_size; + uint32_t num_cores = vm->num_cores; + +#ifdef V3_CONFIG_HVM + mem_size = v3_get_hvm_ros_memsize(vm); + num_cores = v3_get_hvm_ros_cores(vm); +#endif + + // Be paranoid about starting this as all "unallocated" + memset(cfg_state,0,sizeof(struct v3_fw_cfg_state)); + +#ifndef V3_CONFIG_SEABIOS + V3_Print(vm,VCORE_NONE,"Warning: Configuring SEABIOS firmware, but SEABIOS is not being used in this build of Palacios. 
Configuration will be dormant.\n"); +#endif /* struct e820_table * e820 = e820_populate(vm); if (e820 == NULL) { - PrintError("Failed to populate E820 for FW interface!\n"); + PrintError(vm, VCORE_NONE, "Failed to populate E820 for FW interface!\n"); return -1; } @@ -260,20 +382,21 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { if (ret != 0) { // V3_Free(e820); - PrintError("Failed to hook FW CFG ports!\n"); + PrintError(vm, VCORE_NONE, "Failed to hook FW CFG ports!\n"); + v3_fw_cfg_deinit(vm); return -1; } fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, (uint8_t *)"QEMU", 4); //fw_cfg_add_bytes(cfg_state, FW_CFG_UUID, qemu_uuid, 16); fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, /*(uint16_t)(display_type == DT_NOGRAPHIC)*/ 0); - fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores); - fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores); + fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)num_cores); + fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)num_cores); fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1); //fw_cfg_bootsplash(cfg_state); fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1); - fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024)); + fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, mem_size / (1024 * 1024)); //fw_cfg_add_bytes(cfg_state, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables, // acpi_tables_len); @@ -306,7 +429,7 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { /* locations in fw_cfg NUMA array for each info region. 
*/ int node_offset = 0; int core_offset = 1; - int mem_offset = 1 + vm->num_cores; + int mem_offset = 1 + num_cores; if (num_nodes_str) { num_nodes = atoi(num_nodes_str); @@ -317,21 +440,22 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { int i = 0; // Allocate the global NUMA configuration array - numa_fw_cfg = V3_Malloc((1 + vm->num_cores + num_nodes) * sizeof(uint64_t)); + numa_fw_cfg = V3_Malloc((1 + num_cores + num_nodes) * sizeof(uint64_t)); if (numa_fw_cfg == NULL) { - PrintError("Could not allocate fw_cfg NUMA config space\n"); + PrintError(vm, VCORE_NONE, "Could not allocate fw_cfg NUMA config space\n"); + v3_fw_cfg_deinit(vm); return -1; } - memset(numa_fw_cfg, 0, (1 + vm->num_cores + num_nodes) * sizeof(uint64_t)); + memset(numa_fw_cfg, 0, (1 + num_cores + num_nodes) * sizeof(uint64_t)); // First 8 bytes is the number of NUMA zones numa_fw_cfg[node_offset] = num_nodes; // Next region is array of core->node mappings - for (i = 0; i < vm->num_cores; i++) { + for (i = 0; i < num_cores; i++) { char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode"); if (vnode_str == NULL) { @@ -369,8 +493,9 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { int vnode_id = 0; if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) { - PrintError("Invalid memory layout in configuration\n"); + PrintError(vm, VCORE_NONE, "Invalid memory layout in configuration\n"); V3_Free(numa_fw_cfg); + v3_fw_cfg_deinit(vm); /* numa_fw_cfg is not registered with cfg_state yet, hence the explicit V3_Free */ return -1; } @@ -389,14 +514,14 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { { uint64_t region_start = 0; - V3_Print("NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]); + V3_Print(vm, VCORE_NONE, "NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]); - for (i = 0; i < vm->num_cores; i++) { - V3_Print("\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]); + for (i = 0; i < num_cores; i++) { + V3_Print(vm, VCORE_NONE, "\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]); } for (i = 0; i < num_nodes; i++) { - V3_Print("\tMem (%p - %p) -> Node %d\n", (void 
*)region_start, + V3_Print(vm, VCORE_NONE, "\tMem (%p - %p) -> Node %d\n", (void *)region_start, (void *)numa_fw_cfg[mem_offset + i], i); region_start += numa_fw_cfg[mem_offset + i]; @@ -405,8 +530,8 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { // Register the NUMA cfg array with the FW_CFG interface - fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, - (1 + vm->num_cores + num_nodes) * sizeof(uint64_t)); + fw_cfg_add_bytes_internal(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, + (1 + num_cores + num_nodes) * sizeof(uint64_t)); } } @@ -415,18 +540,6 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) { return 0; } -void v3_fw_cfg_deinit(struct v3_vm_info *vm) { - struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state); - int i, j; - - for (i = 0; i < 2; ++i) { - for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) { - if (cfg_state->entries[i][j].data != NULL) - V3_Free(cfg_state->entries[i][j].data); - } - } -} -