#include <palacios/vm_guest.h>
+/*
+ This subsystem of Palacios interacts with the SEABIOS in order to
+ create highly customized configurations for the guest. Currently,
+ the primary purpose of such configuration is to pass a NUMA configuration
+ to the guest via ACPI. Currently, we are able to create NUMA domains,
+ map regions of guest physical addresses to them, and map vcores to them.
+ Additionally, these virtual NUMA domains are then mapped to physical
+ (host) NUMA domains. Other elements of Palacios handle vcore to
+ physical core mapping, as well as guest memory allocation such that
+ the needed physical NUMA domain mapping is correct.
+
+ The following describes how the XML configuration of a virtual NUMA guest
+ works.
+
+ <mem_layout vnodes=n> (How many numa domains the guest will see)
+ (guest physical addresses x to y-1 are numa domain i and
+ numa domain i is mapped to host numa domain j)
+ <region vnode=i start_addr=x end_addr=y node=j>
+ ...
+ </mem_layout>
+
+ For example, a 4 virtual domain guest mapped to a 2 domain host:
+
+ <mem_layout vnodes="4">
+ <region vnode="0" start_addr="0x00000000" end_addr="0x10000000" node="0" />
+ <region vnode="1" start_addr="0x10000000" end_addr="0x20000000" node="1" />
+ <region vnode="2" start_addr="0x20000000" end_addr="0x30000000" node="0" />
+ <region vnode="3" start_addr="0x30000000" end_addr="0x40000000" node="1" />
+ </mem_layout>
+
+ You also need to map the virtual cores to the domains, which is
+ done with the <cores> tag. This usually also indicates which physical core
+ the virtual core maps to, so that the NUMA topology the guest sees has
+ performance characteristics that make sense.
+
+ <cores count=m> (How many virtual cores we have)
+ <core vnode=i target_cpu=q> (vcore 0 maps to virtual numa zone i and pcore q)
+ <core vnode=j target_cpu=r> (vcore 1 maps to virtual numa zone j and pcore r)
+ ...
+ </cores>
+
+ For example, here are 8 virtual cores mapped across our numa domains, pairwise
+
+ <cores count="8">
+ <core target_cpu="1" vnode="0"/>
+ <core target_cpu="2" vnode="0"/>
+ <core target_cpu="3" vnode="1"/>
+ <core target_cpu="4" vnode="1"/>
+ <core target_cpu="5" vnode="2"/>
+ <core target_cpu="6" vnode="2"/>
+ <core target_cpu="7" vnode="3"/>
+ <core target_cpu="8" vnode="3"/>
+ </cores>
+
+*/
+
+
+
#define FW_CFG_CTL_PORT 0x510
#define FW_CFG_DATA_PORT 0x511
*/
-static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+//
+// Internal version assumes data is allocated
+//
+static int fw_cfg_add_bytes_internal(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
{
int arch = !!(key & FW_CFG_ARCH_LOCAL);
// JRL: Well this is demented... Its basically generating a 1 or 0 from a mask operation
return 1;
}
+//
+// General purpose version will allocate a temp
+//
+//
+static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+{
+ // must make a copy of the data so that the deinit function will work correctly...
+
+ uint16_t * copy = NULL;
+
+ copy = V3_Malloc(len);
+ if (!copy) {
+ PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+ return 0;
+ }
+ memcpy(copy,data,len);
+ return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
+}
+
+// Register a 16-bit value under the given fw_cfg key. The value is copied
+// into a heap allocation whose ownership passes to cfg_state (released in
+// v3_fw_cfg_deinit), hence the direct call to the internal add variant.
+// Returns 0 on allocation failure.
static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value)
{
    uint16_t * copy = NULL;
    copy = V3_Malloc(sizeof(uint16_t));
+    if (!copy) {
+        PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+        return 0;
+    }
    *copy = value;
-    return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
+    return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
}
static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value)
uint32_t * copy = NULL;
copy = V3_Malloc(sizeof(uint32_t));
+ if (!copy) {
+ PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+ return 0;
+ }
*copy = value;
- return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t));
+ return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t));
}
static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value)
uint64_t * copy = NULL;
copy = V3_Malloc(sizeof(uint64_t));
+ if (!copy) {
+ PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+ return 0;
+ }
*copy = value;
- return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t));
+ return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t));
}
static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
}
*/
+// Tear down the firmware-config state: free every entry's private data
+// buffer and unhook the fw_cfg I/O ports. This is called both from the
+// v3_fw_cfg_init() error paths and at VM teardown, so it must be safe to
+// run more than once; freed pointers are therefore cleared to NULL to
+// prevent a double free on a repeated call.
+void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    int i, j;
+
+    for (i = 0; i < 2; ++i) {
+        for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
+            if (cfg_state->entries[i][j].data != NULL) {
+                V3_Free(cfg_state->entries[i][j].data);
+                // defend against a second deinit (init error path + teardown)
+                cfg_state->entries[i][j].data = NULL;
+            }
+        }
+    }
+
+    v3_unhook_io_port(vm, FW_CFG_CTL_PORT);
+    v3_unhook_io_port(vm, FW_CFG_DATA_PORT);
+
+}
+
int v3_fw_cfg_init(struct v3_vm_info * vm) {
struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
int ret = 0;
+ uint64_t mem_size = vm->mem_size;
+ uint32_t num_cores = vm->num_cores;
+
+#ifdef V3_CONFIG_HVM
+ mem_size = v3_get_hvm_ros_memsize(vm);
+ num_cores = v3_get_hvm_ros_cores(vm);
+#endif
+
+ // Be paranoid about starting this as all "unallocated"
+ memset(cfg_state,0,sizeof(struct v3_fw_cfg_state));
+
+#ifndef V3_CONFIG_SEABIOS
+ V3_Print(vm,VCORE_NONE,"Warning: Configuring SEABIOS firmware, but SEABIOS is not being used in this build of Palacios. Configuration will be dormant.\n");
+#endif
/*
struct e820_table * e820 = e820_populate(vm);
if (ret != 0) {
// V3_Free(e820);
PrintError(vm, VCORE_NONE, "Failed to hook FW CFG ports!\n");
+ v3_fw_cfg_deinit(vm);
return -1;
}
fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, (uint8_t *)"QEMU", 4);
//fw_cfg_add_bytes(cfg_state, FW_CFG_UUID, qemu_uuid, 16);
fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, /*(uint16_t)(display_type == DT_NOGRAPHIC)*/ 0);
- fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores);
- fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores);
+ fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)num_cores);
+ fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)num_cores);
fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1);
//fw_cfg_bootsplash(cfg_state);
fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1);
- fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024));
+ fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, mem_size / (1024 * 1024));
//fw_cfg_add_bytes(cfg_state, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
// acpi_tables_len);
/* locations in fw_cfg NUMA array for each info region. */
int node_offset = 0;
int core_offset = 1;
- int mem_offset = 1 + vm->num_cores;
+ int mem_offset = 1 + num_cores;
if (num_nodes_str) {
num_nodes = atoi(num_nodes_str);
int i = 0;
// Allocate the global NUMA configuration array
- numa_fw_cfg = V3_Malloc((1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+ numa_fw_cfg = V3_Malloc((1 + num_cores + num_nodes) * sizeof(uint64_t));
if (numa_fw_cfg == NULL) {
PrintError(vm, VCORE_NONE, "Could not allocate fw_cfg NUMA config space\n");
+ v3_fw_cfg_deinit(vm);
return -1;
}
- memset(numa_fw_cfg, 0, (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+ memset(numa_fw_cfg, 0, (1 + num_cores + num_nodes) * sizeof(uint64_t));
// First 8 bytes is the number of NUMA zones
numa_fw_cfg[node_offset] = num_nodes;
// Next region is array of core->node mappings
- for (i = 0; i < vm->num_cores; i++) {
+ for (i = 0; i < num_cores; i++) {
char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode");
if (vnode_str == NULL) {
if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) {
PrintError(vm, VCORE_NONE, "Invalid memory layout in configuration\n");
- V3_Free(numa_fw_cfg);
+ v3_fw_cfg_deinit(vm);
return -1;
}
V3_Print(vm, VCORE_NONE, "NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
- for (i = 0; i < vm->num_cores; i++) {
+ for (i = 0; i < num_cores; i++) {
V3_Print(vm, VCORE_NONE, "\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
}
// Register the NUMA cfg array with the FW_CFG interface
- fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
- (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+ fw_cfg_add_bytes_internal(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
+ (1 + num_cores + num_nodes) * sizeof(uint64_t));
}
}
return 0;
}
-void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
- struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
- int i, j;
-
- for (i = 0; i < 2; ++i) {
- for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
- if (cfg_state->entries[i][j].data != NULL)
- V3_Free(cfg_state->entries[i][j].data);
- }
- }
-}
-