X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=palacios%2Fsrc%2Fpalacios%2Fvmm_fw_cfg.c;h=80af5a4268f41f698f8c5d7ae5cd75a01f68d755;hb=a5d2c00cc461b4a60a1360a2a0bba55cef467bab;hp=9550db745b9a143e4dc0923a3ee1c3d2c2293c6f;hpb=2311ec427a582889e4b62ffc8cbd2249a8ade07f;p=palacios.git

diff --git a/palacios/src/palacios/vmm_fw_cfg.c b/palacios/src/palacios/vmm_fw_cfg.c
index 9550db7..80af5a4 100644
--- a/palacios/src/palacios/vmm_fw_cfg.c
+++ b/palacios/src/palacios/vmm_fw_cfg.c
@@ -25,6 +25,64 @@
 #include <palacios/vm_guest.h>
 
 
+/*
+  This subsystem of Palacios interacts with the SEABIOS in order to 
+  create highly customized configurations for the guest.  Currently,
+  the primary purpose of such configuration is to pass a NUMA configuration
+  to the guest via ACPI.  Currently, we are able to create NUMA domains,
+  map regions of guest physical addresses to them, and map vcores to them.
+  Additionally, these virtual NUMA domains are then mapped to physical
+  (host) NUMA domains.   Other elements of Palacios handle vcore to 
+  physical core mapping, as well as guest memory allocation such that
+  the needed physical NUMA domain mapping is correct.
+
+  The following describes how the XML configuration of a virtual NUMA guest
+  works.
+
+  <mem_layout vnodes=n>  (How many numa domains the guest will see)
+      (guest physical addresses x to y-1 are numa domain i and 
+       numa domain i is mapped to host numa domain j)
+     <region vnode=i start_addr=x end_addr=y node=j> 
+     ...
+  </mem_layout>
+  
+  For example, a 4 virtual domain guest mapped to a 2 domain host:
+  
+  <mem_layout vnodes="4">
+    <region vnode="0" start_addr="0x00000000" end_addr="0x10000000" node="0" />
+    <region vnode="1" start_addr="0x10000000" end_addr="0x20000000" node="1" />
+    <region vnode="2" start_addr="0x20000000" end_addr="0x30000000" node="0" />
+    <region vnode="3" start_addr="0x30000000" end_addr="0x40000000" node="1" />
+  </mem_layout>
+	
+  You also need to map the virtual cores to the domains, which is 
+  done with the <cores> tag.  This usually also indicates which physical core
+  the virtual core maps to, so that the NUMA topology the guest sees has 
+  performance characteristics that make sense.
+       
+  <cores count=m>  (How many virtual cores we have)
+     <core vnode=i target_cpu=q> (vcore 0 maps to virtual numa zone i and pcore q)
+     <core vnode=j target_cpu=r> (vcore 1 maps to virtual numa zone j and pcore r)
+      ...
+   </cores>
+
+   For example, here are 8 virtual cores mapped across our numa domains, pairwise
+
+   <cores count="8">
+      <core target_cpu="1" vnode="0"/>
+      <core target_cpu="2" vnode="0"/>
+      <core target_cpu="3" vnode="1"/>
+      <core target_cpu="4" vnode="1"/>
+      <core target_cpu="5" vnode="2"/>
+      <core target_cpu="6" vnode="2"/>
+      <core target_cpu="7" vnode="3"/>
+      <core target_cpu="8" vnode="3"/>
+   </cores>
+
+*/
+
+
+
 #define FW_CFG_CTL_PORT     0x510
 #define FW_CFG_DATA_PORT    0x511
 
@@ -93,7 +151,10 @@ struct e820_table {
 
 */
 
-static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+//
+// Internal version assumes data is allocated
+//
+static int fw_cfg_add_bytes_internal(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
 {
     int arch = !!(key & FW_CFG_ARCH_LOCAL);
     // JRL: Well this is demented... Its basically generating a 1 or 0 from a mask operation
@@ -110,13 +171,36 @@ static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, ui
     return 1;
 }
 
+//
+// General purpose version: stores a private heap copy of data, so the
+// caller keeps ownership of its buffer and v3_fw_cfg_deinit can V3_Free
+// every registered entry.  Returns 0 if the copy cannot be allocated;
+// otherwise returns whatever fw_cfg_add_bytes_internal returns.
+static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+{
+    uint8_t * copy = NULL;
+
+    copy = V3_Malloc(len);
+    if (!copy) {
+	PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+	return 0;
+    }
+    memcpy(copy,data,len);
+    // Register the full len bytes that were copied, not sizeof(uint16_t)
+    return fw_cfg_add_bytes_internal(cfg_state, key, copy, len);
+}
+
 static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value)
 {
     uint16_t * copy = NULL;
 
     copy = V3_Malloc(sizeof(uint16_t));
+    if (!copy) { 
+	PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+	return 0;
+    }
     *copy = value;
-    return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
+    return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
 }
 
 static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value)
@@ -124,8 +208,12 @@ static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint
     uint32_t * copy = NULL;
 
     copy = V3_Malloc(sizeof(uint32_t));
+    if (!copy) { 
+	PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+	return 0;
+    }
     *copy = value;
-    return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t));
+    return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t));
 }
 
 static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value)
@@ -133,8 +221,12 @@ static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint
     uint64_t * copy = NULL;
 
     copy = V3_Malloc(sizeof(uint64_t));
+    if (!copy) { 
+	PrintError(VM_NONE,VCORE_NONE,"Failed to allocate temp\n");
+	return 0;
+    }
     *copy = value;
-    return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t));
+    return fw_cfg_add_bytes_internal(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t));
 }
 
 static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
@@ -142,7 +234,7 @@ static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src,
 }
 
 static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
-    V3_ASSERT(length == 2);
+    V3_ASSERT(core->vm_info, core, length == 2);
 
     struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
     uint16_t key = *(uint16_t *)src;
@@ -163,7 +255,7 @@ static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src,
 
 
 static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
-    V3_ASSERT(length == 1);
+    V3_ASSERT(core->vm_info, core, length == 1);
 
     struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
     int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
@@ -185,7 +277,7 @@ static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src,
 }
 
 static int fw_cfg_data_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
-    V3_ASSERT(length == 1);
+    V3_ASSERT(core->vm_info, core, length == 1);
 
     struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
     int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
@@ -212,14 +304,14 @@ static struct e820_table * e820_populate(struct v3_vm_info * vm) {
     int i = 0;
 
     if (vm->mem_map.e820_count > E820_MAX_COUNT) {
-        PrintError("Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT);
+        PrintError(vm, VCORE_NONE,"Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT);
         return NULL;
     }
 
     e820 = V3_Malloc(sizeof(struct e820_table));
 
     if (e820 == NULL) {
-        PrintError("Out of memory!\n");
+        PrintError(vm, VCORE_NONE, "Out of memory!\n");
         return NULL;
     }
 
@@ -236,19 +328,49 @@ static struct e820_table * e820_populate(struct v3_vm_info * vm) {
 }
 */
 
+// Release every registered fw_cfg entry buffer and unhook the fw_cfg ports.
+void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    int i, j;
+    for (i = 0; i < 2; ++i) {
+        for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
+            if (cfg_state->entries[i][j].data != NULL) {
+                V3_Free(cfg_state->entries[i][j].data);
+                cfg_state->entries[i][j].data = NULL; // v3_fw_cfg_init's error paths call this too; a repeated call must not free twice
+            }
+        }
+    }
+    v3_unhook_io_port(vm, FW_CFG_CTL_PORT);
+    v3_unhook_io_port(vm, FW_CFG_DATA_PORT);
+}
+
 int v3_fw_cfg_init(struct v3_vm_info * vm) {
 
 
 
     struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
     int ret = 0;
+    uint64_t mem_size = vm->mem_size;
+    uint32_t num_cores = vm->num_cores;
+
+#ifdef V3_CONFIG_HVM
+    mem_size = v3_get_hvm_ros_memsize(vm);
+    num_cores = v3_get_hvm_ros_cores(vm);
+#endif
 
+    
+    // Be paranoid about starting this as all "unallocated"
+    memset(cfg_state,0,sizeof(struct v3_fw_cfg_state));
+
+#ifndef V3_CONFIG_SEABIOS
+    V3_Print(vm,VCORE_NONE,"Warning: Configuring SEABIOS firmware, but SEABIOS is not being used in this build of Palacios.  Configuration will be dormant.\n");
+#endif
 
     /* 
        struct e820_table * e820 = e820_populate(vm);
 
        if (e820 == NULL) {
-        PrintError("Failed to populate E820 for FW interface!\n");
+        PrintError(vm, VCORE_NONE, "Failed to populate E820 for FW interface!\n");
         return -1;
 	}
 
@@ -260,20 +382,21 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 
     if (ret != 0) {
 	//  V3_Free(e820);
-        PrintError("Failed to hook FW CFG ports!\n");
+        PrintError(vm, VCORE_NONE, "Failed to hook FW CFG ports!\n");
+	v3_fw_cfg_deinit(vm);
         return -1;
     }
 
     fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, (uint8_t *)"QEMU", 4);
     //fw_cfg_add_bytes(cfg_state, FW_CFG_UUID, qemu_uuid, 16);
     fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, /*(uint16_t)(display_type == DT_NOGRAPHIC)*/ 0);
-    fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores);
-    fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores);
+    fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)num_cores);
+    fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)num_cores);
     fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1);
     //fw_cfg_bootsplash(cfg_state);
 
     fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1);
-    fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024));
+    fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, mem_size / (1024 * 1024));
 
     //fw_cfg_add_bytes(cfg_state, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
     //       acpi_tables_len);
@@ -306,7 +429,7 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 	/* locations in fw_cfg NUMA array for each info region. */
 	int node_offset = 0;
 	int core_offset = 1;
-	int mem_offset = 1 + vm->num_cores;
+	int mem_offset = 1 + num_cores;
 	
 	if (num_nodes_str) {
 	    num_nodes = atoi(num_nodes_str);
@@ -317,21 +440,22 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 	    int i = 0;
 
 	    // Allocate the global NUMA configuration array
-	    numa_fw_cfg = V3_Malloc((1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+	    numa_fw_cfg = V3_Malloc((1 + num_cores + num_nodes) * sizeof(uint64_t));
 
 	    if (numa_fw_cfg == NULL) {
-		PrintError("Could not allocate fw_cfg NUMA config space\n");
+		PrintError(vm, VCORE_NONE, "Could not allocate fw_cfg NUMA config space\n");
+		v3_fw_cfg_deinit(vm);
 		return -1;
 	    }
 
-	    memset(numa_fw_cfg, 0, (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+	    memset(numa_fw_cfg, 0, (1 + num_cores + num_nodes) * sizeof(uint64_t));
 
 	    // First 8 bytes is the number of NUMA zones
 	    numa_fw_cfg[node_offset] = num_nodes;
 	    
 	    
 	    // Next region is array of core->node mappings
-	    for (i = 0; i < vm->num_cores; i++) {
+	    for (i = 0; i < num_cores; i++) {
 		char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode");
 		
 		if (vnode_str == NULL) {
@@ -369,8 +493,8 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 		    int vnode_id = 0;
 
 		    if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) {
-			PrintError("Invalid memory layout in configuration\n");
-			V3_Free(numa_fw_cfg);
+			PrintError(vm, VCORE_NONE, "Invalid memory layout in configuration\n");
+			v3_fw_cfg_deinit(vm);
 			return -1;
 		    }
 		    
@@ -389,14 +513,14 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 	    {
 		uint64_t region_start = 0;
 		
-		V3_Print("NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
+		V3_Print(vm, VCORE_NONE, "NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
 	
-		for (i = 0; i < vm->num_cores; i++) {
-		    V3_Print("\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
+		for (i = 0; i < num_cores; i++) {
+		    V3_Print(vm, VCORE_NONE, "\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
 		}
 	
 		for (i = 0; i < num_nodes; i++) {
-		    V3_Print("\tMem (%p - %p) -> Node %d\n", (void *)region_start, 
+		    V3_Print(vm, VCORE_NONE, "\tMem (%p - %p) -> Node %d\n", (void *)region_start, 
 			     (void *)numa_fw_cfg[mem_offset + i], i);
 		    
 		    region_start += numa_fw_cfg[mem_offset + i];
@@ -405,8 +529,8 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
 
 
 	    // Register the NUMA cfg array with the FW_CFG interface
-	    fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
-			     (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+	    fw_cfg_add_bytes_internal(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
+				      (1 + num_cores + num_nodes) * sizeof(uint64_t));
 
 	}
     }
@@ -415,18 +539,6 @@ int v3_fw_cfg_init(struct v3_vm_info * vm) {
     return 0;
 }
 
-void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
-    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
-    int i, j;
-
-    for (i = 0; i < 2; ++i) {
-        for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
-            if (cfg_state->entries[i][j].data != NULL)
-                V3_Free(cfg_state->entries[i][j].data);
-        }
-    }
-}
-