From: Alexander Kudryavtsev <alexk@ispras.ru>
Date: Fri, 7 Jun 2013 20:13:53 +0000 (-0500)
Subject: add FW_CFG interface implementation for SEABIOS
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=2311ec427a582889e4b62ffc8cbd2249a8ade07f;p=palacios.releases.git

add FW_CFG interface implementation for SEABIOS
---

diff --git a/palacios/include/palacios/vmm_fw_cfg.h b/palacios/include/palacios/vmm_fw_cfg.h
new file mode 100644
index 0000000..91b3d94
--- /dev/null
+++ b/palacios/include/palacios/vmm_fw_cfg.h
@@ -0,0 +1,54 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Alexander Kudryavtsev <alexk@ispras.ru>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_FW_CFG_H__
+#define __VMM_FW_CFG_H__
+
+#ifdef __V3VEE__
+
+#include <palacios/vmm_types.h>
+
+#define FW_CFG_FILE_FIRST       0x20  /* NOTE(review): presumably the first key of the "file" slot range; only used to size the table here */
+#define FW_CFG_FILE_SLOTS       0x10  /* NOTE(review): presumably the number of "file" slots; only used to size the table here */
+#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS)  /* entries per table; a key index must be below this */
+
+typedef void (*v3_fw_cfg_cb)(void * opaque, uint8_t * data);  /* called with the entry's callback_opaque and data */
+/* One firmware-configuration item: a byte buffer plus an optional callback for guest writes. */
+struct v3_fw_cfg_entry {
+    uint32_t len;             /* size of data in bytes */
+    uint8_t * data;           /* item contents; NULL when the entry is unpopulated */
+    void * callback_opaque;   /* passed as the first argument to callback */
+    v3_fw_cfg_cb callback;    /* invoked once a guest write has filled all len bytes; may be NULL */
+};
+
+
+struct v3_fw_cfg_state {
+    struct v3_fw_cfg_entry entries[2][FW_CFG_MAX_ENTRY];  /* [0] = generic keys, [1] = keys with the arch-local flag set */
+    uint16_t cur_entry;   /* key last written to the control port (flag bits included), or the invalid marker */
+    uint32_t cur_offset;  /* next byte offset within the selected entry; reset to 0 on every select */
+};
+
+struct v3_vm_info;
+
+
+int v3_fw_cfg_init(struct v3_vm_info * vm);  /* hooks the FW_CFG I/O ports and populates the entry tables; 0 on success, -1 on failure */
+void v3_fw_cfg_deinit(struct v3_vm_info * vm);  /* frees the data buffer of every populated entry */
+
+#endif
+
+#endif
diff --git a/palacios/src/palacios/vmm_fw_cfg.c b/palacios/src/palacios/vmm_fw_cfg.c
new file mode 100644
index 0000000..9550db7
--- /dev/null
+++ b/palacios/src/palacios/vmm_fw_cfg.c
@@ -0,0 +1,447 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Alexander Kudryavtsev <alexk@ispras.ru>
+ *         Implementation of FW_CFG interface 
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *         NUMA modifications
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_fw_cfg.h>
+#include <palacios/vmm_mem.h>
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+
+
+#define FW_CFG_CTL_PORT     0x510   /* the guest writes a 16-bit key here to select an entry */
+#define FW_CFG_DATA_PORT    0x511   /* the guest reads/writes the selected entry here, one byte per access */
+/* Keys of the generic (non arch-local) table; only some of them are populated by v3_fw_cfg_init(). */
+#define FW_CFG_SIGNATURE        0x00
+#define FW_CFG_ID               0x01
+#define FW_CFG_UUID             0x02
+#define FW_CFG_RAM_SIZE         0x03
+#define FW_CFG_NOGRAPHIC        0x04
+#define FW_CFG_NB_CPUS          0x05
+#define FW_CFG_MACHINE_ID       0x06
+#define FW_CFG_KERNEL_ADDR      0x07
+#define FW_CFG_KERNEL_SIZE      0x08
+#define FW_CFG_KERNEL_CMDLINE   0x09
+#define FW_CFG_INITRD_ADDR      0x0a
+#define FW_CFG_INITRD_SIZE      0x0b
+#define FW_CFG_BOOT_DEVICE      0x0c
+#define FW_CFG_NUMA             0x0d
+#define FW_CFG_BOOT_MENU        0x0e
+#define FW_CFG_MAX_CPUS         0x0f
+#define FW_CFG_KERNEL_ENTRY     0x10
+#define FW_CFG_KERNEL_DATA      0x11
+#define FW_CFG_INITRD_DATA      0x12
+#define FW_CFG_CMDLINE_ADDR     0x13
+#define FW_CFG_CMDLINE_SIZE     0x14
+#define FW_CFG_CMDLINE_DATA     0x15
+#define FW_CFG_SETUP_ADDR       0x16
+#define FW_CFG_SETUP_SIZE       0x17
+#define FW_CFG_SETUP_DATA       0x18
+#define FW_CFG_FILE_DIR         0x19
+/* Flag bits carried in a key; FW_CFG_ENTRY_MASK strips them to leave the table index. */
+#define FW_CFG_WRITE_CHANNEL    0x4000
+#define FW_CFG_ARCH_LOCAL       0x8000
+#define FW_CFG_ENTRY_MASK       (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+/* Keys of the arch-local table (FW_CFG_ARCH_LOCAL set). */
+#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
+#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
+#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
+#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
+#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
+/* Stored as the current selector when the guest selects a key whose index is outside the table. */
+#define FW_CFG_INVALID          0xffff
+
+
+
+
+/*
+enum v3_e820_types {
+    E820_TYPE_FREE      = 1,
+    E820_TYPE_RESV      = 2,
+    E820_TYPE_ACPI_RECL = 3,
+    E820_TYPE_ACPI_NVS  = 4,
+    E820_TYPE_BAD       = 5
+};
+
+#define E820_MAX_COUNT 128
+struct e820_entry_packed {
+    uint64_t addr;
+    uint64_t size;
+    uint32_t type;
+} __attribute__((packed));
+
+struct e820_table {
+    uint32_t count;
+    struct e820_entry_packed entry[E820_MAX_COUNT];
+} __attribute__((packed)) __attribute((__aligned__(4)));
+
+*/
+
+static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+{
+    /* Stores data/len under key (the pointer is kept, not copied). Returns 1 on success, 0 if key is out of range. */
+    const int table = (key & FW_CFG_ARCH_LOCAL) ? 1 : 0;   /* arch-local keys live in the second table */
+    const uint16_t slot = key & FW_CFG_ENTRY_MASK;         /* strip the flag bits to get the index */
+    struct v3_fw_cfg_entry * entry = NULL;
+
+    if (slot < FW_CFG_MAX_ENTRY) {
+        entry = &(cfg_state->entries[table][slot]);
+        entry->data = data;
+        entry->len = len;
+        return 1;
+    }
+
+    return 0;
+}
+
+static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value)
+{   /* Registers a heap copy of value under key. Returns 1 on success, 0 if allocation fails or key is rejected. */
+    uint16_t * copy = V3_Malloc(sizeof(*copy));
+    if (copy == NULL) { return 0; }   /* out of memory: nothing was registered */
+    *copy = value;
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(*copy))) { V3_Free(copy); return 0; }
+    return 1;   /* the entry table now holds the copy */
+}
+
+static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value)
+{   /* Registers a heap copy of value under key. Returns 1 on success, 0 if allocation fails or key is rejected. */
+    uint32_t * copy = V3_Malloc(sizeof(*copy));
+    if (copy == NULL) { return 0; }   /* out of memory: nothing was registered */
+    *copy = value;
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(*copy))) { V3_Free(copy); return 0; }
+    return 1;   /* the entry table now holds the copy */
+}
+
+static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value)
+{   /* Registers a heap copy of value under key. Returns 1 on success, 0 if allocation fails or key is rejected. */
+    uint64_t * copy = V3_Malloc(sizeof(*copy));
+    if (copy == NULL) { return 0; }   /* out of memory: nothing was registered */
+    *copy = value;
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(*copy))) { V3_Free(copy); return 0; }
+    return 1;   /* the entry table now holds the copy */
+}
+
+static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    return length;  /* Control-port reads are accepted but supply no data: the buffer at src is left untouched. */
+}
+
+static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    /*
+     * Control-port write handler: records the 16-bit key found at src as the
+     * current selector and rewinds the data cursor. priv_data is the
+     * v3_fw_cfg_state that was registered with the hook. Always returns length.
+     */
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    uint16_t key = 0;
+
+    /* NOTE(review): length is presumably the guest's access width; confirm an assert is the intended response. */
+    V3_ASSERT(length == 2);
+    key = *(uint16_t *)src;
+
+    cfg_state->cur_offset = 0;
+
+    /* The flag bits stay in cur_entry; only the index part is range-checked. */
+    cfg_state->cur_entry = ((key & FW_CFG_ENTRY_MASK) < FW_CFG_MAX_ENTRY) ? key : FW_CFG_INVALID;
+    return length;
+}
+
+
+static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    /* Data-port read handler: stores the next byte of the selected entry in the buffer at src and
+     * advances the cursor; stores 0 if nothing valid is selected or the entry is exhausted. Returns length. */
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    uint8_t ret = 0;
+
+    V3_ASSERT(length == 1);
+
+    /* Look the entry up only after ruling out FW_CFG_INVALID, whose index bits lie outside the table. */
+    if (cfg_state->cur_entry != FW_CFG_INVALID) {
+        int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
+        struct v3_fw_cfg_entry * cfg_entry = &cfg_state->entries[arch][cfg_state->cur_entry & FW_CFG_ENTRY_MASK];
+
+        if ((cfg_entry->data != NULL) && (cfg_state->cur_offset < cfg_entry->len)) {
+            ret = cfg_entry->data[cfg_state->cur_offset++];
+        }
+    }
+
+    *(uint8_t *)src = ret;
+    return length;
+}
+
+static int fw_cfg_data_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    /* Data-port write handler: appends the byte at src to the selected entry when it was selected with
+     * FW_CFG_WRITE_CHANNEL and has a callback; the callback runs once len bytes are in. Returns length. */
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    struct v3_fw_cfg_entry * cfg_entry = NULL;
+    V3_ASSERT(length == 1);
+    /* FW_CFG_INVALID has the write-channel bit set and an out-of-table index: reject it before any lookup. */
+    if ((cfg_state->cur_entry == FW_CFG_INVALID) || !(cfg_state->cur_entry & FW_CFG_WRITE_CHANNEL)) {
+        return length;
+    }
+    cfg_entry = &cfg_state->entries[!!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL)][cfg_state->cur_entry & FW_CFG_ENTRY_MASK];
+    if ((cfg_entry->callback != NULL) && (cfg_entry->data != NULL) && (cfg_state->cur_offset < cfg_entry->len)) {
+        cfg_entry->data[cfg_state->cur_offset++] = *(uint8_t *)src;
+        if (cfg_state->cur_offset == cfg_entry->len) {
+            cfg_entry->callback(cfg_entry->callback_opaque, cfg_entry->data);
+            cfg_state->cur_offset = 0;   /* start over so the next write refills the buffer */
+        }
+    }
+    return length;
+}
+
+/*
+static struct e820_table * e820_populate(struct v3_vm_info * vm) {
+    struct v3_e820_entry * entry = NULL;
+    struct e820_table * e820 = NULL;
+    int i = 0;
+
+    if (vm->mem_map.e820_count > E820_MAX_COUNT) {
+        PrintError("Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT);
+        return NULL;
+    }
+
+    e820 = V3_Malloc(sizeof(struct e820_table));
+
+    if (e820 == NULL) {
+        PrintError("Out of memory!\n");
+        return NULL;
+    }
+
+    e820->count = vm->mem_map.e820_count;
+
+    list_for_each_entry(entry, &vm->mem_map.e820_list, list) {
+        e820->entry[i].addr = e->addr;
+        e820->entry[i].size = e->size;
+        e820->entry[i].type = e->type;
+        ++i;
+    }
+
+    return e820;
+}
+*/
+
+int v3_fw_cfg_init(struct v3_vm_info * vm) {
+    /*
+     * Set up the FW_CFG interface for a VM.
+     *
+     * Hooks the control and data I/O ports, registers the fixed entries
+     * (signature, CPU counts, RAM size, ...) and, when the "mem_layout"
+     * configuration subtree declares one or more "vnodes", builds and
+     * registers the NUMA description array.
+     *
+     * Every buffer registered here is heap-allocated, because
+     * v3_fw_cfg_deinit() passes each populated entry's data to V3_Free().
+     *
+     * Returns 0 on success and -1 on failure.
+     * NOTE(review): failure paths leave the ports hooked and earlier
+     * entries registered; presumably the caller tears the VM down.
+     */
+    static const uint8_t signature[4] = { 'Q', 'E', 'M', 'U' };
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    uint8_t * signature_copy = NULL;
+    int registered = 1;
+    int ret = 0;
+    int i = 0;
+
+    ret |= v3_hook_io_port(vm, FW_CFG_CTL_PORT, fw_cfg_ctl_read, &fw_cfg_ctl_write, cfg_state);
+    ret |= v3_hook_io_port(vm, FW_CFG_DATA_PORT, fw_cfg_data_read, &fw_cfg_data_write, cfg_state);
+
+    if (ret != 0) {
+        PrintError("Failed to hook FW CFG ports!\n");
+        return -1;
+    }
+
+    /* The signature must not be registered as a string literal: deinit would hand it to V3_Free(). */
+    signature_copy = V3_Malloc(sizeof(signature));
+
+    if (signature_copy == NULL) {
+        PrintError("Could not allocate fw_cfg signature\n");
+        return -1;
+    }
+
+    for (i = 0; i < (int)sizeof(signature); i++) {
+        signature_copy[i] = signature[i];
+    }
+
+    registered &= fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, signature_copy, sizeof(signature));
+    registered &= fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, 0);
+    registered &= fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores);
+    registered &= fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores);
+    registered &= fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1);
+    registered &= fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1);
+    registered &= fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024));
+    registered &= fw_cfg_add_i32(cfg_state, FW_CFG_IRQ0_OVERRIDE, 1);
+
+    if (!registered) {
+        PrintError("Failed to register FW CFG entries!\n");
+        return -1;
+    }
+
+    /*
+     * Not provided yet (left for future work): FW_CFG_UUID, the boot splash,
+     * FW_CFG_ACPI_TABLES, FW_CFG_SMBIOS_ENTRIES, FW_CFG_E820_TABLE and
+     * FW_CFG_HPET.
+     */
+
+    /* NUMA layout */
+    {
+        v3_cfg_tree_t * layout_cfg = v3_cfg_subtree(vm->cfg_data->cfg, "mem_layout");
+        char * num_nodes_str = v3_cfg_val(layout_cfg, "vnodes");
+        int num_nodes = 0;
+
+        /* locations in fw_cfg NUMA array for each info region. */
+        int node_offset = 0;
+        int core_offset = 1;
+        int mem_offset = 1 + vm->num_cores;
+
+        if (num_nodes_str) {
+            num_nodes = atoi(num_nodes_str);
+        }
+
+        if (num_nodes > 0) {
+            /* Array layout: [node count][one node ID per core][one memory size per node], all uint64_t. */
+            uint32_t numa_size = (uint32_t)((1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
+            uint64_t * numa_fw_cfg = NULL;
+
+            // Allocate the global NUMA configuration array
+            numa_fw_cfg = V3_Malloc(numa_size);
+
+            if (numa_fw_cfg == NULL) {
+                PrintError("Could not allocate fw_cfg NUMA config space\n");
+                return -1;
+            }
+
+            memset(numa_fw_cfg, 0, numa_size);
+
+            // First 8 bytes is the number of NUMA zones
+            numa_fw_cfg[node_offset] = num_nodes;
+
+            // Next region is array of core->node mappings
+            for (i = 0; i < vm->num_cores; i++) {
+                char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode");
+
+                if (vnode_str == NULL) {
+                    // No node was configured for this core: fall back to node 0
+                    numa_fw_cfg[core_offset + i] = 0;
+                } else {
+                    numa_fw_cfg[core_offset + i] = (uint64_t)atoi(vnode_str);
+                }
+            }
+
+            /* Final region is an array of node->mem_size mappings.
+             * This assumes that memory is assigned to NUMA nodes in consecutive AND contiguous blocks;
+             * NO INTERLEAVING ALLOWED.
+             * E.g. node 0 covers the first x bytes of memory, node 1 covers the next y bytes, etc.
+             *     The array only stores the x, y, ... values, indexed by the node ID.
+             *     We should probably fix this, but that will require modifications to SEABIOS.
+             *
+             * For now we assume that the XML data is set accordingly and just walk the regions given there.
+             *   NOTE: if several XML regions name the same node, the last one overwrites the earlier ones.
+             */
+            {
+                v3_cfg_tree_t * region_desc = v3_cfg_subtree(layout_cfg, "region");
+
+                while (region_desc) {
+                    char * start_addr_str = v3_cfg_val(region_desc, "start_addr");
+                    char * end_addr_str = v3_cfg_val(region_desc, "end_addr");
+                    char * vnode_id_str = v3_cfg_val(region_desc, "vnode");
+
+                    addr_t start_addr = 0;
+                    addr_t end_addr = 0;
+                    int vnode_id = 0;
+
+                    if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) {
+                        PrintError("Invalid memory layout in configuration\n");
+                        V3_Free(numa_fw_cfg);
+                        return -1;
+                    }
+
+                    start_addr = atox(start_addr_str);
+                    end_addr = atox(end_addr_str);
+                    vnode_id = atoi(vnode_id_str);
+
+                    /* The node ID indexes the array, so it must name one of the declared nodes. */
+                    if ((vnode_id < 0) || (vnode_id >= num_nodes)) {
+                        PrintError("Invalid vnode ID (%d) in memory layout\n", vnode_id);
+                        V3_Free(numa_fw_cfg);
+                        return -1;
+                    }
+
+                    numa_fw_cfg[mem_offset + vnode_id] = end_addr - start_addr;
+
+                    region_desc = v3_cfg_next_branch(region_desc);
+                }
+            }
+
+            /* Print the NUMA mapping being passed in */
+            {
+                uint64_t region_start = 0;
+
+                V3_Print("NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
+
+                for (i = 0; i < vm->num_cores; i++) {
+                    V3_Print("\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
+                }
+
+                for (i = 0; i < num_nodes; i++) {
+                    uint64_t region_end = region_start + numa_fw_cfg[mem_offset + i];
+                    /* Report the end address of the region, not its size. */
+                    V3_Print("\tMem (%p - %p) -> Node %d\n", (void *)region_start, (void *)region_end, i);
+                    region_start = region_end;
+                }
+            }
+
+            // Register the NUMA cfg array with the FW_CFG interface
+            fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, numa_size);
+        }
+    }
+
+    return 0;
+}
+
+void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
+    /* Frees the data buffer of every populated entry and clears the pointer so it cannot be freed or read again. */
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    int i, j;
+    for (i = 0; i < 2; ++i) {
+        for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
+            if (cfg_state->entries[i][j].data != NULL) { V3_Free(cfg_state->entries[i][j].data); }
+            cfg_state->entries[i][j].data = NULL;   /* a repeated deinit now skips this entry */
+        }
+    }
+}
+
+
+
+
+/* E820 code for HVM enabled bochs bios:  */
+#if 0
+/* E820 location in HVM virtual address space. Taken from VMXASSIST. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+    // Copy E820 to BIOS. See rombios.c, copy_e820_table function.
+    addr_t e820_ptr = (addr_t)V3_VAddr((void *)(vm->mem_map.base_region.host_addr + HVM_E820_PAGE));
+
+    *(uint16_t *)(e820_ptr + HVM_E820_NR_OFFSET) = e820->count;
+    memcpy((void *)(e820_ptr + HVM_E820_OFFSET), &e820->entry[0], sizeof(e820->entry[0]) * e820->count);
+    V3_Free(e820);
+
+    return 0;
+#endif