Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


add FW_CFG interface implementation for SEABIOS
Alexander Kudryavtsev [Fri, 7 Jun 2013 20:13:53 +0000 (15:13 -0500)]
palacios/include/palacios/vmm_fw_cfg.h [new file with mode: 0644]
palacios/src/palacios/vmm_fw_cfg.c [new file with mode: 0644]

diff --git a/palacios/include/palacios/vmm_fw_cfg.h b/palacios/include/palacios/vmm_fw_cfg.h
new file mode 100644 (file)
index 0000000..91b3d94
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Alexander Kudryavtsev <alexk@ispras.ru>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_FW_CFG_H__
+#define __VMM_FW_CFG_H__
+
+#ifdef __V3VEE__
+
+#include <palacios/vmm_types.h>
+
+/*
+ * Sizing of the fw_cfg entry table.  FW_CFG_MAX_ENTRY is the number of
+ * slots in each row of v3_fw_cfg_state.entries[][] and the exclusive upper
+ * bound that masked keys are checked against.
+ * NOTE(review): FW_CFG_FILE_FIRST / FW_CFG_FILE_SLOTS presumably mirror the
+ * key range QEMU reserves for fw_cfg "file" entries -- confirm.
+ */
+#define FW_CFG_FILE_FIRST       0x20
+#define FW_CFG_FILE_SLOTS       0x10
+#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS)
+
+/*
+ * Per-entry write-completion callback.  The data-port write handler in
+ * vmm_fw_cfg.c invokes it with the entry's callback_opaque pointer and the
+ * entry's data buffer once the guest has written the final byte of the entry.
+ */
+typedef void (*v3_fw_cfg_cb)(void * opaque, uint8_t * data);
+
+/* One fw_cfg table slot: a byte buffer plus an optional write callback. */
+struct v3_fw_cfg_entry {
+    /* Number of valid bytes in data; guest reads/writes stop at this offset */
+    uint32_t len;
+    /* Entry contents, NULL when the slot is unused.  v3_fw_cfg_deinit()
+     * passes every non-NULL pointer to V3_Free(). */
+    uint8_t * data;
+    /* Passed back unchanged as the first argument of callback */
+    void * callback_opaque;
+    /* Guest writes to the entry are only accepted when this is non-NULL */
+    v3_fw_cfg_cb callback;
+};
+
+
+/* Per-VM state of the fw_cfg device. */
+struct v3_fw_cfg_state {
+    /* Entry table: row 1 holds keys with the arch-local bit (0x8000) set,
+     * row 0 all other keys; the column is the key with its flag bits masked off */
+    struct v3_fw_cfg_entry entries[2][FW_CFG_MAX_ENTRY];
+    /* Selector last written to the control port, flag bits included
+     * (0xffff when the guest selected an out-of-range key) */
+    uint16_t cur_entry;
+    /* Byte offset of the next data-port access within the selected entry;
+     * reset to 0 on every control-port write */
+    uint32_t cur_offset;
+};
+
+struct v3_vm_info;
+
+
+/*
+ * Hook the fw_cfg I/O ports for the VM and populate vm->fw_cfg_state.
+ * Returns 0 on success and -1 on failure.
+ */
+int v3_fw_cfg_init(struct v3_vm_info * vm);
+/* Release (via V3_Free) every entry buffer registered in vm->fw_cfg_state. */
+void v3_fw_cfg_deinit(struct v3_vm_info * vm);
+
+#endif
+
+#endif
diff --git a/palacios/src/palacios/vmm_fw_cfg.c b/palacios/src/palacios/vmm_fw_cfg.c
new file mode 100644 (file)
index 0000000..9550db7
--- /dev/null
@@ -0,0 +1,447 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Alexander Kudryavtsev <alexk@ispras.ru>
+ *         Implementation of FW_CFG interface 
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *         NUMA modifications
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_fw_cfg.h>
+#include <palacios/vmm_mem.h>
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+
+
+/*
+ * I/O ports hooked by v3_fw_cfg_init(): the control port takes a 16-bit key
+ * selector, the data port transfers the selected entry one byte at a time.
+ */
+#define FW_CFG_CTL_PORT     0x510
+#define FW_CFG_DATA_PORT    0x511
+
+/*
+ * Keys selecting individual fw_cfg entries (the selector written to
+ * FW_CFG_CTL_PORT with its flag bits masked off).
+ * NOTE(review): names and numbering appear to follow QEMU's fw_cfg key
+ * list -- confirm against the QEMU/SeaBIOS sources before changing a value.
+ * Of these, v3_fw_cfg_init() only populates SIGNATURE, ID, RAM_SIZE,
+ * NOGRAPHIC, NB_CPUS, NUMA (when configured), BOOT_MENU and MAX_CPUS.
+ */
+#define FW_CFG_SIGNATURE        0x00
+#define FW_CFG_ID               0x01
+#define FW_CFG_UUID             0x02
+#define FW_CFG_RAM_SIZE         0x03
+#define FW_CFG_NOGRAPHIC        0x04
+#define FW_CFG_NB_CPUS          0x05
+#define FW_CFG_MACHINE_ID       0x06
+#define FW_CFG_KERNEL_ADDR      0x07
+#define FW_CFG_KERNEL_SIZE      0x08
+#define FW_CFG_KERNEL_CMDLINE   0x09
+#define FW_CFG_INITRD_ADDR      0x0a
+#define FW_CFG_INITRD_SIZE      0x0b
+#define FW_CFG_BOOT_DEVICE      0x0c
+#define FW_CFG_NUMA             0x0d
+#define FW_CFG_BOOT_MENU        0x0e
+#define FW_CFG_MAX_CPUS         0x0f
+#define FW_CFG_KERNEL_ENTRY     0x10
+#define FW_CFG_KERNEL_DATA      0x11
+#define FW_CFG_INITRD_DATA      0x12
+#define FW_CFG_CMDLINE_ADDR     0x13
+#define FW_CFG_CMDLINE_SIZE     0x14
+#define FW_CFG_CMDLINE_DATA     0x15
+#define FW_CFG_SETUP_ADDR       0x16
+#define FW_CFG_SETUP_SIZE       0x17
+#define FW_CFG_SETUP_DATA       0x18
+#define FW_CFG_FILE_DIR         0x19
+
+/*
+ * Flag bits carried in the 16-bit selector alongside the entry index:
+ *   FW_CFG_WRITE_CHANNEL - the guest intends to write the entry
+ *   FW_CFG_ARCH_LOCAL    - the key lives in row 1 of the entry table
+ * FW_CFG_ENTRY_MASK strips both flags, leaving the table column.
+ */
+#define FW_CFG_WRITE_CHANNEL    0x4000
+#define FW_CFG_ARCH_LOCAL       0x8000
+/* Fully parenthesized so the mask expands safely inside any expression */
+#define FW_CFG_ENTRY_MASK       (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+
+/* Architecture-local keys (FW_CFG_ARCH_LOCAL set) */
+#define FW_CFG_ACPI_TABLES      (FW_CFG_ARCH_LOCAL + 0)
+#define FW_CFG_SMBIOS_ENTRIES   (FW_CFG_ARCH_LOCAL + 1)
+#define FW_CFG_IRQ0_OVERRIDE    (FW_CFG_ARCH_LOCAL + 2)
+#define FW_CFG_E820_TABLE       (FW_CFG_ARCH_LOCAL + 3)
+#define FW_CFG_HPET             (FW_CFG_ARCH_LOCAL + 4)
+
+/*
+ * Selector recorded when the guest picks an out-of-range key.  The value has
+ * both FW_CFG_WRITE_CHANNEL and FW_CFG_ARCH_LOCAL set and masks to 0x3fff,
+ * so it must be compared explicitly before any flag test or table lookup.
+ */
+#define FW_CFG_INVALID          0xffff
+
+
+
+
+/*
+enum v3_e820_types {
+    E820_TYPE_FREE      = 1,
+    E820_TYPE_RESV      = 2,
+    E820_TYPE_ACPI_RECL = 3,
+    E820_TYPE_ACPI_NVS  = 4,
+    E820_TYPE_BAD       = 5
+};
+
+#define E820_MAX_COUNT 128
+struct e820_entry_packed {
+    uint64_t addr;
+    uint64_t size;
+    uint32_t type;
+} __attribute__((packed));
+
+struct e820_table {
+    uint32_t count;
+    struct e820_entry_packed entry[E820_MAX_COUNT];
+} __attribute__((packed)) __attribute((__aligned__(4)));
+
+*/
+
+/*
+ * Register a byte buffer under a fw_cfg key.
+ *
+ * The buffer is stored by pointer, not copied.  Returns 1 when the entry
+ * was recorded and 0 when the key (with its flag bits masked off) does not
+ * fit in the entry table.
+ */
+static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
+{
+    /* Row 1 holds keys carrying FW_CFG_ARCH_LOCAL, row 0 everything else */
+    const int row = (key & FW_CFG_ARCH_LOCAL) ? 1 : 0;
+    const uint16_t slot = key & FW_CFG_ENTRY_MASK;
+    struct v3_fw_cfg_entry * entry = NULL;
+
+    if (slot >= FW_CFG_MAX_ENTRY) {
+        return 0;
+    }
+
+    entry = &(cfg_state->entries[row][slot]);
+    entry->data = data;
+    entry->len = len;
+
+    return 1;
+}
+
+/*
+ * Register a 16-bit value under a fw_cfg key.
+ *
+ * The value is copied into a freshly allocated buffer that the entry table
+ * then owns.  Returns 1 on success and 0 if the allocation fails or the key
+ * is rejected; nothing is leaked on failure.
+ */
+static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value)
+{
+    uint16_t * copy = NULL;
+
+    copy = V3_Malloc(sizeof(uint16_t));
+
+    if (copy == NULL) {
+        PrintError("Could not allocate fw_cfg entry\n");
+        return 0;
+    }
+
+    *copy = value;
+
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t))) {
+        /* The table did not take ownership, so release the copy here */
+        V3_Free(copy);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Register a 32-bit value under a fw_cfg key.
+ *
+ * The value is copied into a freshly allocated buffer that the entry table
+ * then owns.  Returns 1 on success and 0 if the allocation fails or the key
+ * is rejected; nothing is leaked on failure.
+ */
+static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value)
+{
+    uint32_t * copy = NULL;
+
+    copy = V3_Malloc(sizeof(uint32_t));
+
+    if (copy == NULL) {
+        PrintError("Could not allocate fw_cfg entry\n");
+        return 0;
+    }
+
+    *copy = value;
+
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t))) {
+        /* The table did not take ownership, so release the copy here */
+        V3_Free(copy);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Register a 64-bit value under a fw_cfg key.
+ *
+ * The value is copied into a freshly allocated buffer that the entry table
+ * then owns.  Returns 1 on success and 0 if the allocation fails or the key
+ * is rejected; nothing is leaked on failure.
+ */
+static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value)
+{
+    uint64_t * copy = NULL;
+
+    copy = V3_Malloc(sizeof(uint64_t));
+
+    if (copy == NULL) {
+        PrintError("Could not allocate fw_cfg entry\n");
+        return 0;
+    }
+
+    *copy = value;
+
+    if (!fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t))) {
+        /* The table did not take ownership, so release the copy here */
+        V3_Free(copy);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Read hook for the control port.
+ *
+ * The selector cannot be read back, so the guest gets zeroes.  For read
+ * hooks `src` is the buffer handed back to the guest (fw_cfg_data_read
+ * stores its result there), so it is filled explicitly instead of being
+ * left with whatever it happened to contain.
+ * Returns the number of bytes handled.
+ */
+static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    memset(src, 0, length);
+
+    return length;
+}
+
+/*
+ * Write hook for the control port: select the entry that later data-port
+ * accesses operate on.
+ *
+ * The 16-bit selector is stored with its flag bits intact; an out-of-range
+ * key is recorded as FW_CFG_INVALID.  The data offset restarts at 0 on
+ * every selection.  Returns the number of bytes handled.
+ *
+ * NOTE(review): the length is guest-controlled yet only guarded by
+ * V3_ASSERT -- confirm that a failed assertion cannot take down the host.
+ */
+static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    V3_ASSERT(length == 2);
+
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    uint16_t key = *(uint16_t *)src;
+
+    cfg_state->cur_offset = 0;
+
+    if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
+        cfg_state->cur_entry = FW_CFG_INVALID;
+    } else {
+        cfg_state->cur_entry = key;
+    }
+
+    return length;
+}
+
+
+/*
+ * Read hook for the data port: return the next byte of the selected entry.
+ *
+ * The guest receives 0 when no valid entry is selected, the entry has no
+ * data, or the read offset has reached the end of the entry; otherwise the
+ * offset advances by one.  Returns the number of bytes handled.
+ */
+static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    V3_ASSERT(length == 1);
+
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    uint8_t ret = 0;
+
+    /* Validate the selector BEFORE forming a pointer into the table:
+     * FW_CFG_INVALID masks to 0x3fff, far beyond FW_CFG_MAX_ENTRY. */
+    if (cfg_state->cur_entry != FW_CFG_INVALID) {
+        int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
+        uint16_t idx = cfg_state->cur_entry & FW_CFG_ENTRY_MASK;
+
+        if (idx < FW_CFG_MAX_ENTRY) {
+            struct v3_fw_cfg_entry * cfg_entry = &(cfg_state->entries[arch][idx]);
+
+            if ((cfg_entry->data != NULL) &&
+                (cfg_state->cur_offset < cfg_entry->len)) {
+                ret = cfg_entry->data[cfg_state->cur_offset++];
+            }
+        }
+    }
+
+    *(uint8_t *)src = ret;
+
+    return length;
+}
+
+/*
+ * Write hook for the data port: store the next byte of a writable entry.
+ *
+ * A byte is accepted only when the selector is valid, was chosen with
+ * FW_CFG_WRITE_CHANNEL set, and the entry has both a callback and a data
+ * buffer with room left.  When the last byte of the entry arrives the
+ * callback is invoked and the offset rewinds to 0.  All other writes are
+ * silently dropped.  Returns the number of bytes handled.
+ */
+static int fw_cfg_data_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
+    V3_ASSERT(length == 1);
+
+    struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
+    struct v3_fw_cfg_entry * cfg_entry = NULL;
+    uint16_t idx = cfg_state->cur_entry & FW_CFG_ENTRY_MASK;
+    int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
+
+    /* FW_CFG_INVALID (0xffff) has the WRITE_CHANNEL bit set and masks to an
+     * index far outside the table, so it must be rejected explicitly before
+     * the flag test and before any table access. */
+    if ((cfg_state->cur_entry == FW_CFG_INVALID) ||
+        (idx >= FW_CFG_MAX_ENTRY) ||
+        !(cfg_state->cur_entry & FW_CFG_WRITE_CHANNEL)) {
+        return length;
+    }
+
+    cfg_entry = &(cfg_state->entries[arch][idx]);
+
+    if ((cfg_entry->callback != NULL) &&
+        (cfg_entry->data != NULL) &&
+        (cfg_state->cur_offset < cfg_entry->len)) {
+
+        cfg_entry->data[cfg_state->cur_offset++] = *(uint8_t *)src;
+
+        if (cfg_state->cur_offset == cfg_entry->len) {
+            cfg_entry->callback(cfg_entry->callback_opaque, cfg_entry->data);
+            cfg_state->cur_offset = 0;
+        }
+    }
+
+    return length;
+}
+
+/*
+static struct e820_table * e820_populate(struct v3_vm_info * vm) {
+    struct v3_e820_entry * entry = NULL;
+    struct e820_table * e820 = NULL;
+    int i = 0;
+
+    if (vm->mem_map.e820_count > E820_MAX_COUNT) {
+        PrintError("Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT);
+        return NULL;
+    }
+
+    e820 = V3_Malloc(sizeof(struct e820_table));
+
+    if (e820 == NULL) {
+        PrintError("Out of memory!\n");
+        return NULL;
+    }
+
+    e820->count = vm->mem_map.e820_count;
+
+    list_for_each_entry(entry, &vm->mem_map.e820_list, list) {
+        e820->entry[i].addr = e->addr;
+        e820->entry[i].size = e->size;
+        e820->entry[i].type = e->type;
+        ++i;
+    }
+
+    return e820;
+}
+*/
+
+/*
+ * Publish the guest NUMA topology under FW_CFG_NUMA when the "mem_layout"
+ * configuration subtree declares a positive "vnodes" count.
+ *
+ * The published uint64_t array is laid out as:
+ *   [0]                       number of NUMA nodes
+ *   [1 .. num_cores]          node ID of each core
+ *   [1 + num_cores .. end]    memory size of each node, indexed by node ID
+ *
+ * Memory is assumed to be assigned to nodes in consecutive, contiguous
+ * blocks (no interleaving): node 0 owns the first x bytes, node 1 the next
+ * y bytes, and so on; only the sizes are stored.  If several "region"
+ * elements name the same node, the last one wins.
+ *
+ * Returns 0 on success (including when no NUMA layout is configured) and
+ * -1 on error.
+ */
+static int fw_cfg_setup_numa(struct v3_vm_info * vm, struct v3_fw_cfg_state * cfg_state) {
+    v3_cfg_tree_t * layout_cfg = v3_cfg_subtree(vm->cfg_data->cfg, "mem_layout");
+    char * num_nodes_str = v3_cfg_val(layout_cfg, "vnodes");
+    v3_cfg_tree_t * region_desc = NULL;
+    uint64_t * numa_fw_cfg = NULL;
+    uint64_t region_start = 0;
+    uint32_t numa_bytes = 0;
+    int num_nodes = 0;
+    int i = 0;
+
+    /* locations in fw_cfg NUMA array for each info region. */
+    int node_offset = 0;
+    int core_offset = 1;
+    int mem_offset = 1 + vm->num_cores;
+
+    if (num_nodes_str) {
+        num_nodes = atoi(num_nodes_str);
+    }
+
+    if (num_nodes <= 0) {
+        return 0;
+    }
+
+    // Allocate the global NUMA configuration array
+    numa_bytes = (1 + vm->num_cores + num_nodes) * sizeof(uint64_t);
+    numa_fw_cfg = V3_Malloc(numa_bytes);
+
+    if (numa_fw_cfg == NULL) {
+        PrintError("Could not allocate fw_cfg NUMA config space\n");
+        return -1;
+    }
+
+    memset(numa_fw_cfg, 0, numa_bytes);
+
+    // First 8 bytes is the number of NUMA zones
+    numa_fw_cfg[node_offset] = num_nodes;
+
+    // Next region is array of core->node mappings
+    for (i = 0; i < vm->num_cores; i++) {
+        char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode");
+
+        if (vnode_str == NULL) {
+            // if no cpu was specified then NUMA layout is randomized, and we're screwed...
+            numa_fw_cfg[core_offset + i] = 0;
+        } else {
+            numa_fw_cfg[core_offset + i] = (uint64_t)atoi(vnode_str);
+        }
+    }
+
+    /* Final region: node->mem_size mappings taken from the XML regions */
+    region_desc = v3_cfg_subtree(layout_cfg, "region");
+
+    while (region_desc) {
+        char * start_addr_str = v3_cfg_val(region_desc, "start_addr");
+        char * end_addr_str = v3_cfg_val(region_desc, "end_addr");
+        char * vnode_id_str = v3_cfg_val(region_desc, "vnode");
+
+        addr_t start_addr = 0;
+        addr_t end_addr = 0;
+        int vnode_id = 0;
+
+        if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) {
+            PrintError("Invalid memory layout in configuration\n");
+            V3_Free(numa_fw_cfg);
+            return -1;
+        }
+
+        start_addr = atox(start_addr_str);
+        end_addr = atox(end_addr_str);
+        vnode_id = atoi(vnode_id_str);
+
+        /* The node ID comes straight from the configuration and indexes
+         * the array, so it has to be range-checked before the store. */
+        if ((vnode_id < 0) || (vnode_id >= num_nodes)) {
+            PrintError("Invalid vnode ID in memory layout configuration\n");
+            V3_Free(numa_fw_cfg);
+            return -1;
+        }
+
+        numa_fw_cfg[mem_offset + vnode_id] = end_addr - start_addr;
+
+        region_desc = v3_cfg_next_branch(region_desc);
+    }
+
+    /* Print the NUMA mapping being passed in */
+    V3_Print("NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
+
+    for (i = 0; i < vm->num_cores; i++) {
+        V3_Print("\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
+    }
+
+    for (i = 0; i < num_nodes; i++) {
+        /* The array stores sizes; the end address is start + size */
+        uint64_t region_end = region_start + numa_fw_cfg[mem_offset + i];
+
+        V3_Print("\tMem (%p - %p) -> Node %d\n", (void *)(addr_t)region_start,
+                 (void *)(addr_t)region_end, i);
+
+        region_start = region_end;
+    }
+
+    // Register the NUMA cfg array with the FW_CFG interface
+    if (!fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg, numa_bytes)) {
+        PrintError("Could not register fw_cfg NUMA config\n");
+        V3_Free(numa_fw_cfg);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Initialize the fw_cfg device of a VM: hook the control and data ports
+ * and populate the entries the BIOS queries (signature, interface ID, CPU
+ * counts, boot-menu flag, RAM size, IRQ0 override and, when configured,
+ * the NUMA layout).
+ *
+ * Every registered buffer is heap allocated because v3_fw_cfg_deinit()
+ * passes every non-NULL entry pointer to V3_Free().
+ *
+ * Entries for UUID, ACPI tables, SMBIOS, E820 and HPET are not populated.
+ *
+ * Returns 0 on success and -1 on failure.  Buffers registered before a
+ * failure stay in the table and are released by v3_fw_cfg_deinit().
+ * NOTE(review): the I/O port hooks are not removed on the failure paths --
+ * confirm that VM teardown takes care of them.
+ */
+int v3_fw_cfg_init(struct v3_vm_info * vm) {
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    const uint32_t signature_len = 4;
+    uint8_t * signature = NULL;
+    int ret = 0;
+
+    /* Start from an empty table so unused slots are reliably NULL */
+    memset(cfg_state, 0, sizeof(struct v3_fw_cfg_state));
+
+    ret |= v3_hook_io_port(vm, FW_CFG_CTL_PORT, fw_cfg_ctl_read, &fw_cfg_ctl_write, cfg_state);
+    ret |= v3_hook_io_port(vm, FW_CFG_DATA_PORT, fw_cfg_data_read, &fw_cfg_data_write, cfg_state);
+
+    if (ret != 0) {
+        PrintError("Failed to hook FW CFG ports!\n");
+        return -1;
+    }
+
+    /* The signature must be a heap copy: a string literal would later be
+     * handed to V3_Free() by v3_fw_cfg_deinit(). */
+    signature = V3_Malloc(signature_len);
+
+    if (signature == NULL) {
+        PrintError("Could not allocate fw_cfg signature\n");
+        return -1;
+    }
+
+    memcpy(signature, "QEMU", signature_len);
+
+    if (!fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, signature, signature_len)) {
+        V3_Free(signature);
+        PrintError("Could not register fw_cfg signature\n");
+        return -1;
+    }
+
+    /* NOTE(review): RAM size is published in units of 1 MiB here; QEMU's
+     * fw_cfg publishes bytes -- confirm which unit the BIOS expects. */
+    if (!fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, 0) ||
+        !fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores) ||
+        !fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores) ||
+        !fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1) ||
+        !fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1) ||
+        !fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024)) ||
+        !fw_cfg_add_i32(cfg_state, FW_CFG_IRQ0_OVERRIDE, 1)) {
+        PrintError("Could not register fw_cfg entries\n");
+        return -1;
+    }
+
+    /* NUMA layout */
+    if (fw_cfg_setup_numa(vm, cfg_state) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Release every buffer registered in the VM's fw_cfg entry table.
+ *
+ * All non-NULL data pointers are passed to V3_Free(), so every buffer
+ * registered with fw_cfg_add_bytes() must have come from V3_Malloc().
+ * Each slot is cleared after its buffer is freed so that a repeated call,
+ * or a late data-port access, never touches freed memory.
+ */
+void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
+    struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
+    int i, j;
+
+    for (i = 0; i < 2; ++i) {
+        for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
+            struct v3_fw_cfg_entry * entry = &(cfg_state->entries[i][j]);
+
+            if (entry->data != NULL) {
+                V3_Free(entry->data);
+                entry->data = NULL;
+                entry->len = 0;
+            }
+        }
+    }
+}
+
+
+
+
+/* E820 code for HVM enabled bochs bios:  */
+#if 0
+/* E820 location in HVM virtual address space. Taken from VMXASSIST. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+    // Copy E820 to BIOS. See rombios.c, copy_e820_table function.
+    addr_t e820_ptr = (addr_t)V3_VAddr((void *)(vm->mem_map.base_region.host_addr + HVM_E820_PAGE));
+
+    *(uint16_t *)(e820_ptr + HVM_E820_NR_OFFSET) = e820->count;
+    memcpy((void *)(e820_ptr + HVM_E820_OFFSET), &e820->entry[0], sizeof(e820->entry[0]) * e820->count);
+    V3_Free(e820);
+
+    return 0;
+#endif