2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
10 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
11 * All rights reserved.
13 * Author: Alexander Kudryavtsev <alexk@ispras.ru>
14 * Implementation of FW_CFG interface
15 * Author: Jack Lange <jacklange@cs.pitt.edu>
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
22 #include <palacios/vmm_fw_cfg.h>
23 #include <palacios/vmm_mem.h>
24 #include <palacios/vmm.h>
25 #include <palacios/vm_guest.h>
/* fw_cfg I/O interface: the guest writes a 16-bit entry key to the control
 * port, then streams the selected entry's payload one byte at a time
 * through the data port. Same protocol as QEMU's fw_cfg device. */
#define FW_CFG_CTL_PORT 0x510
#define FW_CFG_DATA_PORT 0x511

/* Well-known fw_cfg entry keys (architecture-independent set) */
#define FW_CFG_SIGNATURE 0x00
#define FW_CFG_ID 0x01
#define FW_CFG_UUID 0x02
#define FW_CFG_RAM_SIZE 0x03
#define FW_CFG_NOGRAPHIC 0x04
#define FW_CFG_NB_CPUS 0x05
#define FW_CFG_MACHINE_ID 0x06
#define FW_CFG_KERNEL_ADDR 0x07
#define FW_CFG_KERNEL_SIZE 0x08
#define FW_CFG_KERNEL_CMDLINE 0x09
#define FW_CFG_INITRD_ADDR 0x0a
#define FW_CFG_INITRD_SIZE 0x0b
#define FW_CFG_BOOT_DEVICE 0x0c
#define FW_CFG_NUMA 0x0d
#define FW_CFG_BOOT_MENU 0x0e
#define FW_CFG_MAX_CPUS 0x0f
#define FW_CFG_KERNEL_ENTRY 0x10
#define FW_CFG_KERNEL_DATA 0x11
#define FW_CFG_INITRD_DATA 0x12
#define FW_CFG_CMDLINE_ADDR 0x13
#define FW_CFG_CMDLINE_SIZE 0x14
#define FW_CFG_CMDLINE_DATA 0x15
#define FW_CFG_SETUP_ADDR 0x16
#define FW_CFG_SETUP_SIZE 0x17
#define FW_CFG_SETUP_DATA 0x18
#define FW_CFG_FILE_DIR 0x19

/* Flag bits OR'd into entry keys */
#define FW_CFG_WRITE_CHANNEL 0x4000
#define FW_CFG_ARCH_LOCAL 0x8000
/* Mask extracting the entry index from a key.
 * Fully parenthesized so the expansion behaves as a single term
 * regardless of the expression it is embedded in. */
#define FW_CFG_ENTRY_MASK (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))

/* Architecture-local (x86) entry keys */
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)

/* Sentinel meaning "no entry currently selected" */
#define FW_CFG_INVALID 0xffff
77 E820_TYPE_ACPI_RECL = 3,
78 E820_TYPE_ACPI_NVS = 4,
82 #define E820_MAX_COUNT 128
83 struct e820_entry_packed {
87 } __attribute__((packed));
91 struct e820_entry_packed entry[E820_MAX_COUNT];
92 } __attribute__((packed)) __attribute((__aligned__(4)));
96 static int fw_cfg_add_bytes(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint8_t * data, uint32_t len)
98 int arch = !!(key & FW_CFG_ARCH_LOCAL);
99 // JRL: Well this is demented... Its basically generating a 1 or 0 from a mask operation
101 key &= FW_CFG_ENTRY_MASK;
103 if (key >= FW_CFG_MAX_ENTRY) {
107 cfg_state->entries[arch][key].data = data;
108 cfg_state->entries[arch][key].len = len;
113 static int fw_cfg_add_i16(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint16_t value)
115 uint16_t * copy = NULL;
117 copy = V3_Malloc(sizeof(uint16_t));
119 return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint16_t));
122 static int fw_cfg_add_i32(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint32_t value)
124 uint32_t * copy = NULL;
126 copy = V3_Malloc(sizeof(uint32_t));
128 return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint32_t));
131 static int fw_cfg_add_i64(struct v3_fw_cfg_state * cfg_state, uint16_t key, uint64_t value)
133 uint64_t * copy = NULL;
135 copy = V3_Malloc(sizeof(uint64_t));
137 return fw_cfg_add_bytes(cfg_state, key, (uint8_t *)copy, sizeof(uint64_t));
140 static int fw_cfg_ctl_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
144 static int fw_cfg_ctl_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
145 V3_ASSERT(core->vm_info, core, length == 2);
147 struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
148 uint16_t key = *(uint16_t *)src;
151 cfg_state->cur_offset = 0;
153 if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
154 cfg_state->cur_entry = FW_CFG_INVALID;
157 cfg_state->cur_entry = key;
165 static int fw_cfg_data_read(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
166 V3_ASSERT(core->vm_info, core, length == 1);
168 struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
169 int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
170 struct v3_fw_cfg_entry * cfg_entry = &cfg_state->entries[arch][cfg_state->cur_entry & FW_CFG_ENTRY_MASK];
173 if ( (cfg_state->cur_entry == FW_CFG_INVALID) ||
174 (cfg_entry->data == NULL) ||
175 (cfg_state->cur_offset >= cfg_entry->len)) {
179 ret = cfg_entry->data[cfg_state->cur_offset++];
182 *(uint8_t *)src = ret;
187 static int fw_cfg_data_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) {
188 V3_ASSERT(core->vm_info, core, length == 1);
190 struct v3_fw_cfg_state * cfg_state = (struct v3_fw_cfg_state *)priv_data;
191 int arch = !!(cfg_state->cur_entry & FW_CFG_ARCH_LOCAL);
192 struct v3_fw_cfg_entry * cfg_entry = &cfg_state->entries[arch][cfg_state->cur_entry & FW_CFG_ENTRY_MASK];
194 if ( (cfg_state->cur_entry & FW_CFG_WRITE_CHANNEL) &&
195 (cfg_entry->callback != NULL) &&
196 (cfg_state->cur_offset < cfg_entry->len)) {
198 cfg_entry->data[cfg_state->cur_offset++] = *(uint8_t *)src;
200 if (cfg_state->cur_offset == cfg_entry->len) {
201 cfg_entry->callback(cfg_entry->callback_opaque, cfg_entry->data);
202 cfg_state->cur_offset = 0;
209 static struct e820_table * e820_populate(struct v3_vm_info * vm) {
210 struct v3_e820_entry * entry = NULL;
211 struct e820_table * e820 = NULL;
214 if (vm->mem_map.e820_count > E820_MAX_COUNT) {
215 PrintError(vm, VCORE_NONE,"Too much E820 table entries! (max is %d)\n", E820_MAX_COUNT);
219 e820 = V3_Malloc(sizeof(struct e820_table));
222 PrintError(vm, VCORE_NONE, "Out of memory!\n");
226 e820->count = vm->mem_map.e820_count;
228 list_for_each_entry(entry, &vm->mem_map.e820_list, list) {
229 e820->entry[i].addr = e->addr;
230 e820->entry[i].size = e->size;
231 e820->entry[i].type = e->type;
239 int v3_fw_cfg_init(struct v3_vm_info * vm) {
243 struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
248 struct e820_table * e820 = e820_populate(vm);
251 PrintError(vm, VCORE_NONE, "Failed to populate E820 for FW interface!\n");
258 ret |= v3_hook_io_port(vm, FW_CFG_CTL_PORT, fw_cfg_ctl_read, &fw_cfg_ctl_write, cfg_state);
259 ret |= v3_hook_io_port(vm, FW_CFG_DATA_PORT, fw_cfg_data_read, &fw_cfg_data_write, cfg_state);
263 PrintError(vm, VCORE_NONE, "Failed to hook FW CFG ports!\n");
267 fw_cfg_add_bytes(cfg_state, FW_CFG_SIGNATURE, (uint8_t *)"QEMU", 4);
268 //fw_cfg_add_bytes(cfg_state, FW_CFG_UUID, qemu_uuid, 16);
269 fw_cfg_add_i16(cfg_state, FW_CFG_NOGRAPHIC, /*(uint16_t)(display_type == DT_NOGRAPHIC)*/ 0);
270 fw_cfg_add_i16(cfg_state, FW_CFG_NB_CPUS, (uint16_t)vm->num_cores);
271 fw_cfg_add_i16(cfg_state, FW_CFG_MAX_CPUS, (uint16_t)vm->num_cores);
272 fw_cfg_add_i16(cfg_state, FW_CFG_BOOT_MENU, (uint16_t)1);
273 //fw_cfg_bootsplash(cfg_state);
275 fw_cfg_add_i32(cfg_state, FW_CFG_ID, 1);
276 fw_cfg_add_i64(cfg_state, FW_CFG_RAM_SIZE, (uint64_t)vm->mem_size / (1024 * 1024));
278 //fw_cfg_add_bytes(cfg_state, FW_CFG_ACPI_TABLES, (uint8_t *)acpi_tables,
281 fw_cfg_add_i32(cfg_state, FW_CFG_IRQ0_OVERRIDE, 1);
284 smbios_table = smbios_get_table(&smbios_len);
287 fw_cfg_add_bytes(cfg_state, FW_CFG_SMBIOS_ENTRIES,
288 smbios_table, smbios_len);
291 fw_cfg_add_bytes(cfg_state, FW_CFG_E820_TABLE, (uint8_t *)e820,
292 sizeof(struct e820_table));
294 fw_cfg_add_bytes(cfg_state, FW_CFG_HPET, (uint8_t *)&hpet_cfg,
295 sizeof(struct hpet_fw_config));
302 v3_cfg_tree_t * layout_cfg = v3_cfg_subtree(vm->cfg_data->cfg, "mem_layout");
303 char * num_nodes_str = v3_cfg_val(layout_cfg, "vnodes");
306 /* locations in fw_cfg NUMA array for each info region. */
309 int mem_offset = 1 + vm->num_cores;
312 num_nodes = atoi(num_nodes_str);
316 uint64_t * numa_fw_cfg = NULL;
319 // Allocate the global NUMA configuration array
320 numa_fw_cfg = V3_Malloc((1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
322 if (numa_fw_cfg == NULL) {
323 PrintError(vm, VCORE_NONE, "Could not allocate fw_cfg NUMA config space\n");
327 memset(numa_fw_cfg, 0, (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
329 // First 8 bytes is the number of NUMA zones
330 numa_fw_cfg[node_offset] = num_nodes;
333 // Next region is array of core->node mappings
334 for (i = 0; i < vm->num_cores; i++) {
335 char * vnode_str = v3_cfg_val(vm->cores[i].core_cfg_data, "vnode");
337 if (vnode_str == NULL) {
338 // if no cpu was specified then NUMA layout is randomized, and we're screwed...
339 numa_fw_cfg[core_offset + i] = 0;
341 numa_fw_cfg[core_offset + i] = (uint64_t)atoi(vnode_str);
347 /* Final region is an array of node->mem_size mappings
348 * this assumes that memory is assigned to NUMA nodes in consecutive AND contiguous blocks
349 * NO INTERLEAVING ALLOWED
350 * e.g. node 0 points to the first x bytes of memory, node 1 points to the next y bytes, etc
351 * The array only stores the x,y,... values, indexed by the node ID
352 * We should probably fix this, but that will require modifications to SEABIOS
355 * For now we will assume that the xml data is set accordingly, so we will just walk through the mem regions specified there.
356 * NOTE: This will overwrite configurations if multiple xml regions are defined for each node
360 v3_cfg_tree_t * region_desc = v3_cfg_subtree(layout_cfg, "region");
362 while (region_desc) {
363 char * start_addr_str = v3_cfg_val(region_desc, "start_addr");
364 char * end_addr_str = v3_cfg_val(region_desc, "end_addr");
365 char * vnode_id_str = v3_cfg_val(region_desc, "vnode");
367 addr_t start_addr = 0;
371 if ((!start_addr_str) || (!end_addr_str) || (!vnode_id_str)) {
372 PrintError(vm, VCORE_NONE, "Invalid memory layout in configuration\n");
373 V3_Free(numa_fw_cfg);
377 start_addr = atox(start_addr_str);
378 end_addr = atox(end_addr_str);
379 vnode_id = atoi(vnode_id_str);
381 numa_fw_cfg[mem_offset + vnode_id] = end_addr - start_addr;
383 region_desc = v3_cfg_next_branch(region_desc);
388 /* Print the NUMA mapping being passed in */
390 uint64_t region_start = 0;
392 V3_Print(vm, VCORE_NONE, "NUMA CONFIG: (nodes=%llu)\n", numa_fw_cfg[0]);
394 for (i = 0; i < vm->num_cores; i++) {
395 V3_Print(vm, VCORE_NONE, "\tCore %d -> Node %llu\n", i, numa_fw_cfg[core_offset + i]);
398 for (i = 0; i < num_nodes; i++) {
399 V3_Print(vm, VCORE_NONE, "\tMem (%p - %p) -> Node %d\n", (void *)region_start,
400 (void *)numa_fw_cfg[mem_offset + i], i);
402 region_start += numa_fw_cfg[mem_offset + i];
407 // Register the NUMA cfg array with the FW_CFG interface
408 fw_cfg_add_bytes(cfg_state, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
409 (1 + vm->num_cores + num_nodes) * sizeof(uint64_t));
418 void v3_fw_cfg_deinit(struct v3_vm_info *vm) {
419 struct v3_fw_cfg_state * cfg_state = &(vm->fw_cfg_state);
422 for (i = 0; i < 2; ++i) {
423 for (j = 0; j < FW_CFG_MAX_ENTRY; ++j) {
424 if (cfg_state->entries[i][j].data != NULL)
425 V3_Free(cfg_state->entries[i][j].data);
/* E820 code for HVM enabled bochs bios: */

/* NOTE(review): the fragment below appears to be a disabled legacy code
 * path (its enclosing guard -- likely an #if 0 -- and surrounding function
 * are not visible in this view). It references locals 'vm' and 'e820'
 * that are not in scope here; confirm against the full file before
 * enabling or removing it. */

/* E820 location in HVM virtual address space. Taken from VMXASSIST. */
#define HVM_E820_PAGE 0x00090000
#define HVM_E820_NR_OFFSET 0x000001E8
#define HVM_E820_OFFSET 0x000002D0
// Copy E820 to BIOS. See rombios.c, copy_e820_table function.
addr_t e820_ptr = (addr_t)V3_VAddr((void *)(vm->mem_map.base_region.host_addr + HVM_E820_PAGE));

// Entry count lives at a fixed offset in the BIOS data page...
*(uint16_t *)(e820_ptr + HVM_E820_NR_OFFSET) = e820->count;
// ...followed by the packed entry array at its own fixed offset.
memcpy((void *)(e820_ptr + HVM_E820_OFFSET), &e820->entry[0], sizeof(e820->entry[0]) * e820->count);