--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMM_MULTIBOOT_H
+#define __VMM_MULTIBOOT_H
+
+
+#ifdef __V3VEE__
+
+#include <palacios/vmm_types.h>
+
+// Fixed-size packed multiboot header record made of four 32-bit fields.
+typedef struct mb_header {
+ uint32_t magic; // NOTE(review): presumably the multiboot header magic value - confirm against the spec
+ uint32_t arch; // architecture selector; ARCH_X86 below is the only value defined here
+#define ARCH_X86 0
+ uint32_t headerlen; // presumably the total header length in bytes - TODO confirm
+ uint32_t checksum; // NOTE(review): the checksum rule is not visible in this header
+} __attribute__((packed)) mb_header_t;
+// Common packed 8-byte prefix (16-bit type, 16-bit flags, 32-bit size) that each tag struct in this header embeds as its first member `tag`.
+typedef struct mb_tag {
+ uint16_t type; // presumably matched against the MB_TAG_* constants in this header - confirm in the parser
+ uint16_t flags;
+ uint32_t size; // NOTE(review): presumably the byte length of the whole tag including this prefix - confirm
+} __attribute__((packed)) mb_tag_t;
+// Information-request tag: the common tag prefix followed by a variable number of 32-bit type codes.
+#define MB_TAG_INFO 1
+typedef struct mb_info_req {
+ mb_tag_t tag;
+ uint32_t types[]; // C99 flexible array member (was a GNU zero-length array); element count presumably follows from tag.size - confirm in the parser
+} __attribute__((packed)) mb_info_t;
+
+// 32-bit address types for the tag structs; only u_virt is used within this header.
+typedef uint32_t u_virt, u_phys;
+// Address tag: describes where the kernel image is placed and where its BSS ends.
+#define MB_TAG_ADDRESS 2
+typedef struct mb_addr {
+ mb_tag_t tag;
+ u_virt header_addr; // NOTE(review): presumably the address the multiboot header occupies once loaded - confirm
+ u_virt load_addr; // NOTE(review): presumably the address at which the image is to be loaded - confirm
+ u_virt load_end_addr; // NOTE(review): presumably the end of the loaded text+data - confirm
+ u_virt bss_end_addr; // NOTE(review): presumably the end of the BSS region following load_end_addr - confirm
+} __attribute__((packed)) mb_addr_t;
+// Entry tag: the common tag prefix plus a single 32-bit address.
+#define MB_TAG_ENTRY 3
+typedef struct mb_entry {
+ mb_tag_t tag;
+ u_virt entry_addr; // NOTE(review): presumably the address at which guest execution starts - confirm
+} __attribute__((packed)) mb_entry_t;
+// Flags tag: the common tag prefix plus one 32-bit flags word.
+#define MB_TAG_FLAGS 4
+typedef struct mb_flags {
+ mb_tag_t tag;
+ uint32_t console_flags; // NOTE(review): bit meanings are not defined in this header - see the multiboot spec
+} __attribute__((packed)) mb_flags_t;
+// Framebuffer tag: the common tag prefix plus three 32-bit values.
+#define MB_TAG_FRAMEBUF 5
+typedef struct mb_framebuf {
+ mb_tag_t tag;
+ uint32_t width; // NOTE(review): units (pixels vs. characters) are not stated here - confirm
+ uint32_t height;
+ uint32_t depth; // presumably bits per pixel - TODO confirm
+} __attribute__((packed)) mb_framebuf_t;
+// Module-alignment tag: the common tag prefix plus one 32-bit value.
+#define MB_TAG_MODALIGN 6
+typedef struct mb_modalign {
+ mb_tag_t tag;
+ uint32_t size; // NOTE(review): distinct from tag.size; presumably the requested module alignment - confirm
+} __attribute__((packed)) mb_modalign_t;
+
+
+// For HVM, which can use a pure 64-bit variant
+// of multiboot. The presence of this tag
+// in a kernel's multiboot header indicates that
+// this special mode is requested.
+#define MB_TAG_MB64_HRT 0xf00d
+typedef struct mb_mb64_hrt {
+ mb_tag_t tag;
+ uint32_t hrt_flags; // NOTE(review): flag bit definitions are not part of this header
+} __attribute__((packed)) mb_mb64_hrt_t;
+// Parse result: one pointer per header/tag kind declared in this header.
+typedef struct mb_data {
+ mb_header_t *header; // NOTE(review): presumably points into the kernel image buffer rather than a copy - confirm in the parser
+ mb_info_t *info;
+ mb_addr_t *addr; // NOTE(review): presumably NULL when the image has no such tag - confirm
+ mb_entry_t *entry; // NOTE(review): presumably NULL when the image has no such tag - confirm
+ mb_flags_t *flags;
+ mb_framebuf_t *framebuf;
+ mb_modalign_t *modalign;
+ mb_mb64_hrt_t *mb64_hrt;
+} mb_data_t;
+// Multiboot state kept per VM (NOTE(review): presumably embedded in struct v3_vm_info - confirm).
+struct v3_vm_multiboot {
+ uint8_t is_multiboot; // nonzero presumably means the VM boots via multiboot - confirm where it is set
+ struct v3_cfg_file *mb_file; // presumably the configured kernel image file - TODO confirm
+ mb_data_t mb_data; // parsed header/tag pointers for the kernel image
+ // GPA where we put the MB record, GDT, TSS, etc
+ // The kernel load address and size are as in mb_data
+ void *mb_data_gpa;
+};
+
+// There is no core structure for
+// multiboot capability
+
+// Forward declarations of every struct type named in the prototypes below,
+// so that this header does not depend on the order of inclusion.
+struct v3_xml;
+struct v3_vm_info;
+struct guest_info;
+struct v3_cfg_file;
+
+// Global setup/teardown of multiboot support. Declared with (void) so that
+// these are real prototypes and calls with arguments are rejected.
+int v3_init_multiboot(void);
+int v3_deinit_multiboot(void);
+
+// Per-VM setup/teardown
+int v3_init_multiboot_vm(struct v3_vm_info *vm, struct v3_xml *config);
+int v3_deinit_multiboot_vm(struct v3_vm_info *vm);
+
+// Per-core setup/teardown
+int v3_init_multiboot_core(struct guest_info *core);
+int v3_deinit_multiboot_core(struct guest_info *core);
+
+// Prepare the VM as a whole, and then each core, for a multiboot boot
+int v3_setup_multiboot_vm_for_boot(struct v3_vm_info *vm);
+int v3_setup_multiboot_core_for_boot(struct guest_info *core);
+
+// Per-core reset handler.
+// NOTE(review): presumably returns 0 when there is nothing to do, a negative
+// value on failure and 1 when the core has been set up to reboot - confirm
+// against the definition.
+int v3_handle_multiboot_reset(struct guest_info *core);
+
+// The following are utility functions that HVM builds on
+int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result);
+// Copies the kernel image described by mb from file into guest memory.
+// NOTE(review): presumably returns 0 on success and -1 on failure - confirm.
+int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
+ void *base, uint64_t limit);
+// The multiboot table is prepared from the perspective of the given
+// core - this allows it to be generated appropriately for ROS and HRT cores
+// when used in an HVM
+uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size);
+
+#endif /* __V3VEE__ */
+
+
+#endif
uint32_t first_hrt_apic_id;
uint32_t have_hrt_ioapic;
uint32_t first_hrt_ioapic_entry;
+ uint64_t first_hrt_addr;
} __attribute__((packed)) mb_info_hrt_t;
- only ROS memory visible
- regular multiboot or bios boot assumed
HRT core
- - full HRT memory visible
+ - all memory visible
- HRT64 multiboot assumed
*/
uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size)
{
struct v3_vm_info *vm = core->vm_info;
- mb_info_header_t *header;
+ mb_info_header_t *header=0;
#ifdef V3_CONFIG_HVM
- mb_info_hrt_t *hrt;
+ mb_info_hrt_t *hrt=0;
#endif
- mb_info_mem_t *mem;
- mb_info_memmap_t *memmap;
- mb_info_tag_t *tag;
- uint64_t num_mem, cur_mem;
+ mb_info_mem_t *mem=0;
+ mb_info_memmap_t *memmap=0;
+ mb_info_tag_t *tag=0;
+ uint64_t num_mem=0, cur_mem=0;
uint64_t total_mem = vm->mem_size;
hrt->first_hrt_apic_id = vm->hvm_state.first_hrt_core;
hrt->have_hrt_ioapic=0;
hrt->first_hrt_ioapic_entry=0;
+ hrt->first_hrt_addr = vm->hvm_state.first_hrt_gpa;
}
#endif
int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
void *base, uint64_t limit)
{
- uint32_t offset;
+ uint32_t offset=0;
+ uint32_t header_offset = (uint32_t) ((uint64_t)(mb->header) - (uint64_t)(file->data));
+ uint32_t size;
if (!mb->addr || !mb->entry) {
PrintError(vm,VCORE_NONE, "multiboot: kernel is missing address or entry point\n");
return -1;
}
- offset = mb->addr->load_addr - mb->addr->header_addr;
+ offset = header_offset - (mb->addr->header_addr - mb->addr->load_addr);
+ size = mb->addr->load_end_addr - mb->addr->load_addr;
+
+ if (size != file->size-offset) {
+ V3_Print(vm,VCORE_NONE,"multiboot: strange: size computed as %u, but file->size-offset = %llu\n",size,file->size-offset);
+ }
- // Skip the ELF header - assume 1 page... weird....
// We are trying to do as little ELF loading here as humanly possible
v3_write_gpa_memory(&vm->cores[0],
(addr_t)(mb->addr->load_addr),
- file->size-PAGE_SIZE-offset,
- file->data+PAGE_SIZE+offset);
+ size,
+ file->data+offset);
PrintDebug(vm,VCORE_NONE,
"multiboot: wrote 0x%llx bytes starting at offset 0x%llx to %p\n",
- (uint64_t) file->size-PAGE_SIZE-offset,
- (uint64_t) PAGE_SIZE+offset,
+ (uint64_t) size,
+ (uint64_t) offset,
(void*)(addr_t)(mb->addr->load_addr));
+ size = mb->addr->bss_end_addr - mb->addr->load_end_addr + 1;
+
+ // Now we need to zero the BSS
+ v3_set_gpa_memory(&vm->cores[0],
+ (addr_t)(mb->addr->load_end_addr),
+ size,
+ 0);
+
+ PrintDebug(vm,VCORE_NONE,
+ "multiboot: zeroed 0x%llx bytes starting at %p\n",
+ (uint64_t) size,
+ (void*)(addr_t)(mb->addr->load_end_addr));
+
+
return 0;
}
 static void write_tss(struct v3_vm_info *vm, void *base, uint64_t limit)
 {
- int i;
- uint64_t tss_data=0x0;
-
- for (i=0;i<limit/8;i++) {
- v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+8*i),8,(uint8_t*) &tss_data);
- }
+ v3_set_gpa_memory(&vm->cores[0],(addr_t)base,limit,0); // zero-fill the TSS: presumably sets `limit` bytes at guest-physical `base` to 0 through core 0, replacing the removed 8-bytes-at-a-time write loop
 PrintDebug(vm,VCORE_NONE,"multiboot: wrote TSS at %p\n",base);
 }
core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
core->segments.cs.base = (addr_t) base;
core->segments.cs.limit = limit;
- core->segments.cs.type = 0xe;
- core->segments.cs.system = 0;
+ core->segments.cs.type = 0xa;
+ core->segments.cs.system = 1;
core->segments.cs.dpl = 0;
core->segments.cs.present = 1;
core->segments.cs.long_mode = 0;
+ core->segments.cs.db = 1; // 32 bit operand and address size
+ core->segments.cs.granularity = 1; // pages
// DS, SS, etc are identical
core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
core->segments.ds.base = (addr_t) base;
core->segments.ds.limit = limit;
- core->segments.ds.type = 0x6;
- core->segments.ds.system = 0;
+ core->segments.ds.type = 0x2;
+ core->segments.ds.system = 1;
core->segments.ds.dpl = 0;
core->segments.ds.present = 1;
core->segments.ds.long_mode = 0;
-
+ core->segments.ds.db = 1; // 32 bit operand and address size
+ core->segments.ds.granularity = 1; // pages
+
memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
return 0;
}
+
+
+/*
+ * Per-core handler for resetting a multiboot guest.
+ *
+ * Does nothing (returns 0) unless the calling core is in CORE_RESETTING
+ * and its VM is flagged as multiboot. Otherwise all cores meet at the
+ * VM's reset barrier, redo their boot setup and meet at the barrier
+ * again before returning:
+ *   - vcpu 0 acts as leader: it marks the VM as resetting, redoes the
+ *     VM-wide boot setup, and afterwards marks itself and the VM running
+ *   - every other core is put back into real mode / physical memory mode
+ *     and left stopped
+ *
+ * Returns a negative value if any setup step failed, otherwise 1 to
+ * indicate a reboot.
+ */
+int v3_handle_multiboot_reset(struct guest_info *core)
+{
+    struct v3_vm_info *vm = core->vm_info;
+    int leader;
+    int rc = 0;
+
+    // Only a resetting core of a multiboot VM has work to do here
+    if (core->core_run_state != CORE_RESETTING || !vm->mb_state.is_multiboot) {
+        return 0;
+    }
+
+    // wait for everyone
+    v3_counting_barrier(&vm->reset_barrier);
+
+    // vcpu 0 is the leader (true if it is a ROS core or this is a non-HVM)
+    leader = (core->vcpu_id == 0);
+
+    if (leader) {
+        vm->run_state = VM_RESETTING;
+        // we will recopy the image
+        rc |= v3_setup_multiboot_vm_for_boot(vm);
+    }
+
+    rc |= v3_setup_multiboot_core_for_boot(core);
+
+    if (!leader) {
+        // for APs, we need to bring them back to the init state
+        core->cpu_mode = REAL;
+        core->mem_mode = PHYSICAL_MEM;
+        core->core_run_state = CORE_STOPPED;
+    } else {
+        core->core_run_state = CORE_RUNNING;
+        vm->run_state = VM_RUNNING;
+    }
+
+    // sync on the way out
+    v3_counting_barrier(&vm->reset_barrier);
+
+    // 1 tells the caller to reboot; errors are passed through unchanged
+    return (rc < 0) ? rc : 1;
+}
+