Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are checked out the same way; substitute the branch name for devel.


Multiboot enhancements
Peter Dinda [Thu, 18 Jun 2015 22:06:07 +0000 (17:06 -0500)]
- enhancements to HRT info block
- clear of BSS (expected by MB kernel)
- actually included header file
- reset capability

palacios/include/palacios/vmm_multiboot.h [new file with mode: 0644]
palacios/src/palacios/vmm_multiboot.c

diff --git a/palacios/include/palacios/vmm_multiboot.h b/palacios/include/palacios/vmm_multiboot.h
new file mode 100644 (file)
index 0000000..c5c370c
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMM_MULTIBOOT_H
+#define __VMM_MULTIBOOT_H
+
+
+#ifdef __V3VEE__ 
+
+#include <palacios/vmm_types.h>
+
+
+typedef struct mb_header { // fixed header record; mb_data_t.header points at one of these inside the kernel file image
+    uint32_t magic;
+    uint32_t arch; // presumably compared against ARCH_X86 below -- the parser is not visible here, confirm
+#define ARCH_X86 0
+    uint32_t headerlen; // NOTE(review): presumably the byte length of the header including its tags -- confirm
+    uint32_t checksum;
+} __attribute__((packed)) mb_header_t;
+
+typedef struct mb_tag { // common prefix: every tag structure in this header starts with an mb_tag_t member
+    uint16_t type; // presumably one of the MB_TAG_* constants defined in this header -- confirm against the parser
+    uint16_t flags;
+    uint32_t size; // NOTE(review): presumably the size of the whole tag in bytes -- confirm
+} __attribute__((packed)) mb_tag_t;
+
+#define MB_TAG_INFO    1
+typedef struct mb_info_req { // note: the struct tag is mb_info_req but the typedef name is mb_info_t
+    mb_tag_t tag;
+    uint32_t types[0]; // zero-length trailing array (GNU extension); the entry count is not stored in this struct
+} __attribute__((packed)) mb_info_t;
+
+
+typedef uint32_t u_virt, u_phys; // 32-bit address types; only u_virt is used in this header
+
+#define MB_TAG_ADDRESS 2
+typedef struct mb_addr { // image placement data consumed by v3_write_multiboot_kernel()
+    mb_tag_t tag;
+    u_virt   header_addr; // (header_addr - load_addr) is subtracted from the header's file offset to find the first byte to load
+    u_virt   load_addr; // guest physical address at which the image bytes are written
+    u_virt   load_end_addr; // (load_end_addr - load_addr) bytes are copied; zeroing of the BSS starts at this address
+    u_virt   bss_end_addr; // (bss_end_addr - load_end_addr + 1) bytes are zeroed after the copy
+} __attribute__((packed)) mb_addr_t;
+
+#define MB_TAG_ENTRY 3
+typedef struct mb_entry { // v3_write_multiboot_kernel() reports an error when this tag or the address tag is missing
+    mb_tag_t tag;
+    u_virt   entry_addr; // presumably the address execution starts at -- the consumer is not visible here, confirm
+} __attribute__((packed)) mb_entry_t;
+
+#define MB_TAG_FLAGS 4
+typedef struct mb_flags { // reachable through mb_data_t.flags
+    mb_tag_t tag;
+    uint32_t console_flags; // NOTE(review): bit meanings are not defined in this header -- confirm against the multiboot2 spec
+} __attribute__((packed)) mb_flags_t;
+
+#define MB_TAG_FRAMEBUF 5
+typedef struct mb_framebuf { // reachable through mb_data_t.framebuf
+    mb_tag_t tag;
+    uint32_t width; // NOTE(review): units are not stated in this header -- confirm
+    uint32_t height;
+    uint32_t depth; // presumably bits per pixel -- TODO confirm
+} __attribute__((packed)) mb_framebuf_t;
+
+#define MB_TAG_MODALIGN 6
+typedef struct mb_modalign { // reachable through mb_data_t.modalign
+    mb_tag_t tag;
+    uint32_t size; // a separate field from tag.size; NOTE(review): its meaning is not documented here -- confirm
+} __attribute__((packed)) mb_modalign_t;
+
+
+// For HVM, which can use a pure 64-bit variant
+// of multiboot.  The presence of this tag in a
+// kernel's header indicates that this special
+// mode is requested
+#define MB_TAG_MB64_HRT 0xf00d
+typedef struct mb_mb64_hrt { // reachable through mb_data_t.mb64_hrt
+    mb_tag_t       tag;
+    uint32_t       hrt_flags; // NOTE(review): flag bits are not defined in this header -- confirm where they are interpreted
+} __attribute__((packed)) mb_mb64_hrt_t;
+
+typedef struct mb_data { // output of v3_parse_multiboot_header(): one pointer per header element type
+    mb_header_t   *header; // points into the kernel file data; v3_write_multiboot_kernel() subtracts file->data from it
+    mb_info_t     *info;
+    mb_addr_t     *addr; // NULL is treated as "tag missing" by v3_write_multiboot_kernel()
+    mb_entry_t    *entry; // NULL is treated as "tag missing" by v3_write_multiboot_kernel()
+    mb_flags_t    *flags;
+    mb_framebuf_t *framebuf;
+    mb_modalign_t *modalign;
+    mb_mb64_hrt_t *mb64_hrt; // presumably NULL unless the MB_TAG_MB64_HRT tag was found -- confirm against the parser
+} mb_data_t;
+
+struct v3_vm_multiboot { // presumably the type of vm_info->mb_state used by v3_handle_multiboot_reset() -- confirm
+    uint8_t   is_multiboot; // when zero, v3_handle_multiboot_reset() returns 0 without doing anything
+    struct v3_cfg_file *mb_file; // presumably the kernel image file handed to the parse/write helpers -- confirm
+    mb_data_t mb_data;
+    // GPA where we put the MB record, GDT, TSS, etc.
+    // The kernel load address and size are as in mb_data (see mb_addr_t)
+    void     *mb_data_gpa; 
+};
+
+// There is no per-core structure for the
+// multiboot capability
+
+struct v3_xml;
+
+int v3_init_multiboot(); // NOTE(review): "()" leaves the parameters unspecified in C; "(void)" would make this a prototype
+int v3_deinit_multiboot(); // NOTE(review): same "()" versus "(void)" remark applies here
+
+int v3_init_multiboot_vm(struct v3_vm_info *vm, struct v3_xml *config);
+int v3_deinit_multiboot_vm(struct v3_vm_info *vm);
+
+int v3_init_multiboot_core(struct guest_info *core);
+int v3_deinit_multiboot_core(struct guest_info *core);
+
+int v3_setup_multiboot_vm_for_boot(struct v3_vm_info *vm);
+int v3_setup_multiboot_core_for_boot(struct guest_info *core);
+
+int v3_handle_multiboot_reset(struct guest_info *core); // returns 0 if nothing was done, 1 after a reset, negative if setup failed
+
+// The following are utility functions that the HVM code builds on
+int      v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result);
+int      v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file, 
+                                  void *base, uint64_t limit);
+// The multiboot table is prepared from the perspective of the given
+// core - this allows it to be generated appropriately for ROS and HRT cores
+// when used in an HVM
+uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size);
+
+#endif /* __V3VEE__ */
+
+
+#endif
index 0a7f60b..c007291 100644 (file)
@@ -217,6 +217,7 @@ typedef struct mb_info_hrt {
     uint32_t       first_hrt_apic_id;
     uint32_t       have_hrt_ioapic;
     uint32_t       first_hrt_ioapic_entry;
+    uint64_t       first_hrt_addr;
 } __attribute__((packed)) mb_info_hrt_t;
 
 
@@ -449,7 +450,7 @@ int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result)
     - only ROS memory visible
     - regular multiboot or bios boot assumed
    HRT core
-    - full HRT memory visible
+    - all memory visible
     - HRT64 multiboot assumed
 
 */
@@ -457,14 +458,14 @@ int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result)
 uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size)
 {
     struct v3_vm_info *vm = core->vm_info;
-    mb_info_header_t *header;
+    mb_info_header_t *header=0;
 #ifdef V3_CONFIG_HVM
-    mb_info_hrt_t *hrt;
+    mb_info_hrt_t *hrt=0;
 #endif
-    mb_info_mem_t *mem;
-    mb_info_memmap_t *memmap;
-    mb_info_tag_t *tag;
-    uint64_t num_mem, cur_mem;
+    mb_info_mem_t *mem=0;
+    mb_info_memmap_t *memmap=0;
+    mb_info_tag_t *tag=0;
+    uint64_t num_mem=0, cur_mem=0;
     
     uint64_t total_mem = vm->mem_size;
 
@@ -544,6 +545,7 @@ uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64
        hrt->first_hrt_apic_id = vm->hvm_state.first_hrt_core;
        hrt->have_hrt_ioapic=0;
        hrt->first_hrt_ioapic_entry=0;
+       hrt->first_hrt_addr = vm->hvm_state.first_hrt_gpa;
     }
 #endif
 
@@ -636,7 +638,9 @@ uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64
 int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
                              void *base, uint64_t limit)
 {
-    uint32_t offset;
+    uint32_t offset=0;
+    uint32_t header_offset = (uint32_t) ((uint64_t)(mb->header) - (uint64_t)(file->data));
+    uint32_t size;
 
     if (!mb->addr || !mb->entry) { 
        PrintError(vm,VCORE_NONE, "multiboot: kernel is missing address or entry point\n");
@@ -650,21 +654,39 @@ int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cf
        return -1;
     }
 
-    offset = mb->addr->load_addr - mb->addr->header_addr;
+    offset = header_offset - (mb->addr->header_addr - mb->addr->load_addr);
+    size = mb->addr->load_end_addr - mb->addr->load_addr;
+    
+    if (size != file->size-offset) { 
+       V3_Print(vm,VCORE_NONE,"multiboot: strange: size computed as %u, but file->size-offset = %llu\n",size,file->size-offset);
+    }
 
-    // Skip the ELF header - assume 1 page... weird.... 
     // We are trying to do as little ELF loading here as humanly possible
     v3_write_gpa_memory(&vm->cores[0],
                        (addr_t)(mb->addr->load_addr),
-                       file->size-PAGE_SIZE-offset,
-                       file->data+PAGE_SIZE+offset);
+                       size,
+                       file->data+offset);
 
     PrintDebug(vm,VCORE_NONE,
               "multiboot: wrote 0x%llx bytes starting at offset 0x%llx to %p\n",
-              (uint64_t) file->size-PAGE_SIZE-offset,
-              (uint64_t) PAGE_SIZE+offset,
+              (uint64_t) size,
+              (uint64_t) offset,
               (void*)(addr_t)(mb->addr->load_addr));
 
+    size = mb->addr->bss_end_addr - mb->addr->load_end_addr + 1;
+
+    // Now we need to zero the BSS
+    v3_set_gpa_memory(&vm->cores[0],
+                     (addr_t)(mb->addr->load_end_addr),
+                     size,
+                     0);
+                     
+    PrintDebug(vm,VCORE_NONE,
+              "multiboot: zeroed 0x%llx bytes starting at %p\n",
+              (uint64_t) size,
+              (void*)(addr_t)(mb->addr->load_end_addr));
+                     
+
     return 0;
 
 }
@@ -732,12 +754,7 @@ static void write_gdt(struct v3_vm_info *vm, void *base, uint64_t limit)
        
 static void write_tss(struct v3_vm_info *vm, void *base, uint64_t limit)
 {
-    int i;
-    uint64_t tss_data=0x0;
-
-    for (i=0;i<limit/8;i++) {
-       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+8*i),8,(uint8_t*) &tss_data);
-    }
+    v3_set_gpa_memory(&vm->cores[0],(addr_t)base,limit,0);
 
     PrintDebug(vm,VCORE_NONE,"multiboot: wrote TSS at %p\n",base);
 }
@@ -944,22 +961,26 @@ int v3_setup_multiboot_core_for_boot(struct guest_info *core)
     core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
     core->segments.cs.base = (addr_t) base;
     core->segments.cs.limit = limit;
-    core->segments.cs.type = 0xe;
-    core->segments.cs.system = 0; 
+    core->segments.cs.type = 0xa;
+    core->segments.cs.system = 1; 
     core->segments.cs.dpl = 0;
     core->segments.cs.present = 1;
     core->segments.cs.long_mode = 0;
+    core->segments.cs.db = 1; // 32 bit operand and address size
+    core->segments.cs.granularity = 1; // pages
 
     // DS, SS, etc are identical
     core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
     core->segments.ds.base = (addr_t) base;
     core->segments.ds.limit = limit;
-    core->segments.ds.type = 0x6;
-    core->segments.ds.system = 0; 
+    core->segments.ds.type = 0x2;
+    core->segments.ds.system = 1; 
     core->segments.ds.dpl = 0;
     core->segments.ds.present = 1;
     core->segments.ds.long_mode = 0;
-    
+    core->segments.ds.db = 1; // 32 bit operand and address size
+    core->segments.ds.granularity = 1; // pages
+
     memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
     memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
     memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
@@ -984,3 +1005,54 @@ int v3_setup_multiboot_core_for_boot(struct guest_info *core)
 
     return 0;
 }
+
+// Multiboot reset handler for one core: returns 0 if no multiboot reset applies, 1 after the core was set up again, <0 if setup failed
+int v3_handle_multiboot_reset(struct guest_info *core)
+{
+    int rc; // bitwise OR of the setup return codes collected below
+
+    if (core->core_run_state!=CORE_RESETTING) { // this core is not in the resetting state: nothing to do
+       return 0;
+    }
+
+    if (!core->vm_info->mb_state.is_multiboot) { // not a multiboot VM: nothing for this handler to do
+       return 0;
+    }
+
+    // wait for everyone
+    v3_counting_barrier(&core->vm_info->reset_barrier);
+
+    if (core->vcpu_id==0) {
+       // I am leader (this is true if I am a ROS core or this is a non-HVM)
+       core->vm_info->run_state = VM_RESETTING;
+    }
+
+    rc=0;
+       
+    if (core->vcpu_id==0) {
+       // we will recopy the image
+       rc |= v3_setup_multiboot_vm_for_boot(core->vm_info);
+    }
+
+    rc |= v3_setup_multiboot_core_for_boot(core); // every core, leader included, redoes its own per-core boot setup
+
+    if (core->vcpu_id==0) { 
+       core->core_run_state = CORE_RUNNING; // NOTE(review): set even when rc<0 -- confirm the caller acts on the negative return
+       core->vm_info->run_state = VM_RUNNING;
+    } else {
+       // for APs, we need to bring them back to the init state
+       core->cpu_mode = REAL;
+       core->mem_mode = PHYSICAL_MEM;
+       core->core_run_state = CORE_STOPPED;
+    }
+
+    // sync on the way out
+    v3_counting_barrier(&core->vm_info->reset_barrier);
+
+    if (rc<0) { // OR-ing keeps the sign bit on two's-complement targets, so this is true if any step returned a negative value
+       return rc; // the exact code of an individual failing step is not preserved by the OR
+    } else {
+       return 1; // reboot
+    }
+}
+