Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Addition of basic multiboot functionality plus refactor of HVM
Peter Dinda [Tue, 19 May 2015 16:27:47 +0000 (11:27 -0500)]
functionality to extend multiboot

Kconfig
palacios/include/palacios/vm_guest.h
palacios/include/palacios/vmm_hvm.h
palacios/src/palacios/Makefile
palacios/src/palacios/svm.c
palacios/src/palacios/vm_guest.c
palacios/src/palacios/vmm.c
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_hvm.c
palacios/src/palacios/vmm_multiboot.c [new file with mode: 0644]
palacios/src/palacios/vmx.c

diff --git a/Kconfig b/Kconfig
index 5288b22..e8cae68 100644 (file)
--- a/Kconfig
+++ b/Kconfig
@@ -57,6 +57,8 @@ config OTHER_OS
 
 endchoice
 
+source "Kconfig.stdlibs"
+
 config CRAY_XT
         bool "Red Storm (Cray XT3/XT4)"
         help
@@ -260,185 +262,7 @@ config DEBUG_CHECKPOINT
 
 endmenu
 
-
-source "Kconfig.stdlibs"
-
-
-menu "Virtual Paging"
-
-config NESTED_PAGING
-       bool "Enable nested paging"
-        default y 
-        help
-           Enable nested paging (should always be on)
-
-config SHADOW_PAGING
-       bool "Enable shadow paging"
-       default y
-       help 
-          Enables shadow paging for virtual machines
-
-
-config SHADOW_PAGING_VTLB
-       bool "Virtual TLB"
-       default y
-       depends on SHADOW_PAGING
-       help 
-          Enables Virtual TLB implemenation for shadow paging
-           Virtual TLB now uses PAE so there are no 4 GB restrictions
-
-
-config DEBUG_SHDW_PG_VTLB
-       bool "Enable VTLB debugging"
-       default n
-       depends on SHADOW_PAGING_VTLB
-       help
-         Enables debugging messages for VTLB implementation
-
-config SHADOW_PAGING_CACHE
-       bool "Shadow Page Cache"
-       default n
-       depends on SHADOW_PAGING && EXPERIMENTAL
-       help 
-          Enables caching implementation of shadow paging
-
-config DEBUG_SHADOW_PAGING_CACHE
-       bool "Enable Shadow Page Cache Debugging"
-        default n
-        depends on SHADOW_PAGING_CACHE
-        help
-           Enables debugging messages for the VTLB + Caching implementation
-
-#config SHADOW_PAGING_KVM
-#      bool "KVM-style Shadow Pager"
-#      default n
-#      depends on SHADOW_PAGING && EXPERIMENTAL
-#      help 
-#         Enables shadow pager derived from KVM 
-#           You probably do not want this and it will probably not compile!
-#
-#config DEBUG_SHADOW_PAGING_KVM 
-#      bool "Enable KVM-style Shadow Pager Debugging"
-#        default n
-#        depends on SHADOW_PAGING_KVM
-#        help
-#           Enables debugging messages for the KVM-style shadow pager
-
-
-config SWAPPING
-        bool "Enable swapping"
-        default n
-        depends on (SHADOW_PAGING || NESTED_PAGING) && FILE
-        help
-           Enables swapping of regions of guest physical memory to a file 
-
-config DEBUG_SWAPPING
-        bool "Enable swapping debugging"
-       default n
-        depends on SWAPPING
-        help
-           Provides debugging output from the swapping system
-
-config MEM_TRACK
-        bool "Enable memory access tracking"
-       default n
-       depends on SHADOW_PAGING || NESTED_PAGING
-       help
-          Allows tracking of memory accesses on a page granularity
-
-config DEBUG_MEM_TRACK
-        bool "Enable memory access tracking debugging" 
-       default n
-       depends on MEM_TRACK
-       help
-          Provides debugging output for memory access tracking
-
-endmenu
-
-menu "Symbiotic Functions"
-
-config SYMBIOTIC
-       bool "Enable Symbiotic Functionality"
-       default n
-       help 
-         Enable Symbiotic components of the VMM. 
-         This includes the SymSpy interface.
-
-config SYMCALL
-       bool "Symbiotic upcalls"
-       default n
-       depends on SYMBIOTIC && EXPERIMENTAL
-       help
-         Enables the Symbiotic upcall interface
-
-config SWAPBYPASS
-       bool "SwapBypass"
-       default n
-       depends on SYMBIOTIC && SYMCALL && EXPERIMENTAL
-       help 
-         This enables the SwapBypass architecture
-
-config SWAPBYPASS_TELEMETRY
-       bool "Enable SwapBypass Telemetry"
-       default n
-       depends on TELEMETRY && SWAPBYPASS
-       help 
-         Enable the telemetry information for the SwapBypass subsystem
-
-menuconfig SYMMOD 
-       bool "Symbiotic Modules"
-       default n
-       depends on EXPERIMENTAL
-#      depends on SYMBIOTIC
-       help
-         Enable Symbiotic module loading
-
-
-endmenu
-
-menu "VNET"
-
-config VNET
-        bool "Enable Vnet in Palacios"
-        default n
-        help
-          Enable the Vnet in Palacios
-
-config DEBUG_VNET
-        depends on VNET
-        bool "Enable Vnet Debug in Palacios"
-        default n
-        help
-          Enable the Vnet debug in Palacios
-
-
-endmenu
-
-source "palacios/src/gears/Kconfig"
-
-
-menu "HVM" 
-
-config HVM
-       bool "Support Hybrid Virtual Machines"
-       default n
-       help 
-          If set, it is possible to make VMs that are partitioned
-          (cores, memory, devices, hardware access, etc) into 
-          a part ("the ROS") that supports normal VM operation and
-          a part ("the HRT") that supports Hybrid Run-Times,
-          for example Nautilus-based HRTs for parallel languages.
-
-config DEBUG_HVM
-        depends on HVM
-        bool "Enable HVM debugging in Palacios"
-        default n
-        help
-          Enable HVM debugging output
-
-endmenu
-
-menu "Debug configuration"
+menu "Debug Configuration"
 
 ## Is unwind information useful
 
@@ -585,7 +409,106 @@ config DEBUG_MEM_ALLOC
 endmenu
 
 
-menu "BIOS Selection"
+
+
+menu "Virtual Paging"
+
+config NESTED_PAGING
+       bool "Enable nested paging"
+        default y 
+        help
+           Enable nested paging (should always be on)
+
+config SHADOW_PAGING
+       bool "Enable shadow paging"
+       default y
+       help 
+          Enables shadow paging for virtual machines
+
+
+config SHADOW_PAGING_VTLB
+       bool "Virtual TLB"
+       default y
+       depends on SHADOW_PAGING
+       help 
+          Enables Virtual TLB implemenation for shadow paging
+           Virtual TLB now uses PAE so there are no 4 GB restrictions
+
+
+config DEBUG_SHDW_PG_VTLB
+       bool "Enable VTLB debugging"
+       default n
+       depends on SHADOW_PAGING_VTLB
+       help
+         Enables debugging messages for VTLB implementation
+
+config SHADOW_PAGING_CACHE
+       bool "Shadow Page Cache"
+       default n
+       depends on SHADOW_PAGING && EXPERIMENTAL
+       help 
+          Enables caching implementation of shadow paging
+
+config DEBUG_SHADOW_PAGING_CACHE
+       bool "Enable Shadow Page Cache Debugging"
+        default n
+        depends on SHADOW_PAGING_CACHE
+        help
+           Enables debugging messages for the VTLB + Caching implementation
+
+#config SHADOW_PAGING_KVM
+#      bool "KVM-style Shadow Pager"
+#      default n
+#      depends on SHADOW_PAGING && EXPERIMENTAL
+#      help 
+#         Enables shadow pager derived from KVM 
+#           You probably do not want this and it will probably not compile!
+#
+#config DEBUG_SHADOW_PAGING_KVM 
+#      bool "Enable KVM-style Shadow Pager Debugging"
+#        default n
+#        depends on SHADOW_PAGING_KVM
+#        help
+#           Enables debugging messages for the KVM-style shadow pager
+
+
+config SWAPPING
+        bool "Enable swapping"
+        default n
+        depends on (SHADOW_PAGING || NESTED_PAGING) && FILE
+        help
+           Enables swapping of regions of guest physical memory to a file 
+
+config DEBUG_SWAPPING
+        bool "Enable swapping debugging"
+       default n
+        depends on SWAPPING
+        help
+           Provides debugging output from the swapping system
+
+config MEM_TRACK
+        bool "Enable memory access tracking"
+       default n
+       depends on SHADOW_PAGING || NESTED_PAGING
+       help
+          Allows tracking of memory accesses on a page granularity
+
+config DEBUG_MEM_TRACK
+        bool "Enable memory access tracking debugging" 
+       default n
+       depends on MEM_TRACK
+       help
+          Provides debugging output for memory access tracking
+
+endmenu
+
+
+source "palacios/src/devices/Kconfig"
+
+menu "Boot Environments"
+
+
+menu "BIOS"
 
 choice 
        prompt "Boot Code Selection"
@@ -613,7 +536,6 @@ config OTHERBIOS
 
 endchoice
 
-
 config SEABIOS_PATH
        string "Path to pre-built SEABIOS binary"
         depends on SEABIOS
@@ -666,8 +588,116 @@ config VMXASSIST_PATH
          This is vmxassist image to boot real mode guests on 
          Intel VMX Platforms
 
+endmenu 
+
+menu Multiboot
+
+config MULTIBOOT
+       bool "Support Multiboot2-compliant boot"
+       default y
+       help 
+          If set, it is possible to boot a multiboot2 compliant
+          kernel directly.
+
+config DEBUG_MULTIBOOT
+        depends on MULTIBOOT
+        bool "Enable Multiboot2 debugging in Palacios"
+        default n
+        help
+          Enable Multiboot2 debugging output
+
 endmenu
 
 
-source "palacios/src/devices/Kconfig"
+endmenu
+
+menu "Symbiosis"
+
+config SYMBIOTIC
+       bool "Enable Symbiotic Functionality"
+       default n
+       help 
+         Enable Symbiotic components of the VMM. 
+         This includes the SymSpy interface.
+
+config SYMCALL
+       bool "Symbiotic upcalls"
+       default n
+       depends on SYMBIOTIC && EXPERIMENTAL
+       help
+         Enables the Symbiotic upcall interface
+
+config SWAPBYPASS
+       bool "SwapBypass"
+       default n
+       depends on SYMBIOTIC && SYMCALL && EXPERIMENTAL
+       help 
+         This enables the SwapBypass architecture
+
+config SWAPBYPASS_TELEMETRY
+       bool "Enable SwapBypass Telemetry"
+       default n
+       depends on TELEMETRY && SWAPBYPASS
+       help 
+         Enable the telemetry information for the SwapBypass subsystem
+
+menuconfig SYMMOD 
+       bool "Symbiotic Modules"
+       default n
+       depends on EXPERIMENTAL
+#      depends on SYMBIOTIC
+       help
+         Enable Symbiotic module loading
+
+
+endmenu
+
+menu "VNET"
+
+config VNET
+        bool "Enable Vnet in Palacios"
+        default n
+        help
+          Enable the Vnet in Palacios
+
+config DEBUG_VNET
+        depends on VNET
+        bool "Enable Vnet Debug in Palacios"
+        default n
+        help
+          Enable the Vnet debug in Palacios
+
+
+endmenu
+
+source "palacios/src/gears/Kconfig"
+
+
+menu HVM
+
+config HVM
+       bool "Support Hybrid Virtual Machines"
+       depends on MULTIBOOT
+       default n
+       help 
+          If set, it is possible to make VMs that are partitioned
+          (cores, memory, devices, hardware access, etc) into 
+          a part ("the ROS") that supports normal VM operation and
+          a part ("the HRT") that supports Hybrid Run-Times,
+          for example Nautilus-based HRTs for parallel languages.
+
+config DEBUG_HVM
+        depends on HVM
+        bool "Enable HVM debugging in Palacios"
+        default n
+        help
+          Enable HVM debugging output
+
+endmenu
+
+
+
+
+
+
 
index 3029c3c..e646c86 100644 (file)
@@ -69,6 +69,10 @@ struct v3_sym_core_state;
 #include <palacios/vmm_mem_track.h>
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+#include <palacios/vmm_multiboot.h>
+#endif
+
 #ifdef V3_CONFIG_HVM
 #include <palacios/vmm_hvm.h>
 #endif
@@ -264,10 +268,15 @@ struct v3_vm_info {
     struct v3_vm_mem_track memtrack_state;
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+    struct v3_vm_multiboot  mb_state;
+#endif
+
 #ifdef V3_CONFIG_HVM
     struct v3_vm_hvm  hvm_state;
 #endif
 
+
     uint64_t yield_cycle_period;  
 
 
index 6d145c2..576828c 100644 (file)
@@ -30,10 +30,13 @@ struct v3_vm_hvm {
     uint32_t  first_hrt_core;
     uint64_t  first_hrt_gpa;
     struct v3_cfg_file *hrt_file;
+    uint64_t  hrt_entry_addr;
+    enum { HRT_BLOB, HRT_ELF64, HRT_MBOOT2, HRT_MBOOT64 } hrt_type;
 };
 
 struct v3_core_hvm {
     uint8_t   is_hrt;
+    uint64_t  last_boot_start;
 };
 
 struct v3_xml;
index 0ca0f57..1bbce2a 100644 (file)
@@ -93,6 +93,7 @@ obj-$(V3_CONFIG_SYMMOD) += vmm_symmod.o
 
 obj-$(V3_CONFIG_MEM_TRACK) += vmm_mem_track.o
 
+obj-$(V3_CONFIG_MULTIBOOT) += vmm_multiboot.o
 obj-$(V3_CONFIG_HVM) += vmm_hvm.o vmm_hvm_lowlevel.o
 
 obj-y += mmu/
index 05fd183..0e37c4c 100644 (file)
@@ -850,6 +850,13 @@ int v3_start_svm_guest(struct guest_info * info) {
     PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);
 
 
+#ifdef V3_CONFIG_MULTIBOOT
+    if (v3_setup_multiboot_core_for_boot(info)) { 
+       PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
+       return -1;
+    }
+#endif
+
 #ifdef V3_CONFIG_HVM
     if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
index d325e6b..41158e7 100644 (file)
@@ -322,6 +322,11 @@ int v3_free_vm_internal(struct v3_vm_info * vm) {
     v3_deinit_hvm_vm(vm);
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+    v3_deinit_multiboot_vm(vm);
+#endif
+
+
 #ifdef V3_CONFIG_SYMBIOTIC
     v3_deinit_symbiotic_vm(vm);
 #endif
@@ -474,6 +479,10 @@ int v3_free_core(struct guest_info * core) {
     v3_deinit_hvm_core(core);
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+    v3_deinit_multiboot_core(core);
+#endif
+
     v3_deinit_decoder(core);
 
     v3_deinit_intr_controllers(core);
index b7f45cd..6257294 100644 (file)
@@ -154,6 +154,10 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus, char *op
     // Parse host-os defined options into an easily-accessed format.
     v3_parse_options(options);
 
+#ifdef V3_CONFIG_MULTIBOOT
+    v3_init_multiboot();
+#endif
+
 #ifdef V3_CONFIG_HVM
     v3_init_hvm();
 #endif
@@ -263,6 +267,10 @@ void Shutdown_V3() {
     v3_deinit_hvm();
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+    v3_deinit_multiboot();
+#endif
+
     v3_deinit_options();
     
 
@@ -385,6 +393,12 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
         return -1;
     }
 
+#if V3_CONFIG_MULTIBOOT
+    if (v3_setup_multiboot_vm_for_boot(vm)) { 
+       PrintError(vm, VCORE_NONE, "Multiboot setup for boot failed\n");
+       return -1;
+    }
+#endif
 #if V3_CONFIG_HVM
     if (v3_setup_hvm_vm_for_boot(vm)) { 
        PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n");
index 38cfb70..7987957 100644 (file)
 #include <palacios/vmm_swapping.h>
 #endif
 
+#ifdef V3_CONFIG_MULTIBOOT
+#include <palacios/vmm_multiboot.h>
+#endif
+
 #ifdef V3_CONFIG_HVM
 #include <palacios/vmm_hvm.h>
 #endif
@@ -360,6 +364,12 @@ static int pre_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * vm_cfg) {
        return -1;
     }
 
+#ifdef V3_CONFIG_MULTIBOOT
+    if (v3_init_multiboot_vm(vm,vm_cfg)) { 
+       PrintError(vm,VCORE_NONE,"Cannot initialize Multiboot for VM\n");
+       return -1;
+    }
+#endif
 #ifdef V3_CONFIG_HVM
     if (v3_init_hvm_vm(vm,vm_cfg)) { 
        PrintError(vm,VCORE_NONE,"Cannot initialize HVM for VM\n");
@@ -434,6 +444,12 @@ static int pre_config_core(struct guest_info * info, v3_cfg_tree_t * core_cfg) {
        return -1;
     }
 
+#ifdef V3_CONFIG_MULTIBOOT
+    if (v3_init_multiboot_core(info)) { 
+       PrintError(info->vm_info, info, "Error Initializing Multiboot Core\n");
+       return -1;
+    }
+#endif
 #ifdef V3_CONFIG_HVM
     if (v3_init_hvm_core(info)) { 
        PrintError(info->vm_info, info, "Error Initializing HVM Core\n");
index b1f7013..edff2fd 100644 (file)
@@ -28,8 +28,8 @@
 
 #include <palacios/vm_guest_mem.h>
 
-#include <stdio.h>
-#include <stdlib.h>
+#include <palacios/vmm_debug.h>
+
 
 /*
 
 #define PrintDebug(fmt, args...)
 #endif
 
+
+// if set, we will map the first 1 GB of memory using a 3 level
+// hierarchy, for compatibility with Nautilus out of the box.
+// Otherwise we will map the first 512 GB using a 2 level
+// hierarchy
+#define HVM_MAP_1G_2M 1
+
 int v3_init_hvm()
 {
     PrintDebug(VM_NONE,VCORE_NONE, "hvm: init\n");
@@ -83,8 +90,16 @@ int v3_deinit_hvm()
 
 static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, void * priv_data)
 {
-    V3_Print(core->vm_info,core, "hvm: received hypercall %x  rax=%llx rbx=%llx rcx=%llx\n",
-            hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx);
+    uint64_t c;
+
+    rdtscll(c);
+
+
+    V3_Print(core->vm_info,core, "hvm: received hypercall %x  rax=%llx rbx=%llx rcx=%llx at cycle count %llu (%llu cycles since last boot start) num_exits=%llu since initial boot\n",
+            hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx, c, c-core->hvm_state.last_boot_start, core->num_exits);
+    v3_print_core_telemetry(core);
+    //    v3_print_guest_state(core);
+
     return 0;
 }
 
@@ -98,7 +113,7 @@ int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
     char *enable;
     char *ros_cores;
     char *ros_mem;
-    char *hrt_file_id;
+    char *hrt_file_id=0;
 
     PrintDebug(vm, VCORE_NONE, "hvm: vm init\n");
 
@@ -317,10 +332,18 @@ void     v3_hvm_find_apics_seen_by_core(struct guest_info *core, struct v3_vm_in
     }
 }
 
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#define MIN(x,y) ((x)<(y)?(x):(y))
+
+#ifdef HVM_MAP_1G_2M
+#define BOOT_STATE_END_ADDR (MIN(vm->mem_size,0x40000000ULL))
+#else
+#define BOOT_STATE_END_ADDR (MIN(vm->mem_size,0x800000000ULL))
+#endif
 
 static void get_null_int_handler_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*) PAGE_ADDR(vm->mem_size - PAGE_SIZE);
+    *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR - PAGE_SIZE);
     *limit = PAGE_SIZE;
 }
 
@@ -372,7 +395,7 @@ static void write_null_int_handler(struct v3_vm_info *vm)
 
 static void get_idt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*) PAGE_ADDR(vm->mem_size - 2 * PAGE_SIZE);
+    *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR - 2 * PAGE_SIZE);
     *limit = 16*256;
 }
 
@@ -450,7 +473,7 @@ static void write_idt(struct v3_vm_info *vm)
 
 static void get_gdt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*)PAGE_ADDR(vm->mem_size - 3 * PAGE_SIZE);
+    *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR - 3 * PAGE_SIZE);
     *limit = 8*3;
 }
 
@@ -475,7 +498,7 @@ static void write_gdt(struct v3_vm_info *vm)
 
 static void get_tss_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*)PAGE_ADDR(vm->mem_size - 4 * PAGE_SIZE);
+    *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR - 4 * PAGE_SIZE);
     *limit = PAGE_SIZE;
 }
 
@@ -501,15 +524,31 @@ static void write_tss(struct v3_vm_info *vm)
      512 entries
   1 top level
      1 entries
+
+OR
+  
+  PTS MAP FIRST 1 GB identity mapped:
+  1 third level
+    512 entries
+  1 second level
+    1 entries
+  1 top level
+    1 entries
 */
 
 static void get_pt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*)PAGE_ADDR(vm->mem_size-(5+1)*PAGE_SIZE);
+#ifdef HVM_MAP_1G_2M
+    *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR-(5+2)*PAGE_SIZE);
+    *limit =  3*PAGE_SIZE;
+#else
+    *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR-(5+1)*PAGE_SIZE);
     *limit =  2*PAGE_SIZE;
+#endif
 }
 
-static void write_pt(struct v3_vm_info *vm)
+#ifndef HVM_MAP_1G_2M
+static void write_pt_2level_512GB(struct v3_vm_info *vm)
 {
     void *base;
     uint64_t size;
@@ -522,6 +561,10 @@ static void write_pt(struct v3_vm_info *vm)
        PrintError(vm,VCORE_NONE,"Cannot support pt request, defaulting\n");
     }
 
+    if (vm->mem_size > 0x800000000ULL) { 
+       PrintError(vm,VCORE_NONE, "VM has more than 512 GB\n");
+    }
+
     memset(&pdpe,0,sizeof(pdpe));
     pdpe.present=1;
     pdpe.writable=1;
@@ -544,12 +587,89 @@ static void write_pt(struct v3_vm_info *vm)
        v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*sizeof(pml4e)),sizeof(pml4e),(uint8_t*)&pml4e);
     }
 
-    PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE) at %p\n",base);
+    PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE) at %p (512 GB mapped)\n",base);
+}
+
+#else 
+
+static void write_pt_3level_1GB(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t size;
+    struct pml4e64 pml4e;
+    struct pdpe64 pdpe;
+    struct pde64 pde;
+
+    uint64_t i;
+
+    get_pt_loc(vm,&base, &size);
+    if (size!=3*PAGE_SIZE) { 
+       PrintError(vm,VCORE_NONE,"Cannot support pt request, defaulting\n");
+    }
+
+    if (vm->mem_size > 0x40000000ULL) { 
+       PrintError(vm,VCORE_NONE, "VM has more than 1 GB\n");
+    }
+
+    memset(&pde,0,sizeof(pde));
+    pde.present=1;
+    pde.writable=1;
+    pde.large_page=1;
+    
+    for (i=0;i<512;i++) {
+       pde.pt_base_addr = i*0x200;  // 0x200 = 512 pages = 2 MB
+       v3_write_gpa_memory(&vm->cores[0],
+                           (addr_t)(base+2*PAGE_SIZE+i*sizeof(pde)),
+                           sizeof(pde),(uint8_t*)&pde);
+    }
+
+    memset(&pdpe,0,sizeof(pdpe));
+    pdpe.present=1;
+    pdpe.writable=1;
+    pdpe.large_page=0;
+
+    pdpe.pd_base_addr = PAGE_BASE_ADDR((addr_t)(base+2*PAGE_SIZE));
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base+PAGE_SIZE,sizeof(pdpe),(uint8_t*)&pdpe);    
+    
+    for (i=1;i<512;i++) {
+       pdpe.present = 0; 
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+PAGE_SIZE+i*sizeof(pdpe)),sizeof(pdpe),(uint8_t*)&pdpe);
+    }
+
+    memset(&pml4e,0,sizeof(pml4e));
+    pml4e.present=1;
+    pml4e.writable=1;
+    pml4e.pdp_base_addr = PAGE_BASE_ADDR((addr_t)(base+PAGE_SIZE));
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,sizeof(pml4e),(uint8_t*)&pml4e);    
+
+    for (i=1;i<512;i++) {
+       pml4e.present=0;
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*sizeof(pml4e)),sizeof(pml4e),(uint8_t*)&pml4e);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE, 1 PDP) at %p (1 GB mapped)\n",base);
+}
+
+#endif
+
+static void write_pt(struct v3_vm_info *vm)
+{
+#ifdef HVM_MAP_1G_2M
+    write_pt_3level_1GB(vm);
+#else
+    write_pt_2level_512GB(vm);
+#endif
 }
 
 static void get_bp_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
 {
-    *base = (void*) PAGE_ADDR(vm->mem_size-(6+1)*PAGE_SIZE);
+#ifdef HVM_MAP_1G_2M
+    *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR-(6+2)*PAGE_SIZE);
+#else
+    *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR-(6+1)*PAGE_SIZE);
+#endif
     *limit =  PAGE_SIZE;
 }
 
@@ -593,22 +713,146 @@ static void get_hrt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
     *limit = bp_base - *base;
 }
 
-static void write_hrt(struct v3_vm_info *vm)
+
+#define ERROR(fmt, args...) PrintError(VM_NONE,VCORE_NONE,"hvm: " fmt,##args)
+#define INFO(fmt, args...) PrintDebug(VM_NONE,VCORE_NONE,"hvm: " fmt,##args)
+
+#define ELF_MAGIC    0x464c457f
+#define MB2_MAGIC    0xe85250d6
+
+#define MB2_INFO_MAGIC    0x36d76289
+
+static int is_elf(uint8_t *data, uint64_t size)
+{
+    if (*((uint32_t*)data)==ELF_MAGIC) {
+       return 1;
+    } else { 
+       return 0;
+    }
+}
+
+static mb_header_t *find_mb_header(uint8_t *data, uint64_t size)
+{
+    uint64_t limit = size > 32768 ? 32768 : size;
+    uint64_t i;
+
+    // Scan for the .boot magic cookie
+    // must be in first 32K, assume 4 byte aligned
+    for (i=0;i<limit;i+=4) { 
+       if (*((uint32_t*)&data[i])==MB2_MAGIC) {
+           INFO("Found multiboot header at offset 0x%llx\n",i);
+           return (mb_header_t *) &data[i];
+       }
+    }
+    return 0;
+}
+
+
+// 
+// BROKEN - THIS DOES NOT DO WHAT YOU THINK
+//
+static int setup_elf(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,vm->hvm_state.hrt_file->size,vm->hvm_state.hrt_file->data);
+
+    vm->hvm_state.hrt_entry_addr = (uint64_t) (base+0x40);
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote HRT ELF %s at %p\n", vm->hvm_state.hrt_file->tag,base);
+    PrintDebug(vm,VCORE_NONE,"hvm: set ELF entry to %p and hoping for the best...\n", (void*) vm->hvm_state.hrt_entry_addr);
+    
+    vm->hvm_state.hrt_type = HRT_ELF64;
+
+    return 0;
+
+}
+
+static int setup_mb_kernel(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    mb_data_t mb;
+    uint32_t offset;
+
+
+    // FIX USING GENERIC TOOLS
+
+    if (v3_parse_multiboot_header(vm->hvm_state.hrt_file,&mb)) { 
+       PrintError(vm,VCORE_NONE, "hvm: failed to parse multiboot kernel header\n");
+       return -1;
+    }
+
+    if (!mb.addr || !mb.entry) { 
+       PrintError(vm,VCORE_NONE, "hvm: kernel is missing address or entry point\n");
+       return -1;
+    }
+
+    if (((void*)(uint64_t)(mb.addr->header_addr) < base ) ||
+       ((void*)(uint64_t)(mb.addr->load_end_addr) > base+limit) ||
+       ((void*)(uint64_t)(mb.addr->bss_end_addr) > base+limit)) { 
+       PrintError(vm,VCORE_NONE, "hvm: kernel is not within the allowed portion of HVM\n");
+       return -1;
+    }
+
+    offset = mb.addr->load_addr - mb.addr->header_addr;
+
+    // Skip the ELF header - assume 1 page... weird.... 
+    v3_write_gpa_memory(&vm->cores[0],
+                       (addr_t)(mb.addr->load_addr),
+                       vm->hvm_state.hrt_file->size-PAGE_SIZE-offset,
+                       vm->hvm_state.hrt_file->data+PAGE_SIZE+offset);
+
+       
+    // vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr + PAGE_SIZE; //HACK PAD
+
+    vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr;
+    
+    vm->hvm_state.hrt_type = HRT_MBOOT64;
+
+    PrintDebug(vm,VCORE_NONE,
+              "hvm: wrote 0x%llx bytes starting at offset 0x%llx to %p; set entry to %p\n",
+              (uint64_t) vm->hvm_state.hrt_file->size-PAGE_SIZE-offset,
+              (uint64_t) PAGE_SIZE+offset,
+              (void*)(addr_t)(mb.addr->load_addr),
+              (void*) vm->hvm_state.hrt_entry_addr);
+    return 0;
+
+}
+
+
+static int setup_hrt(struct v3_vm_info *vm)
 {
     void *base;
     uint64_t limit;
 
     get_hrt_loc(vm,&base,&limit);
-    
+
     if (vm->hvm_state.hrt_file->size > limit) { 
        PrintError(vm,VCORE_NONE,"hvm: Cannot map HRT because it is too big (%llu bytes, but only have %llu space\n", vm->hvm_state.hrt_file->size, (uint64_t)limit);
-       return;
+       return -1;
     }
 
-    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,vm->hvm_state.hrt_file->size,vm->hvm_state.hrt_file->data);
+    if (!is_elf(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) { 
+       PrintError(vm,VCORE_NONE,"hvm: supplied HRT is not an ELF but we are going to act like it is!\n");
+       if (setup_elf(vm,base,limit)) {
+           PrintError(vm,VCORE_NONE,"hvm: Fake ELF setup failed\n");
+           return -1;
+       }
+       vm->hvm_state.hrt_type=HRT_BLOB;
+    } else {
+       if (find_mb_header(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) { 
+           PrintDebug(vm,VCORE_NONE,"hvm: appears to be a multiboot kernel\n");
+           if (setup_mb_kernel(vm,base,limit)) { 
+               PrintError(vm,VCORE_NONE,"hvm: multiboot kernel setup failed\n");
+               return -1;
+           } 
+       } else {
+           PrintDebug(vm,VCORE_NONE,"hvm: supplied HRT is an ELF\n");
+           if (setup_elf(vm,base,limit)) {
+               PrintError(vm,VCORE_NONE,"hvm: Fake ELF setup failed\n");
+               return -1;
+           }
+       }
+    }
 
-    PrintDebug(vm,VCORE_NONE,"hvm: wrote HRT %s at %p\n", vm->hvm_state.hrt_file->tag,base);
-    
+    return 0;
 }
 
 
@@ -659,7 +903,10 @@ int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm)
 
     write_bp(vm);
     
-    write_hrt(vm);
+    if (setup_hrt(vm)) {
+       PrintError(vm,VCORE_NONE,"hvm: failed to setup HRT\n");
+       return -1;
+    } 
 
 
     PrintDebug(vm,VCORE_NONE,"hvm: setup of HVM memory done\n");
@@ -693,6 +940,8 @@ int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
     void *base;
     uint64_t limit;
 
+    rdtscll(core->hvm_state.last_boot_start);
+
     if (!core->hvm_state.is_hrt) { 
        PrintDebug(core->vm_info,core,"hvm: skipping HRT setup for core %u as it is not an HRT core\n", core->vcpu_id);
        return 0;
@@ -725,21 +974,27 @@ int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
     core->vm_regs.rdi = (v3_reg_t) base;
     // HRT entry point
     get_hrt_loc(core->vm_info, &base,&limit);
-    core->rip = (uint64_t) base + 0x40; // hack for test.o
+    core->rip = (uint64_t) core->vm_info->hvm_state.hrt_entry_addr ; 
 
     // Setup CRs for long mode and our stub page table
     // CR0: PG, PE
     core->ctrl_regs.cr0 = 0x80000001;
+    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
+
     // CR2: don't care (output from #PF)
     // CR3: set to our PML4E, without setting PCD or PWT
     get_pt_loc(core->vm_info, &base,&limit);
     core->ctrl_regs.cr3 = PAGE_ADDR((addr_t)base);
+    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
+
     // CR4: PGE, PAE, PSE (last byte: 1 0 1 1 0 0 0 0)
     core->ctrl_regs.cr4 = 0xb0;
+    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;
     // CR8 as usual
     // RFLAGS zeroed is fine: come in with interrupts off
-    // EFER needs SVME LMA LME (last 16 bites: 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0
+    // EFER needs SVME LMA LME (last 16 bits: 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0
     core->ctrl_regs.efer = 0x1500;
+    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;
 
 
     /* 
@@ -817,10 +1072,83 @@ int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
     memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
     memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
     
+
+    if (core->vm_info->hvm_state.hrt_type==HRT_MBOOT64) { 
+       /*
+         Temporary hackery for multiboot2 "64"
+         We will push the MB structure onto the stack and update RSP
+         and RBX
+       */
+       uint8_t buf[256];
+       uint64_t size;
+       
+       if ((size=v3_build_multiboot_table(core,buf,256))==-1) { 
+           PrintError(core->vm_info,core,"hvm: Failed to write MB info\n");
+           return -1;
+       }
+       core->vm_regs.rsp -= size;
+
+       v3_write_gpa_memory(core,
+                           core->vm_regs.rsp,
+                           size,
+                           buf);
+
+       PrintDebug(core->vm_info,core, "hvm: wrote MB info at %p\n", (void*)core->vm_regs.rsp);
+
+       if (core->vcpu_id == core->vm_info->hvm_state.first_hrt_core) {
+           // We are the BSP for this HRT
+           // this is where rbx needs to point
+           core->vm_regs.rbx = core->vm_regs.rsp;
+           PrintDebug(core->vm_info,core, "hvm: \"BSP\" core\n");
+       } else {
+           // We are an AP for this HRT
+           // so we don't get the multiboot struct
+           core->vm_regs.rbx = 0;
+           PrintDebug(core->vm_info,core, "hvm: \"AP\" core\n");
+       }
+
+
+
+       // one more push, something that looks like a return address
+       size=0;
+       core->vm_regs.rsp -= 8;
+
+       v3_write_gpa_memory(core,
+                           core->vm_regs.rsp,
+                           8,
+                           (uint8_t*) &size);
+       
+       // Now for our magic - this signals
+       // the kernel that a multiboot loader loaded it
+       // and that rbx points to its offered data
+       core->vm_regs.rax = MB2_INFO_MAGIC;
+    
+       /* 
+          Note that "real" MB starts in protected mode without paging
+          This hack starts in long mode... so these requirements go
+          out the window for a large part
+
+          Requirements:
+
+          OK EAX has magic 
+          OK EBX points to MB info
+          OK CS = base 0, offset big, code (LONG MODE)
+          OK DS,ES,FS,GS,SS => base 0, offset big, data (LONG MODE)
+          OK A20 gate on
+          XXX CR0 PE on PG off (nope)
+          XXX EFLAGS IF and VM off
+       */
+          
+
+
+    }
+
+
     // reset paging here for shadow... 
 
     if (core->shdw_pg_mode != NESTED_PAGING) { 
        PrintError(core->vm_info, core, "hvm: shadow paging guest... this will end badly\n");
+       return -1;
     }
 
 
diff --git a/palacios/src/palacios/vmm_multiboot.c b/palacios/src/palacios/vmm_multiboot.c
new file mode 100644 (file)
index 0000000..0a7f60b
--- /dev/null
@@ -0,0 +1,986 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author:  Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_mem.h>
+#include <palacios/vmm.h>
+#include <palacios/vmm_util.h>
+#include <palacios/vmm_emulator.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_debug.h>
+#include <palacios/vmm_hypercall.h>
+
+#include <palacios/vmm_xml.h>
+
+#include <palacios/vm_guest_mem.h>
+
+#include <palacios/vmm_debug.h>
+
+
+/*
+
+  In a Pal file:
+
+  <files> 
+    <file id="multibootelf" filename="multibootelf.o" />
+  </files>
+
+  <multiboot enable="y" file_id="multibootelf" />
+
+
+*/
+
+#ifndef V3_CONFIG_DEBUG_MULTIBOOT
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+/*
+ * VMM-wide multiboot initialization hook.
+ * Currently only emits a debug message; always returns 0.
+ */
+int v3_init_multiboot()
+{
+    PrintDebug(VM_NONE,VCORE_NONE, "multiboot: init\n");
+    return 0;
+}
+
+/*
+ * VMM-wide multiboot teardown hook.
+ * Currently only emits a debug message; always returns 0.
+ */
+int v3_deinit_multiboot()
+{
+    PrintDebug(VM_NONE,VCORE_NONE, "multiboot: deinit\n");
+    return 0;
+}
+
+
+
+#define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
+
+/*
+ * Per-VM multiboot initialization.
+ *
+ * Clears vm->mb_state and then inspects the optional "multiboot" subtree
+ * of the VM configuration.  The VM is marked as a multiboot VM only when
+ * that subtree exists, its "enable" value equals "y" (compared with
+ * strcasecmp), and its "file_id" value resolves through v3_cfg_get_file().
+ *
+ * Returns 0 on success (this includes the "not a multiboot VM" outcome)
+ * and -1 when multiboot is enabled but file_id is absent or unresolvable.
+ */
+int v3_init_multiboot_vm(struct v3_vm_info *vm, struct v3_xml *config)
+{
+    v3_cfg_tree_t *subtree=0;
+    char *enabled=0;
+    char *file_id=0;
+
+    PrintDebug(vm, VCORE_NONE, "multiboot: vm init\n");
+
+    memset(&vm->mb_state,0,sizeof(struct v3_vm_multiboot));
+    vm->mb_state.is_multiboot=0;
+
+    if (config) {
+       subtree = v3_cfg_subtree(config,"multiboot");
+    }
+
+    if (!subtree) {
+       PrintDebug(vm,VCORE_NONE,"multiboot: no multiboot configuration found - normal boot will occur\n");
+    } else if (!(enabled=v3_cfg_val(subtree,"enable")) || strcasecmp(enabled,"y")) {
+       PrintDebug(vm,VCORE_NONE,"multiboot: multiboot configuration disabled\n");
+    } else {
+       // enabled: a resolvable file_id is now mandatory
+       file_id = v3_cfg_val(subtree,"file_id");
+
+       if (!file_id) {
+           PrintError(vm,VCORE_NONE,"multiboot: multiboot block without file_id...\n");
+           return -1;
+       }
+
+       vm->mb_state.mb_file = v3_cfg_get_file(vm,file_id);
+
+       if (!vm->mb_state.mb_file) {
+           PrintError(vm,VCORE_NONE,"multiboot: multiboot block contains bad file_id (%s)\n",file_id);
+           return -1;
+       }
+
+       vm->mb_state.is_multiboot=1;
+    }
+
+    // report the outcome either way
+    if (!vm->mb_state.is_multiboot) {
+       V3_Print(vm,VCORE_NONE,"multiboot: This is not a multiboot VM\n");
+    } else {
+       V3_Print(vm,VCORE_NONE,"multiboot: file_id=%s (tag %s)]\n",
+                file_id,
+                vm->mb_state.mb_file->tag);
+    }
+
+    return 0;
+}
+
+
+/*
+ * Per-VM multiboot teardown hook.
+ * Nothing is released here; only a debug message is emitted.
+ * Always returns 0.
+ */
+int v3_deinit_multiboot_vm(struct v3_vm_info *vm)
+{
+    PrintDebug(vm, VCORE_NONE, "multiboot: multiboot VM deinit\n");
+
+    return 0;
+}
+
+/*
+ * Per-core multiboot initialization hook.
+ * No per-core state is set up here; only a debug message is emitted.
+ * Always returns 0.
+ */
+int v3_init_multiboot_core(struct guest_info *core)
+{
+    PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core init\n");
+
+    // Nothing to do at this point
+
+    return 0;
+}
+
+/*
+ * Per-core multiboot teardown hook.
+ * Only emits a debug message; always returns 0.
+ */
+int v3_deinit_multiboot_core(struct guest_info *core)
+{
+    PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core deinit\n");
+
+    return 0;
+}
+
+
+
+
+#define ERROR(fmt, args...) PrintError(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
+#define INFO(fmt, args...) PrintDebug(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
+
+
+/******************************************************************
+     Data contained in the ELF file we will attempt to boot  
+******************************************************************/
+
+#define ELF_MAGIC    0x464c457f
+#define MB2_MAGIC    0xe85250d6
+
+
+/******************************************************************
+     Data we will pass to the kernel via rbx
+******************************************************************/
+
+#define MB2_INFO_MAGIC    0x36d76289
+
+// Fixed header placed at the very start of the multiboot info table.
+// v3_build_multiboot_table() sets totalsize to the byte length of the
+// whole table (header through terminating tag) and reserved to 0.
+typedef struct mb_info_header {
+    uint32_t  totalsize;
+    uint32_t  reserved;
+} __attribute__((packed)) mb_info_header_t;
+
+// Common prefix of every tag in the multiboot info table.
+// A tag of type 0, size 8 indicates last value
+// (v3_build_multiboot_table() writes exactly that as the terminator)
+//
+typedef struct mb_info_tag {
+    uint32_t  type;
+    uint32_t  size;
+} __attribute__((packed)) mb_info_tag_t;
+
+
+// Basic memory information tag.
+// v3_build_multiboot_table() fills mem_lower with 640 and mem_upper with
+// (total memory - 1 MB) expressed in KB.
+#define MB_INFO_MEM_TAG  4
+typedef struct mb_info_mem {
+    mb_info_tag_t tag;
+    uint32_t  mem_lower; // 0..640K in KB 
+    uint32_t  mem_upper; // in KB to first hole - 1 MB
+} __attribute__((packed)) mb_info_mem_t;
+
+// Command line tag.  v3_build_multiboot_table() does not emit this tag.
+// NOTE(review): the multiboot2 specification describes this tag as
+// type, size, string - the extra size field below looks unexpected;
+// confirm the layout before using this struct.
+#define MB_INFO_CMDLINE_TAG  1
+// note alignment of 8 bytes required for each... 
+typedef struct mb_info_cmdline {
+    mb_info_tag_t tag;
+    uint32_t  size;      // includes zero termination
+    uint8_t   string[];  // zero terminated
+} __attribute__((packed)) mb_info_cmdline_t;
+
+
+// Values for the type field of a memory map entry.
+// v3_build_multiboot_table() uses MEM_RAM and MEM_RESV only.
+#define MEM_RAM   1
+#define MEM_ACPI  3
+#define MEM_RESV  4
+
+// One region of the guest physical memory map (24 bytes, packed).
+// v3_build_multiboot_table() sets reserved to 1 for MEM_RESV regions
+// and to 0 for MEM_RAM regions.
+typedef struct mb_info_memmap_entry {
+    uint64_t  base_addr;
+    uint64_t  length;
+    uint32_t  type;
+    uint32_t  reserved;
+} __attribute__((packed)) mb_info_memmap_entry_t;
+
+// Memory map tag: a tag prefix, the per-entry size/version, and then a
+// variable number of entries.  v3_build_multiboot_table() writes
+// entry_size = 24 and entry_version = 0.
+#define MB_INFO_MEMMAP_TAG  6
+// note alignment of 8 bytes required for each... 
+typedef struct mb_info_memmap {
+    mb_info_tag_t tag;
+    uint32_t  entry_size;     // multiple of 8
+    uint32_t  entry_version;  // 0
+    mb_info_memmap_entry_t  entries[];
+} __attribute__((packed)) mb_info_memmap_t;
+
+// HRT tag, emitted by v3_build_multiboot_table() only for HRT cores of
+// an HVM.  It is filled with the VM's core count and first HRT core;
+// the two ioapic fields are currently always written as 0.
+#define MB_INFO_HRT_TAG 0xf00df00d
+typedef struct mb_info_hrt {
+    mb_info_tag_t  tag;
+    // apic ids are 0..num_apics-1
+    // apic and ioapic addresses are the well known places
+    uint32_t       total_num_apics;
+    uint32_t       first_hrt_apic_id;
+    uint32_t       have_hrt_ioapic;
+    uint32_t       first_hrt_ioapic_entry;
+} __attribute__((packed)) mb_info_hrt_t;
+
+
+// We are not doing:
+//
+// - BIOS Boot Device
+// - Modules
+// - ELF symbols
+// - Boot Loader name
+// - APM table
+// - VBE info
+// - Framebuffer info
+//
+
+/*
+ * Returns 1 if the buffer starts with the 4 byte ELF magic number,
+ * 0 otherwise (including when the buffer is missing or shorter than
+ * 4 bytes).
+ *
+ *   data - start of the candidate image
+ *   size - number of valid bytes at data
+ */
+static int is_elf(uint8_t *data, uint64_t size)
+{
+    uint32_t magic;
+
+    // too short to even hold the magic - do not read past the end
+    if (!data || size < sizeof(magic)) {
+       return 0;
+    }
+
+    // copy out instead of casting: data need not be 4 byte aligned
+    memcpy(&magic,data,sizeof(magic));
+
+    return magic==ELF_MAGIC ? 1 : 0;
+}
+
+/*
+ * Scan the start of an image for the multiboot2 header magic.
+ *
+ * Only the first 32K (or the whole image, if smaller) is searched, at
+ * 4 byte strides.  Returns a pointer into data at the magic, or 0 if
+ * no magic is found.
+ *
+ *   data - start of the image
+ *   size - number of valid bytes at data
+ */
+static mb_header_t *find_mb_header(uint8_t *data, uint64_t size)
+{
+    uint64_t limit = size > 32768 ? 32768 : size;
+    uint64_t i;
+    uint32_t word;
+
+    if (!data) {
+       return 0;
+    }
+
+    // Scan for the .boot magic cookie
+    // must be in first 32K, assume 4 byte aligned
+    // (i+4<=limit keeps the 4 byte read inside the buffer even when
+    //  limit is not a multiple of 4)
+    for (i=0;i+4<=limit;i+=4) { 
+       memcpy(&word,&data[i],sizeof(word));
+       if (word==MB2_MAGIC) {
+           INFO("Found multiboot header at offset 0x%llx\n",i);
+           return (mb_header_t *) &data[i];
+       }
+    }
+    return 0;
+}
+
+/*
+ * Returns nonzero if the 32 bit words data[0..size-1] sum to zero
+ * (modulo 2^32), zero otherwise.  An empty range sums to zero.
+ *
+ *   data - first word to sum
+ *   size - number of 32 bit words (not bytes)
+ */
+static int checksum4_ok(uint32_t *data, uint64_t size)
+{
+    uint64_t i;    // same width as size: no signed/unsigned mismatch
+    uint32_t sum=0;
+
+    for (i=0;i<size;i++) {
+       sum+=data[i];
+    }
+
+    return sum==0;
+}
+
+/*
+ * Locate the multiboot2 header inside an ELF image and walk its tags.
+ *
+ *   data - start of the image
+ *   size - number of valid bytes at data
+ *   mb   - out: pointers (into data) to the header and to each tag that
+ *          was found; a tag that is absent is reported as 0
+ *
+ * Returns 0 on success.  Returns -1 if the image is not an ELF, has no
+ * multiboot header, fails the header checksum, repeats a tag, or has a
+ * tag list that is malformed or runs past the end of the image.
+ */
+static int parse_multiboot_kernel(uint8_t *data, uint64_t size, mb_data_t *mb)
+{
+    uint64_t i;
+    uint64_t off;   // byte offset of the current tag within data
+
+    mb_header_t *mb_header=0;
+    mb_tag_t *mb_tag=0;
+    mb_info_t *mb_inf=0;
+    mb_addr_t *mb_addr=0;
+    mb_entry_t *mb_entry=0;
+    mb_flags_t *mb_flags=0;
+    mb_framebuf_t *mb_framebuf=0;
+    mb_modalign_t *mb_modalign=0;
+    mb_mb64_hrt_t *mb_mb64_hrt=0;
+
+
+    if (!is_elf(data,size)) { 
+       ERROR("HRT is not an ELF\n");
+       return -1;
+    }
+
+    mb_header = find_mb_header(data,size);
+
+    if (!mb_header) { 
+       ERROR("No multiboot header found\n");
+       return -1;
+    }
+
+    off = (uint64_t)((uint8_t*)mb_header - data);
+
+    // the fixed part of the header that we checksum is 16 bytes
+    if (size - off < 16) {
+       ERROR("Multiboot header is truncated\n");
+       return -1;
+    }
+
+    // Checksum applies only to the header itself, not to 
+    // the subsequent tags... 
+    if (!checksum4_ok((uint32_t*)mb_header,4)) { 
+       ERROR("Multiboot header has bad checksum\n");
+       return -1;
+    }
+
+    INFO("Multiboot header: arch=0x%x, headerlen=0x%x\n", mb_header->arch, mb_header->headerlen);
+
+    // tags start right after the 16 byte header
+    off += 16;
+
+    for (;;) {
+       // the terminator is 8 bytes, so a shorter remainder cannot hold a tag
+       if (off > size || (size - off) < 8) {
+           ERROR("Multiboot tags run past the end of the image\n");
+           return -1;
+       }
+
+       mb_tag = (mb_tag_t*)(data + off);
+
+       // type 0, size 8 terminates the list
+       if (mb_tag->type==0 && mb_tag->size==8) {
+           break;
+       }
+
+       // a size below 8 would stall (or reverse) the walk, and a size
+       // beyond the image would have us reading past the end
+       if (mb_tag->size < 8 || mb_tag->size > (size - off)) {
+           ERROR("Multiboot tag has bad size 0x%x\n", mb_tag->size);
+           return -1;
+       }
+
+       INFO("tag: type 0x%x flags=0x%x size=0x%x\n",mb_tag->type, mb_tag->flags,mb_tag->size);
+       switch (mb_tag->type) {
+           case MB_TAG_INFO: {
+               if (mb_inf) { 
+                   ERROR("Multiple info tags found!\n");
+                   return -1;
+               }
+               mb_inf = (mb_info_t*)mb_tag;
+               INFO(" info request - types follow\n");
+               // one 4 byte type per entry after the 8 byte tag prefix
+               for (i=0;i<(mb_tag->size-8)/4;i++) {
+                   INFO("  %llu: type 0x%x\n", i, mb_inf->types[i]);
+               }
+           }
+               break;
+
+           case MB_TAG_ADDRESS: {
+               if (mb_addr) { 
+                   ERROR("Multiple address tags found!\n");
+                   return -1;
+               }
+               mb_addr = (mb_addr_t*)mb_tag;
+               INFO(" address\n");
+               INFO("  header_addr     =  0x%x\n", mb_addr->header_addr);
+               INFO("  load_addr       =  0x%x\n", mb_addr->load_addr);
+               INFO("  load_end_addr   =  0x%x\n", mb_addr->load_end_addr);
+               INFO("  bss_end_addr    =  0x%x\n", mb_addr->bss_end_addr);
+           }
+               break;
+
+           case MB_TAG_ENTRY: {
+               if (mb_entry) { 
+                   ERROR("Multiple entry tags found!\n");
+                   return -1;
+               }
+               mb_entry=(mb_entry_t*)mb_tag;
+               INFO(" entry\n");
+               INFO("  entry_addr      =  0x%x\n", mb_entry->entry_addr);
+           }
+               break;
+               
+           case MB_TAG_FLAGS: {
+               if (mb_flags) { 
+                   ERROR("Multiple flags tags found!\n");
+                   return -1;
+               }
+               mb_flags = (mb_flags_t*)mb_tag;
+               INFO(" flags\n");
+               INFO("  console_flags   =  0x%x\n", mb_flags->console_flags);
+           }
+               break;
+               
+           case MB_TAG_FRAMEBUF: {
+               if (mb_framebuf) { 
+                   ERROR("Multiple framebuf tags found!\n");
+                   return -1;
+               }
+               mb_framebuf = (mb_framebuf_t*)mb_tag;
+               INFO(" framebuf\n");
+               INFO("  width           =  0x%x\n", mb_framebuf->width);
+               INFO("  height          =  0x%x\n", mb_framebuf->height);
+               INFO("  depth           =  0x%x\n", mb_framebuf->depth);
+           }
+               break;
+
+           case MB_TAG_MODALIGN: {
+               if (mb_modalign) { 
+                   ERROR("Multiple modalign tags found!\n");
+                   return -1;
+               }
+               mb_modalign = (mb_modalign_t*)mb_tag;
+               INFO(" modalign\n");
+               INFO("  size            =  0x%x\n", mb_modalign->size);
+           }
+               break;
+#if 0
+           case MB_TAG_MB64_HRT: {
+               if (mb_mb64_hrt) { 
+                   ERROR("Multiple mb64_hrt tags found!\n");
+                   return -1;
+               }
+               mb_mb64_hrt = (mb_mb64_hrt_t*)mb_tag;
+               INFO(" mb64_hrt\n");
+           }
+               break;
+#endif
+               
+           default: 
+               INFO("Unknown tag... Skipping...\n");
+               break;
+       }
+
+       // multiboot2 tags each start on an 8 byte boundary, so step over
+       // any padding that follows a tag whose size is not a multiple of 8
+       off += ((uint64_t)mb_tag->size + 7) & ~((uint64_t)7);
+    }
+
+    // copy out to caller
+    mb->header=mb_header;
+    mb->info=mb_inf;
+    mb->addr=mb_addr;
+    mb->entry=mb_entry;
+    mb->flags=mb_flags;
+    mb->framebuf=mb_framebuf;
+    mb->modalign=mb_modalign;
+    mb->mb64_hrt=mb_mb64_hrt;
+
+    return 0;
+}
+
+
+/*
+ * Public wrapper: parse the multiboot header of a configured file.
+ * Passes the file's data and size to parse_multiboot_kernel() and
+ * returns its result (0 on success, -1 on failure), with the parsed
+ * pointers stored in *result.
+ */
+int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result)
+{
+    return parse_multiboot_kernel(file->data,file->size,result);
+}
+
+
+#define APIC_BASE     0xfee00000
+#define IOAPIC_BASE   0xfec00000
+
+/*
+  MB_INFO_HEADER
+  MB_HRT  (if this is an HVM)
+  MB_BASIC_MEMORY
+  MB_MEMORY_MAP
+    0..640K  RAM
+    640K..1024K reserved
+    1024K..ioapic_base RAM
+    ioapic_base to ioapic_base+page reserved
+    ioapic_base+page to apic_base RAM
+    apic_base to apic_base+page reserved
+    apic_base+page to total RAM
+
+   
+ The multiboot structure that is written reflects the 
+ perspective of the core given the kind of VM it is part of.
+
+ Regular VM
+    - core does not matter 
+    - all memory visible
+
+ HVM
+   ROS core
+    - only ROS memory visible
+    - regular multiboot or bios boot assumed
+   HRT core
+    - full HRT memory visible
+    - HRT64 multiboot assumed
+
+*/
+
+// Fill in a single memory map entry
+static void mb_fill_memmap_entry(mb_info_memmap_entry_t *entry,
+                                uint64_t base_addr, uint64_t length,
+                                uint32_t type, uint32_t reserved)
+{
+    entry->base_addr = base_addr;
+    entry->length = length;
+    entry->type = type;
+    entry->reserved = reserved;
+}
+
+/*
+ * Build the multiboot info table for a core into a host buffer.
+ *
+ *   core - core whose perspective the table reflects (for an HVM the
+ *          memory size is the ROS or HRT size, and HRT cores also get
+ *          an HRT tag)
+ *   dest - host buffer that receives the table
+ *   size - capacity of dest in bytes
+ *
+ * Layout written: header, [HRT tag], basic memory tag, memory map tag
+ * with 5 to 7 entries, terminating tag (type 0, size 8).
+ *
+ * Returns the number of bytes written, or 0 if the table does not fit
+ * in size bytes.
+ *
+ * The code assumes total memory is larger than 1 MB.
+ */
+uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size)
+{
+    struct v3_vm_info *vm = core->vm_info;
+    mb_info_header_t *header;
+#ifdef V3_CONFIG_HVM
+    mb_info_hrt_t *hrt = 0;
+    int want_hrt = 0;
+#endif
+    mb_info_mem_t *mem;
+    mb_info_memmap_t *memmap;
+    mb_info_tag_t *tag;
+    uint64_t num_mem, cur_mem;
+    uint64_t needed;
+    uint8_t *next;
+    
+    uint64_t total_mem = vm->mem_size;
+
+#ifdef V3_CONFIG_HVM
+    if (vm->hvm_state.is_hvm) { 
+       if (v3_is_hvm_ros_core(core)) {
+           PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from ROS core perspective\n");
+           total_mem = v3_get_hvm_ros_memsize(vm);
+       } else {
+           PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from HRT core perspective\n");
+           total_mem = v3_get_hvm_hrt_memsize(vm);     
+       }
+       // decide once, so sizing and emission cannot disagree
+       want_hrt = v3_is_hvm_hrt_core(core) ? 1 : 0;
+    }
+#endif
+
+    // assume we have > 1 MB + apic+ioapic
+    num_mem = 5;
+    if (total_mem>IOAPIC_BASE+PAGE_SIZE) {
+       num_mem++;
+    }
+    if (total_mem>APIC_BASE+PAGE_SIZE) {
+       num_mem++;
+    }
+
+    // The optional HRT tag is added as a separate statement: folding a
+    // "cond ? x : 0" term into the middle of the sum without parentheses
+    // lets ?: and && swallow the other terms.
+    needed = 
+       sizeof(mb_info_header_t) +
+       sizeof(mb_info_mem_t) + 
+       sizeof(mb_info_memmap_t) + 
+       sizeof(mb_info_memmap_entry_t) * num_mem  +
+       sizeof(mb_info_tag_t);
+
+#ifdef V3_CONFIG_HVM
+    if (want_hrt) {
+       needed += sizeof(mb_info_hrt_t);
+    }
+#endif
+
+    if (needed>size) {
+       ERROR("Cannot fit MB info in needed space\n");
+       return 0;
+    }
+
+    // carve the destination buffer up into the individual tags
+    next = dest;
+
+    header = (mb_info_header_t*)next;
+    next += sizeof(mb_info_header_t);
+
+#ifdef V3_CONFIG_HVM
+    if (want_hrt) { 
+       hrt = (mb_info_hrt_t*)next;
+       next += sizeof(mb_info_hrt_t);
+    }
+#endif
+
+    mem = (mb_info_mem_t*)next;
+    next += sizeof(mb_info_mem_t);
+
+    memmap = (mb_info_memmap_t*)next;
+    next += sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
+
+    tag = (mb_info_tag_t*)next;
+    next += sizeof(mb_info_tag_t);
+
+    header->totalsize = (uint32_t)(next - dest);
+    header->reserved = 0;
+
+#ifdef V3_CONFIG_HVM
+    if (want_hrt) { 
+       hrt->tag.type = MB_INFO_HRT_TAG;
+       hrt->tag.size = sizeof(mb_info_hrt_t);
+       hrt->total_num_apics = vm->num_cores;
+       hrt->first_hrt_apic_id = vm->hvm_state.first_hrt_core;
+       hrt->have_hrt_ioapic=0;
+       hrt->first_hrt_ioapic_entry=0;
+    }
+#endif
+
+    mem->tag.type = MB_INFO_MEM_TAG;
+    mem->tag.size = sizeof(mb_info_mem_t);
+    mem->mem_lower = 640; // thank you, bill gates
+    mem->mem_upper = (total_mem  - 1024 * 1024) / 1024;
+
+    memmap->tag.type = MB_INFO_MEMMAP_TAG;
+    memmap->tag.size = sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
+    memmap->entry_size = 24;
+    memmap->entry_version = 0;
+
+    cur_mem=0;
+
+    // first 640K
+    mb_fill_memmap_entry(&memmap->entries[cur_mem++], 0, 640*1024, MEM_RAM, 0);
+
+    // legacy io (640K->1 MB)
+    mb_fill_memmap_entry(&memmap->entries[cur_mem++], 640*1024, 384*1024, MEM_RESV, 1);
+
+    // first meg to ioapic
+    mb_fill_memmap_entry(&memmap->entries[cur_mem++],
+                        1024*1024,
+                        (total_mem < IOAPIC_BASE ? total_mem : IOAPIC_BASE) - 1024*1024,
+                        MEM_RAM, 0);
+
+    // ioapic reservation
+    mb_fill_memmap_entry(&memmap->entries[cur_mem++], IOAPIC_BASE, PAGE_SIZE, MEM_RESV, 1);
+
+    if (total_mem > (IOAPIC_BASE + PAGE_SIZE)) {
+       // memory between ioapic and apic
+       mb_fill_memmap_entry(&memmap->entries[cur_mem++],
+                            IOAPIC_BASE+PAGE_SIZE,
+                            (total_mem < APIC_BASE ? total_mem : APIC_BASE) - (IOAPIC_BASE+PAGE_SIZE),
+                            MEM_RAM, 0);
+    } 
+
+    // apic
+    mb_fill_memmap_entry(&memmap->entries[cur_mem++], APIC_BASE, PAGE_SIZE, MEM_RESV, 1);
+
+    if (total_mem > (APIC_BASE + PAGE_SIZE)) {
+       // memory after apic
+       mb_fill_memmap_entry(&memmap->entries[cur_mem++],
+                            APIC_BASE+PAGE_SIZE,
+                            total_mem - (APIC_BASE+PAGE_SIZE),
+                            MEM_RAM, 0);
+    } 
+
+    for (cur_mem=0;cur_mem<num_mem;cur_mem++) { 
+       PrintDebug(vm, VCORE_NONE,
+                  "multiboot: entry %llu: %p (%llx bytes) - type %x %s\n",
+                  cur_mem, 
+                  (void*) memmap->entries[cur_mem].base_addr,
+                  memmap->entries[cur_mem].length,
+                  memmap->entries[cur_mem].type,
+                  memmap->entries[cur_mem].reserved ? "reserved" : "");
+    }
+
+    // This demarcates end of list
+    tag->type = 0;
+    tag->size = 8;
+
+    return header->totalsize;
+
+}
+
+
+/*
+ * Copy a parsed multiboot kernel image into guest physical memory.
+ *
+ *   vm    - VM to load into (the write goes through core 0)
+ *   mb    - parsed multiboot data; the address and entry tags are required
+ *   file  - the kernel image
+ *   base  - lowest guest physical address the kernel may occupy
+ *   limit - size in bytes of the allowed region starting at base
+ *
+ * The copy skips one page of the file (assumed to be the ELF header)
+ * plus (load_addr - header_addr), and lands at load_addr.
+ *
+ * Returns 0 on success, -1 if a required tag is missing, the kernel's
+ * addresses fall outside [base, base+limit], or the file is too small
+ * to contain the region being copied.
+ */
+int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
+                             void *base, uint64_t limit)
+{
+    uint32_t offset;
+
+    if (!mb->addr || !mb->entry) { 
+       PrintError(vm,VCORE_NONE, "multiboot: kernel is missing address or entry point\n");
+       return -1;
+    }
+
+    if (((void*)(uint64_t)(mb->addr->header_addr) < base ) ||
+       ((void*)(uint64_t)(mb->addr->load_end_addr) > base+limit) ||
+       ((void*)(uint64_t)(mb->addr->bss_end_addr) > base+limit)) { 
+       PrintError(vm,VCORE_NONE, "multiboot: kernel is not within the allowed portion of VM\n");
+       return -1;
+    }
+
+    offset = mb->addr->load_addr - mb->addr->header_addr;
+
+    // Without this check the length computed below would wrap around
+    // to an enormous value for a short file or a wrapped offset
+    if ((uint64_t)file->size < (uint64_t)PAGE_SIZE + (uint64_t)offset) {
+       PrintError(vm,VCORE_NONE, "multiboot: kernel file is too small for its load offset\n");
+       return -1;
+    }
+
+    // Skip the ELF header - assume 1 page... weird.... 
+    // We are trying to do as little ELF loading here as humanly possible
+    v3_write_gpa_memory(&vm->cores[0],
+                       (addr_t)(mb->addr->load_addr),
+                       file->size-PAGE_SIZE-offset,
+                       file->data+PAGE_SIZE+offset);
+
+    PrintDebug(vm,VCORE_NONE,
+              "multiboot: wrote 0x%llx bytes starting at offset 0x%llx to %p\n",
+              (uint64_t) file->size-PAGE_SIZE-offset,
+              (uint64_t) PAGE_SIZE+offset,
+              (void*)(addr_t)(mb->addr->load_addr));
+
+    return 0;
+
+}
+
+
+/*
+ * Validate the VM's configured multiboot kernel file and load it into
+ * guest memory.  The allowed load region is the whole of guest memory
+ * (base 0, limit vm->mem_size).
+ *
+ * Returns 0 on success and -1 if the file is larger than guest memory,
+ * is not an ELF, has no multiboot header, cannot be parsed, or cannot
+ * be written.
+ */
+static int setup_multiboot_kernel(struct v3_vm_info *vm)
+{
+    void *region_base = 0;
+    uint64_t region_len = vm->mem_size;
+
+    if (vm->mb_state.mb_file->size > region_len) { 
+       PrintError(vm,VCORE_NONE,"multiboot: Cannot map kernel because it is too big (%llu bytes, but only have %llu space\n", vm->mb_state.mb_file->size, (uint64_t)region_len);
+       return -1;
+    }
+
+    if (!is_elf(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) { 
+       PrintError(vm,VCORE_NONE,"multiboot: supplied kernel is not an ELF\n");
+       return -1;
+    }
+
+    if (!find_mb_header(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) { 
+       PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel has no header\n");
+       return -1;
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: appears to be a multiboot kernel\n");
+
+    // parse first, then copy the image into the guest
+    if (v3_parse_multiboot_header(vm->mb_state.mb_file,&vm->mb_state.mb_data)) { 
+       PrintError(vm,VCORE_NONE,"multiboot: cannot parse multiboot kernel header\n");
+       return -1;
+    }
+
+    if (v3_write_multiboot_kernel(vm, &(vm->mb_state.mb_data),vm->mb_state.mb_file,region_base,region_len)) { 
+       PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel setup failed\n");
+       return -1;
+    } 
+
+    return 0;
+}
+
+// 32 bit GDT entries
+//
+//         base24-31    flags2  limit16-19 access8  base16-23   base0-15   limit0-15
+// null       0           0          0       0         0           0           0
+// code       0           1100       f     10011010    0           0         ffff
+// data       0           1100       f     10010010    0           0         ffff
+//
+// null =   00 00 00 00 00 00 00 00
+// code =   00 cf 9a 00 00 00 ff ff 
+// data =   00 cf 92 00 00 00 ff ff
+//
+// Stub GDT handed to the guest: null, flat code, flat data
+static uint64_t gdt32[3] = {
+    0x0000000000000000, /* null */
+    0x00cf9a000000ffff, /* code (note lme=0) */
+    0x00cf92000000ffff, /* data */
+};
+
+/*
+ * Copy the stub GDT into guest physical memory at base.
+ *
+ *   vm    - VM to write into (the write goes through core 0)
+ *   base  - guest physical address of the GDT area
+ *   limit - size in bytes of the GDT area
+ *
+ * At most sizeof(gdt32) bytes are written; the remainder of the area
+ * is left untouched.
+ */
+static void write_gdt(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    // gdt32 is only 3 entries long - never read past its end, even
+    // when the guest side area is a whole page
+    uint64_t len = limit < sizeof(gdt32) ? limit : sizeof(gdt32);
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,len,(uint8_t*) gdt32);
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: wrote GDT at %p\n",base);
+}
+
+       
+/*
+ * Zero fill the stub TSS area in guest physical memory.
+ *
+ *   vm    - VM to write into (the writes go through core 0)
+ *   base  - guest physical address of the TSS area
+ *   limit - size in bytes of the TSS area; limit/8 quadwords are written
+ */
+static void write_tss(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    uint64_t zero_qword=0x0;
+    int qword=0;
+
+    // one 8 byte write per quadword of the area
+    while (qword<limit/8) {
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+8*qword),8,(uint8_t*) &zero_qword);
+       qword++;
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: wrote TSS at %p\n",base);
+}
+
+/*
+ * Build the multiboot info table (from core 0's perspective) in a
+ * 256 byte staging buffer and copy it to guest physical memory at base.
+ *
+ *   vm    - VM to write into
+ *   base  - guest physical address that receives the table
+ *   limit - space available at base; capped to the staging buffer size
+ *
+ * If the table cannot be built an error is logged and nothing is
+ * written.
+ */
+static void write_table(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    uint8_t staging[256];
+    uint64_t capacity = limit;
+    uint64_t built;
+
+    // never let the builder run past the staging buffer
+    if (capacity >= 256) {
+       capacity = 256;
+    }
+
+    built = v3_build_multiboot_table(&vm->cores[0], staging, capacity);
+
+    if (built==0 || built>256) { 
+       PrintError(vm,VCORE_NONE,"multiboot: cannot build multiboot table\n");
+       return;
+    }
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,built,staging);
+}
+
+
+
+/*
+  GPA layout:
+
+  GDT
+  TSS
+  MBinfo   
+  Kernel at its desired load address (or error)
+
+*/
+
+
+/*
+ * Prepare guest memory of a multiboot VM for boot.
+ *
+ * Loads the kernel, then picks a three page area for the boot data:
+ * at 64K if the kernel starts above page 19, otherwise at the first
+ * page boundary after the end of the kernel (provided that still fits
+ * in guest memory and below 4 GB).  Within that area:
+ *
+ *    page 0 - multiboot info table
+ *    page 1 - TSS
+ *    page 2 - GDT
+ *
+ * The chosen address is recorded in vm->mb_state.mb_data_gpa.
+ *
+ * Returns 0 on success (or if this is not a multiboot VM) and -1 if
+ * the kernel cannot be set up or no room for the boot data is found.
+ */
+int v3_setup_multiboot_vm_for_boot(struct v3_vm_info *vm)
+{
+    void *kernel_start_gpa;
+    void *kernel_end_gpa;
+    void *mb_gpa;
+    void *tss_gpa;
+    void *gdt_gpa;
+
+    if (!vm->mb_state.is_multiboot) { 
+       PrintDebug(vm,VCORE_NONE,"multiboot: skipping multiboot setup for boot as this is not a multiboot VM\n");
+       return 0;
+    }
+
+    if (setup_multiboot_kernel(vm)) {
+       PrintError(vm,VCORE_NONE,"multiboot: failed to setup kernel\n");
+       return -1;
+    } 
+
+    kernel_start_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->load_addr);
+    kernel_end_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->bss_end_addr);
+
+    if ((uint64_t)kernel_start_gpa > 19*4096 ) {
+       // room below the kernel:
+       // at least 3 pages between 64K and start of kernel 
+       // place at 64K
+       mb_gpa=(void*)(16*4096);
+    } else if (!((uint64_t)kernel_end_gpa < vm->mem_size-4*4096)) { 
+       // no room above the kernel either
+       PrintError(vm,VCORE_NONE,"multiboot: no room for mb data above kernel\n");
+       return -1;
+    } else if (((uint64_t)kernel_end_gpa + 4 * 4096) > 0xffffffff) { 
+       PrintError(vm,VCORE_NONE,"multiboot: no room for mb data below 4 GB\n");
+       return -1;
+    } else {
+       // next page boundary after the end of the kernel
+       mb_gpa=(void*) (4096*((uint64_t)kernel_end_gpa/4096 + 1));
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: mb data will start at %p\n",mb_gpa);
+
+    vm->mb_state.mb_data_gpa=mb_gpa;
+
+    // one page each: table, TSS, GDT
+    tss_gpa = mb_gpa + 1 * 4096;
+    gdt_gpa = mb_gpa + 2 * 4096;
+
+    write_table(vm,mb_gpa,4096);
+    write_tss(vm,tss_gpa,4096);
+    write_gdt(vm,gdt_gpa,4096);
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: setup of memory done\n");
+
+    return 0;
+}
+
+/*
+  Set up the register state of the boot core of a multiboot VM.
+
+  Does nothing (and returns 0) if the VM is not a multiboot VM or if
+  the core is not vcpu 0.  Returns -1 if the core is not using nested
+  paging, 0 otherwise.
+
+  On entry:
+
+   IDTR not set
+   GDTR points to stub GDT
+   TR   points to stub TSS
+   CR0  has PE and not PG
+   EIP  is entry point to kernel
+   EBX  points to multiboot info
+   EAX  multiboot magic cookie
+
+*/
+int v3_setup_multiboot_core_for_boot(struct guest_info *core)
+{
+    void *base;
+    uint64_t limit;
+
+    if (!core->vm_info->mb_state.is_multiboot) {
+       PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not an mb VM\n");
+       return 0;
+    }
+       
+    if (core->vcpu_id != 0) {
+       PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not the BSP core\n");
+       return 0;
+    }
+
+
+    PrintDebug(core->vm_info, core, "multiboot: setting up MB BSP core for boot\n");
+
+    
+    // Start from a completely zeroed register state
+    memset(&core->vm_regs,0,sizeof(core->vm_regs));
+    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
+    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
+    memset(&core->segments,0,sizeof(core->segments));    
+    memset(&core->msrs,0,sizeof(core->msrs));    
+    memset(&core->fp_state,0,sizeof(core->fp_state));    
+
+    // We need to be in protected mode at ring zero
+    core->cpl = 0; // we are going right into the kernel
+    core->cpu_mode = PROTECTED;
+    core->mem_mode = PHYSICAL_MEM; 
+    // default run-state is fine, we are core zero
+    // core->core_run_state = CORE_RUNNING ;
+
+    // right into the kernel
+    // (entry point comes from the kernel's multiboot entry tag)
+    core->rip = (uint64_t) core->vm_info->mb_state.mb_data.entry->entry_addr;
+
+    // Setup CRs for protected mode
+    // CR0:  PE (but no PG)
+    core->ctrl_regs.cr0 = 0x1;
+    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
+
+    // CR2: don't care (output from #PF)
+    // CR3: don't care (no paging)
+    core->ctrl_regs.cr3 = 0;
+    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
+
+    // CR4: no features 
+    core->ctrl_regs.cr4 = 0x0;
+    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;
+    // CR8 as usual
+    // RFLAGS zeroed is fine: come in with interrupts off
+    // EFER needs SVME and LME but not LMA (last 16 bits: 0 0 0 1 0 1 0 0   0 0 0 0 0 0 0 0
+    // NOTE(review): 0x1400 sets bits 10 and 12.  In the architectural
+    // EFER layout bit 8 is LME and bit 10 is LMA, so this value looks
+    // like SVME|LMA rather than the SVME|LME described above - confirm
+    // which one is intended for a protected mode, paging-off entry.
+    core->ctrl_regs.efer = 0x1400;
+    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;
+
+
+    /* 
+       Notes on selectors:
+
+       selector is 13 bits of index, 1 bit table indicator 
+       (0=>GDT), 2 bit RPL
+       
+       index is scaled by 8, even in long mode, where some entries 
+       are 16 bytes long.... 
+          -> code, data descriptors have 8 byte format
+             because base, limit, etc, are ignored (no segmentation)
+          -> interrupt/trap gates have 16 byte format 
+             because offset needs to be 64 bits
+    */
+    
+    // There is no IDTR set and interrupts are disabled
+
+    // Install our stub GDT
+    // (third page of the boot data area at mb_data_gpa)
+    core->segments.gdtr.selector = 0;
+    core->segments.gdtr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+2*4096;
+    core->segments.gdtr.limit = 4096-1;
+    core->segments.gdtr.type = 0x6;
+    core->segments.gdtr.system = 1; 
+    core->segments.gdtr.dpl = 0;
+    core->segments.gdtr.present = 1;
+    core->segments.gdtr.long_mode = 0;
+    
+    // And our TSS
+    // (second page of the boot data area at mb_data_gpa)
+    core->segments.tr.selector = 0;
+    core->segments.tr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+1*4096;
+    core->segments.tr.limit = 4096-1;
+    core->segments.tr.type = 0x6;
+    core->segments.tr.system = 1; 
+    core->segments.tr.dpl = 0;
+    core->segments.tr.present = 1;
+    core->segments.tr.long_mode = 0;
+    
+    // flat segments: base 0, limit all ones
+    base = 0x0;
+    limit = -1;
+
+    // And CS
+    core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
+    core->segments.cs.base = (addr_t) base;
+    core->segments.cs.limit = limit;
+    core->segments.cs.type = 0xe;
+    core->segments.cs.system = 0; 
+    core->segments.cs.dpl = 0;
+    core->segments.cs.present = 1;
+    core->segments.cs.long_mode = 0;
+
+    // DS, SS, etc are identical
+    core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
+    core->segments.ds.base = (addr_t) base;
+    core->segments.ds.limit = limit;
+    core->segments.ds.type = 0x6;
+    core->segments.ds.system = 0; 
+    core->segments.ds.dpl = 0;
+    core->segments.ds.present = 1;
+    core->segments.ds.long_mode = 0;
+    
+    // ds must be fully filled in before it is cloned into the others
+    memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
+    
+
+
+    // Now for our magic - this signals
+    // the kernel that a multiboot loader loaded it
+    // and that rbx points to its offered data
+    core->vm_regs.rax = MB2_INFO_MAGIC;
+
+    core->vm_regs.rbx = (uint64_t) (core->vm_info->mb_state.mb_data_gpa);
+
+    // reset paging here for shadow... 
+
+    // only nested paging is handled; anything else is rejected
+    if (core->shdw_pg_mode != NESTED_PAGING) { 
+       PrintError(core->vm_info, core, "multiboot: shadow paging guest... this will end badly\n");
+       return -1;
+    }
+
+
+    return 0;
+}
index daf8eee..23d631a 100644 (file)
@@ -1198,6 +1198,13 @@ int v3_start_vmx_guest(struct guest_info * info) {
 
     PrintDebug(info->vm_info, info, "Starting VMX core %u\n", info->vcpu_id);
 
+#ifdef V3_CONFIG_MULTIBOOT
+    if (v3_setup_multiboot_core_for_boot(info)) { 
+       PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
+       return -1;
+    }
+#endif
+
 #ifdef V3_CONFIG_HVM
     if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");