endchoice
+source "Kconfig.stdlibs"
+
config CRAY_XT
bool "Red Storm (Cray XT3/XT4)"
help
endmenu
-
-source "Kconfig.stdlibs"
-
-
-menu "Virtual Paging"
-
-config NESTED_PAGING
- bool "Enable nested paging"
- default y
- help
- Enable nested paging (should always be on)
-
-config SHADOW_PAGING
- bool "Enable shadow paging"
- default y
- help
- Enables shadow paging for virtual machines
-
-
-config SHADOW_PAGING_VTLB
- bool "Virtual TLB"
- default y
- depends on SHADOW_PAGING
- help
- Enables Virtual TLB implemenation for shadow paging
- Virtual TLB now uses PAE so there are no 4 GB restrictions
-
-
-config DEBUG_SHDW_PG_VTLB
- bool "Enable VTLB debugging"
- default n
- depends on SHADOW_PAGING_VTLB
- help
- Enables debugging messages for VTLB implementation
-
-config SHADOW_PAGING_CACHE
- bool "Shadow Page Cache"
- default n
- depends on SHADOW_PAGING && EXPERIMENTAL
- help
- Enables caching implementation of shadow paging
-
-config DEBUG_SHADOW_PAGING_CACHE
- bool "Enable Shadow Page Cache Debugging"
- default n
- depends on SHADOW_PAGING_CACHE
- help
- Enables debugging messages for the VTLB + Caching implementation
-
-#config SHADOW_PAGING_KVM
-# bool "KVM-style Shadow Pager"
-# default n
-# depends on SHADOW_PAGING && EXPERIMENTAL
-# help
-# Enables shadow pager derived from KVM
-# You probably do not want this and it will probably not compile!
-#
-#config DEBUG_SHADOW_PAGING_KVM
-# bool "Enable KVM-style Shadow Pager Debugging"
-# default n
-# depends on SHADOW_PAGING_KVM
-# help
-# Enables debugging messages for the KVM-style shadow pager
-
-
-config SWAPPING
- bool "Enable swapping"
- default n
- depends on (SHADOW_PAGING || NESTED_PAGING) && FILE
- help
- Enables swapping of regions of guest physical memory to a file
-
-config DEBUG_SWAPPING
- bool "Enable swapping debugging"
- default n
- depends on SWAPPING
- help
- Provides debugging output from the swapping system
-
-config MEM_TRACK
- bool "Enable memory access tracking"
- default n
- depends on SHADOW_PAGING || NESTED_PAGING
- help
- Allows tracking of memory accesses on a page granularity
-
-config DEBUG_MEM_TRACK
- bool "Enable memory access tracking debugging"
- default n
- depends on MEM_TRACK
- help
- Provides debugging output for memory access tracking
-
-endmenu
-
-menu "Symbiotic Functions"
-
-config SYMBIOTIC
- bool "Enable Symbiotic Functionality"
- default n
- help
- Enable Symbiotic components of the VMM.
- This includes the SymSpy interface.
-
-config SYMCALL
- bool "Symbiotic upcalls"
- default n
- depends on SYMBIOTIC && EXPERIMENTAL
- help
- Enables the Symbiotic upcall interface
-
-config SWAPBYPASS
- bool "SwapBypass"
- default n
- depends on SYMBIOTIC && SYMCALL && EXPERIMENTAL
- help
- This enables the SwapBypass architecture
-
-config SWAPBYPASS_TELEMETRY
- bool "Enable SwapBypass Telemetry"
- default n
- depends on TELEMETRY && SWAPBYPASS
- help
- Enable the telemetry information for the SwapBypass subsystem
-
-menuconfig SYMMOD
- bool "Symbiotic Modules"
- default n
- depends on EXPERIMENTAL
-# depends on SYMBIOTIC
- help
- Enable Symbiotic module loading
-
-
-endmenu
-
-menu "VNET"
-
-config VNET
- bool "Enable Vnet in Palacios"
- default n
- help
- Enable the Vnet in Palacios
-
-config DEBUG_VNET
- depends on VNET
- bool "Enable Vnet Debug in Palacios"
- default n
- help
- Enable the Vnet debug in Palacios
-
-
-endmenu
-
-source "palacios/src/gears/Kconfig"
-
-
-menu "HVM"
-
-config HVM
- bool "Support Hybrid Virtual Machines"
- default n
- help
- If set, it is possible to make VMs that are partitioned
- (cores, memory, devices, hardware access, etc) into
- a part ("the ROS") that supports normal VM operation and
- a part ("the HRT") that supports Hybrid Run-Times,
- for example Nautilus-based HRTs for parallel languages.
-
-config DEBUG_HVM
- depends on HVM
- bool "Enable HVM debugging in Palacios"
- default n
- help
- Enable HVM debugging output
-
-endmenu
-
-menu "Debug configuration"
+menu "Debug Configuration"
## Is unwind information useful
endmenu
-menu "BIOS Selection"
+
+
+menu "Virtual Paging"
+
+config NESTED_PAGING
+ bool "Enable nested paging"
+ default y
+ help
+ Enable nested paging (should always be on)
+
+config SHADOW_PAGING
+ bool "Enable shadow paging"
+ default y
+ help
+ Enables shadow paging for virtual machines
+
+
+config SHADOW_PAGING_VTLB
+ bool "Virtual TLB"
+ default y
+ depends on SHADOW_PAGING
+ help
+	  Enables Virtual TLB implementation for shadow paging
+ Virtual TLB now uses PAE so there are no 4 GB restrictions
+
+
+config DEBUG_SHDW_PG_VTLB
+ bool "Enable VTLB debugging"
+ default n
+ depends on SHADOW_PAGING_VTLB
+ help
+ Enables debugging messages for VTLB implementation
+
+config SHADOW_PAGING_CACHE
+ bool "Shadow Page Cache"
+ default n
+ depends on SHADOW_PAGING && EXPERIMENTAL
+ help
+ Enables caching implementation of shadow paging
+
+config DEBUG_SHADOW_PAGING_CACHE
+ bool "Enable Shadow Page Cache Debugging"
+ default n
+ depends on SHADOW_PAGING_CACHE
+ help
+ Enables debugging messages for the VTLB + Caching implementation
+
+#config SHADOW_PAGING_KVM
+# bool "KVM-style Shadow Pager"
+# default n
+# depends on SHADOW_PAGING && EXPERIMENTAL
+# help
+# Enables shadow pager derived from KVM
+# You probably do not want this and it will probably not compile!
+#
+#config DEBUG_SHADOW_PAGING_KVM
+# bool "Enable KVM-style Shadow Pager Debugging"
+# default n
+# depends on SHADOW_PAGING_KVM
+# help
+# Enables debugging messages for the KVM-style shadow pager
+
+
+config SWAPPING
+ bool "Enable swapping"
+ default n
+ depends on (SHADOW_PAGING || NESTED_PAGING) && FILE
+ help
+ Enables swapping of regions of guest physical memory to a file
+
+config DEBUG_SWAPPING
+ bool "Enable swapping debugging"
+ default n
+ depends on SWAPPING
+ help
+ Provides debugging output from the swapping system
+
+config MEM_TRACK
+ bool "Enable memory access tracking"
+ default n
+ depends on SHADOW_PAGING || NESTED_PAGING
+ help
+ Allows tracking of memory accesses on a page granularity
+
+config DEBUG_MEM_TRACK
+ bool "Enable memory access tracking debugging"
+ default n
+ depends on MEM_TRACK
+ help
+ Provides debugging output for memory access tracking
+
+endmenu
+
+
+source "palacios/src/devices/Kconfig"
+
+menu "Boot Environments"
+
+
+menu "BIOS"
choice
prompt "Boot Code Selection"
endchoice
-
config SEABIOS_PATH
string "Path to pre-built SEABIOS binary"
depends on SEABIOS
This is vmxassist image to boot real mode guests on
Intel VMX Platforms
+endmenu
+
+menu "Multiboot"
+
+config MULTIBOOT
+	bool "Support Multiboot2-compliant boot"
+	default y
+	help
+	  If set, it is possible to boot a multiboot2 compliant
+	  kernel directly.
+
+config DEBUG_MULTIBOOT
+	depends on MULTIBOOT
+	bool "Enable Multiboot2 debugging in Palacios"
+	default n
+	help
+	  Enable Multiboot2 debugging output
+
+endmenu
-source "palacios/src/devices/Kconfig"
+endmenu
+
+menu "Symbiosis"
+
+config SYMBIOTIC
+ bool "Enable Symbiotic Functionality"
+ default n
+ help
+ Enable Symbiotic components of the VMM.
+ This includes the SymSpy interface.
+
+config SYMCALL
+ bool "Symbiotic upcalls"
+ default n
+ depends on SYMBIOTIC && EXPERIMENTAL
+ help
+ Enables the Symbiotic upcall interface
+
+config SWAPBYPASS
+ bool "SwapBypass"
+ default n
+ depends on SYMBIOTIC && SYMCALL && EXPERIMENTAL
+ help
+ This enables the SwapBypass architecture
+
+config SWAPBYPASS_TELEMETRY
+ bool "Enable SwapBypass Telemetry"
+ default n
+ depends on TELEMETRY && SWAPBYPASS
+ help
+ Enable the telemetry information for the SwapBypass subsystem
+
+menuconfig SYMMOD
+ bool "Symbiotic Modules"
+ default n
+ depends on EXPERIMENTAL
+# depends on SYMBIOTIC
+ help
+ Enable Symbiotic module loading
+
+
+endmenu
+
+menu "VNET"
+
+config VNET
+ bool "Enable Vnet in Palacios"
+ default n
+ help
+ Enable the Vnet in Palacios
+
+config DEBUG_VNET
+ depends on VNET
+ bool "Enable Vnet Debug in Palacios"
+ default n
+ help
+ Enable the Vnet debug in Palacios
+
+
+endmenu
+
+source "palacios/src/gears/Kconfig"
+
+
+menu "HVM"
+
+config HVM
+	bool "Support Hybrid Virtual Machines"
+	depends on MULTIBOOT
+	default n
+	help
+	  If set, it is possible to make VMs that are partitioned
+	  (cores, memory, devices, hardware access, etc) into
+	  a part ("the ROS") that supports normal VM operation and
+	  a part ("the HRT") that supports Hybrid Run-Times,
+	  for example Nautilus-based HRTs for parallel languages.
+
+config DEBUG_HVM
+	depends on HVM
+	bool "Enable HVM debugging in Palacios"
+	default n
+	help
+	  Enable HVM debugging output
+
+endmenu
+
+
+
+
+
+
#include <palacios/vm_guest_mem.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <palacios/vmm_debug.h>
+
/*
#define PrintDebug(fmt, args...)
#endif
+
+// if set, we will map the first 1 GB of memory using a 3 level
+// hierarchy, for compatibility with Nautilus out of the box.
+// Otherwise we will map the first 512 GB using a 2 level
+// hierarchy
+#define HVM_MAP_1G_2M 1
+
int v3_init_hvm()
{
PrintDebug(VM_NONE,VCORE_NONE, "hvm: init\n");
static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, void * priv_data)
{
- V3_Print(core->vm_info,core, "hvm: received hypercall %x rax=%llx rbx=%llx rcx=%llx\n",
- hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx);
+ uint64_t c;
+
+ rdtscll(c);
+
+
+ V3_Print(core->vm_info,core, "hvm: received hypercall %x rax=%llx rbx=%llx rcx=%llx at cycle count %llu (%llu cycles since last boot start) num_exits=%llu since initial boot\n",
+ hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx, c, c-core->hvm_state.last_boot_start, core->num_exits);
+ v3_print_core_telemetry(core);
+ // v3_print_guest_state(core);
+
return 0;
}
char *enable;
char *ros_cores;
char *ros_mem;
- char *hrt_file_id;
+ char *hrt_file_id=0;
PrintDebug(vm, VCORE_NONE, "hvm: vm init\n");
}
}
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#define MIN(x,y) ((x)<(y)?(x):(y))
+
+#ifdef HVM_MAP_1G_2M
+#define BOOT_STATE_END_ADDR (MIN(vm->mem_size,0x40000000ULL))
+#else
+#define BOOT_STATE_END_ADDR (MIN(vm->mem_size,0x800000000ULL))
+#endif
static void get_null_int_handler_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*) PAGE_ADDR(vm->mem_size - PAGE_SIZE);
+ *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR - PAGE_SIZE);
*limit = PAGE_SIZE;
}
static void get_idt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*) PAGE_ADDR(vm->mem_size - 2 * PAGE_SIZE);
+ *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR - 2 * PAGE_SIZE);
*limit = 16*256;
}
static void get_gdt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*)PAGE_ADDR(vm->mem_size - 3 * PAGE_SIZE);
+ *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR - 3 * PAGE_SIZE);
*limit = 8*3;
}
static void get_tss_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*)PAGE_ADDR(vm->mem_size - 4 * PAGE_SIZE);
+ *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR - 4 * PAGE_SIZE);
*limit = PAGE_SIZE;
}
512 entries
1 top level
1 entries
+
+OR
+
+ PTS MAP FIRST 1 GB identity mapped:
+ 1 third level
+ 512 entries
+ 1 second level
+ 1 entries
+ 1 top level
+ 1 entries
*/
static void get_pt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*)PAGE_ADDR(vm->mem_size-(5+1)*PAGE_SIZE);
+#ifdef HVM_MAP_1G_2M
+ *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR-(5+2)*PAGE_SIZE);
+ *limit = 3*PAGE_SIZE;
+#else
+ *base = (void*)PAGE_ADDR(BOOT_STATE_END_ADDR-(5+1)*PAGE_SIZE);
*limit = 2*PAGE_SIZE;
+#endif
}
-static void write_pt(struct v3_vm_info *vm)
+#ifndef HVM_MAP_1G_2M
+static void write_pt_2level_512GB(struct v3_vm_info *vm)
{
void *base;
uint64_t size;
PrintError(vm,VCORE_NONE,"Cannot support pt request, defaulting\n");
}
+ if (vm->mem_size > 0x800000000ULL) {
+ PrintError(vm,VCORE_NONE, "VM has more than 512 GB\n");
+ }
+
memset(&pdpe,0,sizeof(pdpe));
pdpe.present=1;
pdpe.writable=1;
v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*sizeof(pml4e)),sizeof(pml4e),(uint8_t*)&pml4e);
}
- PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE) at %p\n",base);
+ PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE) at %p (512 GB mapped)\n",base);
+}
+
+#else
+
+static void write_pt_3level_1GB(struct v3_vm_info *vm)
+{
+ void *base;
+ uint64_t size;
+ struct pml4e64 pml4e;
+ struct pdpe64 pdpe;
+ struct pde64 pde;
+
+ uint64_t i;
+
+ get_pt_loc(vm,&base, &size);
+ if (size!=3*PAGE_SIZE) {
+ PrintError(vm,VCORE_NONE,"Cannot support pt request, defaulting\n");
+ }
+
+ if (vm->mem_size > 0x40000000ULL) {
+ PrintError(vm,VCORE_NONE, "VM has more than 1 GB\n");
+ }
+
+ memset(&pde,0,sizeof(pde));
+ pde.present=1;
+ pde.writable=1;
+ pde.large_page=1;
+
+ for (i=0;i<512;i++) {
+ pde.pt_base_addr = i*0x200; // 0x200 = 512 pages = 2 MB
+ v3_write_gpa_memory(&vm->cores[0],
+ (addr_t)(base+2*PAGE_SIZE+i*sizeof(pde)),
+ sizeof(pde),(uint8_t*)&pde);
+ }
+
+ memset(&pdpe,0,sizeof(pdpe));
+ pdpe.present=1;
+ pdpe.writable=1;
+ pdpe.large_page=0;
+
+ pdpe.pd_base_addr = PAGE_BASE_ADDR((addr_t)(base+2*PAGE_SIZE));
+
+ v3_write_gpa_memory(&vm->cores[0],(addr_t)base+PAGE_SIZE,sizeof(pdpe),(uint8_t*)&pdpe);
+
+ for (i=1;i<512;i++) {
+ pdpe.present = 0;
+ v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+PAGE_SIZE+i*sizeof(pdpe)),sizeof(pdpe),(uint8_t*)&pdpe);
+ }
+
+ memset(&pml4e,0,sizeof(pml4e));
+ pml4e.present=1;
+ pml4e.writable=1;
+ pml4e.pdp_base_addr = PAGE_BASE_ADDR((addr_t)(base+PAGE_SIZE));
+
+ v3_write_gpa_memory(&vm->cores[0],(addr_t)base,sizeof(pml4e),(uint8_t*)&pml4e);
+
+ for (i=1;i<512;i++) {
+ pml4e.present=0;
+ v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*sizeof(pml4e)),sizeof(pml4e),(uint8_t*)&pml4e);
+ }
+
+ PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE, 1 PDP) at %p (1 GB mapped)\n",base);
+}
+
+#endif
+
+static void write_pt(struct v3_vm_info *vm)
+{
+#ifdef HVM_MAP_1G_2M
+ write_pt_3level_1GB(vm);
+#else
+ write_pt_2level_512GB(vm);
+#endif
}
static void get_bp_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- *base = (void*) PAGE_ADDR(vm->mem_size-(6+1)*PAGE_SIZE);
+#ifdef HVM_MAP_1G_2M
+ *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR-(6+2)*PAGE_SIZE);
+#else
+ *base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR-(6+1)*PAGE_SIZE);
+#endif
*limit = PAGE_SIZE;
}
*limit = bp_base - *base;
}
-static void write_hrt(struct v3_vm_info *vm)
+
+#define ERROR(fmt, args...) PrintError(VM_NONE,VCORE_NONE,"hvm: " fmt,##args)
+#define INFO(fmt, args...) PrintDebug(VM_NONE,VCORE_NONE,"hvm: " fmt,##args)
+
+#define ELF_MAGIC 0x464c457f
+#define MB2_MAGIC 0xe85250d6
+
+#define MB2_INFO_MAGIC 0x36d76289
+
+// Return nonzero if the buffer begins with the 4-byte ELF magic.
+// Guard against buffers too small to hold the magic before the
+// 32-bit read (the original dereferenced unconditionally).
+static int is_elf(uint8_t *data, uint64_t size)
+{
+    if (size < 4) {
+	return 0;
+    }
+    if (*((uint32_t*)data)==ELF_MAGIC) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+// Scan for the multiboot2 magic cookie. The MB2 header must appear
+// in the first 32K of the image and is assumed 4-byte aligned.
+// Returns a pointer into `data`, or 0 if no header is found.
+static mb_header_t *find_mb_header(uint8_t *data, uint64_t size)
+{
+    uint64_t limit = size > 32768 ? 32768 : size;
+    uint64_t i;
+
+    // stop while a full 4-byte read still fits, so we never read
+    // up to 3 bytes past the end of a buffer whose size is not a
+    // multiple of 4
+    for (i=0;i+4<=limit;i+=4) {
+	if (*((uint32_t*)&data[i])==MB2_MAGIC) {
+	    INFO("Found multiboot header at offset 0x%llx\n",i);
+	    return (mb_header_t *) &data[i];
+	}
+    }
+    return 0;
+}
+
+
+//
+// BROKEN - THIS DOES NOT DO WHAT YOU THINK
+//
+static int setup_elf(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+ v3_write_gpa_memory(&vm->cores[0],(addr_t)base,vm->hvm_state.hrt_file->size,vm->hvm_state.hrt_file->data);
+
+ vm->hvm_state.hrt_entry_addr = (uint64_t) (base+0x40);
+
+ PrintDebug(vm,VCORE_NONE,"hvm: wrote HRT ELF %s at %p\n", vm->hvm_state.hrt_file->tag,base);
+ PrintDebug(vm,VCORE_NONE,"hvm: set ELF entry to %p and hoping for the best...\n", (void*) vm->hvm_state.hrt_entry_addr);
+
+ vm->hvm_state.hrt_type = HRT_ELF64;
+
+ return 0;
+
+}
+
+static int setup_mb_kernel(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+ mb_data_t mb;
+ uint32_t offset;
+
+
+ // FIX USING GENERIC TOOLS
+
+ if (v3_parse_multiboot_header(vm->hvm_state.hrt_file,&mb)) {
+ PrintError(vm,VCORE_NONE, "hvm: failed to parse multiboot kernel header\n");
+ return -1;
+ }
+
+ if (!mb.addr || !mb.entry) {
+ PrintError(vm,VCORE_NONE, "hvm: kernel is missing address or entry point\n");
+ return -1;
+ }
+
+ if (((void*)(uint64_t)(mb.addr->header_addr) < base ) ||
+ ((void*)(uint64_t)(mb.addr->load_end_addr) > base+limit) ||
+ ((void*)(uint64_t)(mb.addr->bss_end_addr) > base+limit)) {
+ PrintError(vm,VCORE_NONE, "hvm: kernel is not within the allowed portion of HVM\n");
+ return -1;
+ }
+
+ offset = mb.addr->load_addr - mb.addr->header_addr;
+
+ // Skip the ELF header - assume 1 page... weird....
+ v3_write_gpa_memory(&vm->cores[0],
+ (addr_t)(mb.addr->load_addr),
+ vm->hvm_state.hrt_file->size-PAGE_SIZE-offset,
+ vm->hvm_state.hrt_file->data+PAGE_SIZE+offset);
+
+
+ // vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr + PAGE_SIZE; //HACK PAD
+
+ vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr;
+
+ vm->hvm_state.hrt_type = HRT_MBOOT64;
+
+ PrintDebug(vm,VCORE_NONE,
+ "hvm: wrote 0x%llx bytes starting at offset 0x%llx to %p; set entry to %p\n",
+ (uint64_t) vm->hvm_state.hrt_file->size-PAGE_SIZE-offset,
+ (uint64_t) PAGE_SIZE+offset,
+ (void*)(addr_t)(mb.addr->load_addr),
+ (void*) vm->hvm_state.hrt_entry_addr);
+ return 0;
+
+}
+
+
+static int setup_hrt(struct v3_vm_info *vm)
{
void *base;
uint64_t limit;
get_hrt_loc(vm,&base,&limit);
-
+
if (vm->hvm_state.hrt_file->size > limit) {
PrintError(vm,VCORE_NONE,"hvm: Cannot map HRT because it is too big (%llu bytes, but only have %llu space\n", vm->hvm_state.hrt_file->size, (uint64_t)limit);
- return;
+ return -1;
}
- v3_write_gpa_memory(&vm->cores[0],(addr_t)base,vm->hvm_state.hrt_file->size,vm->hvm_state.hrt_file->data);
+ if (!is_elf(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) {
+ PrintError(vm,VCORE_NONE,"hvm: supplied HRT is not an ELF but we are going to act like it is!\n");
+ if (setup_elf(vm,base,limit)) {
+ PrintError(vm,VCORE_NONE,"hvm: Fake ELF setup failed\n");
+ return -1;
+ }
+ vm->hvm_state.hrt_type=HRT_BLOB;
+ } else {
+ if (find_mb_header(vm->hvm_state.hrt_file->data,vm->hvm_state.hrt_file->size)) {
+ PrintDebug(vm,VCORE_NONE,"hvm: appears to be a multiboot kernel\n");
+ if (setup_mb_kernel(vm,base,limit)) {
+ PrintError(vm,VCORE_NONE,"hvm: multiboot kernel setup failed\n");
+ return -1;
+ }
+ } else {
+ PrintDebug(vm,VCORE_NONE,"hvm: supplied HRT is an ELF\n");
+ if (setup_elf(vm,base,limit)) {
+ PrintError(vm,VCORE_NONE,"hvm: Fake ELF setup failed\n");
+ return -1;
+ }
+ }
+ }
- PrintDebug(vm,VCORE_NONE,"hvm: wrote HRT %s at %p\n", vm->hvm_state.hrt_file->tag,base);
-
+ return 0;
}
write_bp(vm);
- write_hrt(vm);
+ if (setup_hrt(vm)) {
+ PrintError(vm,VCORE_NONE,"hvm: failed to setup HRT\n");
+ return -1;
+ }
PrintDebug(vm,VCORE_NONE,"hvm: setup of HVM memory done\n");
void *base;
uint64_t limit;
+ rdtscll(core->hvm_state.last_boot_start);
+
if (!core->hvm_state.is_hrt) {
PrintDebug(core->vm_info,core,"hvm: skipping HRT setup for core %u as it is not an HRT core\n", core->vcpu_id);
return 0;
core->vm_regs.rdi = (v3_reg_t) base;
// HRT entry point
get_hrt_loc(core->vm_info, &base,&limit);
- core->rip = (uint64_t) base + 0x40; // hack for test.o
+ core->rip = (uint64_t) core->vm_info->hvm_state.hrt_entry_addr ;
// Setup CRs for long mode and our stub page table
// CR0: PG, PE
core->ctrl_regs.cr0 = 0x80000001;
+ core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
+
// CR2: don't care (output from #PF)
// CE3: set to our PML4E, without setting PCD or PWT
get_pt_loc(core->vm_info, &base,&limit);
core->ctrl_regs.cr3 = PAGE_ADDR((addr_t)base);
+ core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
+
// CR4: PGE, PAE, PSE (last byte: 1 0 1 1 0 0 0 0)
core->ctrl_regs.cr4 = 0xb0;
+ core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;
// CR8 as usual
// RFLAGS zeroed is fine: come in with interrupts off
- // EFER needs SVME LMA LME (last 16 bites: 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0
+ // EFER needs SVME LMA LME (last 16 bits: 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0
core->ctrl_regs.efer = 0x1500;
+ core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;
/*
memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
+
+ if (core->vm_info->hvm_state.hrt_type==HRT_MBOOT64) {
+ /*
+ Temporary hackery for multiboot2 "64"
+ We will push the MB structure onto the stack and update RSP
+ and RBX
+ */
+ uint8_t buf[256];
+ uint64_t size;
+
+ if ((size=v3_build_multiboot_table(core,buf,256))==-1) {
+ PrintError(core->vm_info,core,"hvm: Failed to write MB info\n");
+ return -1;
+ }
+ core->vm_regs.rsp -= size;
+
+ v3_write_gpa_memory(core,
+ core->vm_regs.rsp,
+ size,
+ buf);
+
+ PrintDebug(core->vm_info,core, "hvm: wrote MB info at %p\n", (void*)core->vm_regs.rsp);
+
+ if (core->vcpu_id == core->vm_info->hvm_state.first_hrt_core) {
+ // We are the BSP for this HRT
+ // this is where rbx needs to point
+ core->vm_regs.rbx = core->vm_regs.rsp;
+ PrintDebug(core->vm_info,core, "hvm: \"BSP\" core\n");
+ } else {
+ // We are an AP for this HRT
+ // so we don't get the multiboot struct
+ core->vm_regs.rbx = 0;
+ PrintDebug(core->vm_info,core, "hvm: \"AP\" core\n");
+ }
+
+
+
+ // one more push, something that looks like a return address
+ size=0;
+ core->vm_regs.rsp -= 8;
+
+ v3_write_gpa_memory(core,
+ core->vm_regs.rsp,
+ 8,
+ (uint8_t*) &size);
+
+ // Now for our magic - this signals
+ // the kernel that a multiboot loader loaded it
+ // and that rbx points to its offered data
+ core->vm_regs.rax = MB2_INFO_MAGIC;
+
+ /*
+ Note that "real" MB starts in protected mode without paging
+ This hack starts in long mode... so these requirements go
+ out the window for a large part
+
+ Requirements:
+
+ OK EAX has magic
+ OK EBX points to MB info
+ OK CS = base 0, offset big, code (LONG MODE)
+ OK DS,ES,FS,GS,SS => base 0, offset big, data (LONG MODE)
+ OK A20 gate on
+ XXX CR0 PE on PG off (nope)
+ XXX EFLAGS IF and VM off
+ */
+
+
+
+ }
+
+
// reset paging here for shadow...
if (core->shdw_pg_mode != NESTED_PAGING) {
PrintError(core->vm_info, core, "hvm: shadow paging guest... this will end badly\n");
+ return -1;
}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_mem.h>
+#include <palacios/vmm.h>
+#include <palacios/vmm_util.h>
+#include <palacios/vmm_emulator.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_debug.h>
+#include <palacios/vmm_hypercall.h>
+
+#include <palacios/vmm_xml.h>
+
+#include <palacios/vm_guest_mem.h>
+
+#include <palacios/vmm_debug.h>
+
+
+/*
+
+ In a Pal file:
+
+ <files>
+ <file id="multibootelf" filename="multibootelf.o" />
+ </files>
+
+ <multiboot enable="y" file_id="multibootelf" />
+
+
+*/
+
+#ifndef V3_CONFIG_DEBUG_MULTIBOOT
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+int v3_init_multiboot()
+{
+ PrintDebug(VM_NONE,VCORE_NONE, "multiboot: init\n");
+ return 0;
+}
+
+int v3_deinit_multiboot()
+{
+ PrintDebug(VM_NONE,VCORE_NONE, "multiboot: deinit\n");
+ return 0;
+}
+
+
+
+#define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
+
+// Parse the <multiboot> block of the VM configuration, if any, and
+// record the referenced kernel file in vm->mb_state. A missing or
+// disabled block is not an error - the VM simply boots normally.
+// Returns 0 on success, -1 on a malformed/unresolvable config.
+int v3_init_multiboot_vm(struct v3_vm_info *vm, struct v3_xml *config)
+{
+    v3_cfg_tree_t *mb_config;
+    char *enable;
+    char *mb_file_id=0;
+
+    PrintDebug(vm, VCORE_NONE, "multiboot: vm init\n");
+
+    memset(&vm->mb_state,0,sizeof(struct v3_vm_multiboot));
+    vm->mb_state.is_multiboot=0;
+
+    // no <multiboot> subtree => normal (non-multiboot) boot
+    if (!config || !(mb_config=v3_cfg_subtree(config,"multiboot"))) {
+	PrintDebug(vm,VCORE_NONE,"multiboot: no multiboot configuration found - normal boot will occur\n");
+	goto out_ok;
+    }
+
+    if (!(enable=v3_cfg_val(mb_config,"enable")) || strcasecmp(enable,"y")) {
+	PrintDebug(vm,VCORE_NONE,"multiboot: multiboot configuration disabled\n");
+	goto out_ok;
+    }
+
+    if (!(mb_file_id=v3_cfg_val(mb_config,"file_id"))) {
+	PrintError(vm,VCORE_NONE,"multiboot: multiboot block without file_id...\n");
+	return -1;
+    }
+
+    vm->mb_state.mb_file = v3_cfg_get_file(vm,mb_file_id);
+
+    if (!vm->mb_state.mb_file) {
+	PrintError(vm,VCORE_NONE,"multiboot: multiboot block contains bad file_id (%s)\n",mb_file_id);
+	return -1;
+    }
+
+    vm->mb_state.is_multiboot=1;
+
+ out_ok:
+    if (vm->mb_state.is_multiboot) {
+	// fixed: format string previously had a stray ']' after '(tag %s)'
+	V3_Print(vm,VCORE_NONE,"multiboot: file_id=%s (tag %s)\n",
+		 mb_file_id,
+		 vm->mb_state.mb_file->tag);
+    } else {
+	V3_Print(vm,VCORE_NONE,"multiboot: This is not a multiboot VM\n");
+    }
+    return 0;
+
+}
+
+
+int v3_deinit_multiboot_vm(struct v3_vm_info *vm)
+{
+ PrintDebug(vm, VCORE_NONE, "multiboot: multiboot VM deinit\n");
+
+ return 0;
+}
+
+int v3_init_multiboot_core(struct guest_info *core)
+{
+ PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core init\n");
+
+ // Nothing to do at this point
+
+ return 0;
+}
+
+int v3_deinit_multiboot_core(struct guest_info *core)
+{
+ PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core deinit\n");
+
+ return 0;
+}
+
+
+
+
+#define ERROR(fmt, args...) PrintError(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
+#define INFO(fmt, args...) PrintDebug(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
+
+
+
+/******************************************************************
+ Data contained in the ELF file we will attempt to boot
+******************************************************************/
+
+#define ELF_MAGIC 0x464c457f
+#define MB2_MAGIC 0xe85250d6
+
+
+/******************************************************************
+ Data we will pass to the kernel via rbx
+******************************************************************/
+
+#define MB2_INFO_MAGIC 0x36d76289
+
+typedef struct mb_info_header {
+ uint32_t totalsize;
+ uint32_t reserved;
+} __attribute__((packed)) mb_info_header_t;
+
+// A tag of type 0, size 8 indicates last value
+//
+typedef struct mb_info_tag {
+ uint32_t type;
+ uint32_t size;
+} __attribute__((packed)) mb_info_tag_t;
+
+
+#define MB_INFO_MEM_TAG 4
+typedef struct mb_info_mem {
+ mb_info_tag_t tag;
+ uint32_t mem_lower; // 0..640K in KB
+ uint32_t mem_upper; // in KB to first hole - 1 MB
+} __attribute__((packed)) mb_info_mem_t;
+
+#define MB_INFO_CMDLINE_TAG 1
+// note alignment of 8 bytes required for each...
+typedef struct mb_info_cmdline {
+ mb_info_tag_t tag;
+ uint32_t size; // includes zero termination
+ uint8_t string[]; // zero terminated
+} __attribute__((packed)) mb_info_cmdline_t;
+
+
+#define MEM_RAM 1
+#define MEM_ACPI 3
+#define MEM_RESV 4
+
+typedef struct mb_info_memmap_entry {
+ uint64_t base_addr;
+ uint64_t length;
+ uint32_t type;
+ uint32_t reserved;
+} __attribute__((packed)) mb_info_memmap_entry_t;
+
+#define MB_INFO_MEMMAP_TAG 6
+// note alignment of 8 bytes required for each...
+typedef struct mb_info_memmap {
+ mb_info_tag_t tag;
+ uint32_t entry_size; // multiple of 8
+ uint32_t entry_version; // 0
+ mb_info_memmap_entry_t entries[];
+} __attribute__((packed)) mb_info_memmap_t;
+
+#define MB_INFO_HRT_TAG 0xf00df00d
+typedef struct mb_info_hrt {
+ mb_info_tag_t tag;
+ // apic ids are 0..num_apics-1
+ // apic and ioapic addresses are the well known places
+ uint32_t total_num_apics;
+ uint32_t first_hrt_apic_id;
+ uint32_t have_hrt_ioapic;
+ uint32_t first_hrt_ioapic_entry;
+} __attribute__((packed)) mb_info_hrt_t;
+
+
+// We are not doing:
+//
+// - BIOS Boot Device
+// - Modules
+// - ELF symbols
+// - Boot Loader name
+// - APM table
+// - VBE info
+// - Framebuffer info
+//
+
+// Return nonzero if the buffer begins with the 4-byte ELF magic.
+// Guard against buffers too small to hold the magic before the
+// 32-bit read (the original dereferenced unconditionally).
+static int is_elf(uint8_t *data, uint64_t size)
+{
+    if (size < 4) {
+	return 0;
+    }
+    if (*((uint32_t*)data)==ELF_MAGIC) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+// Scan for the multiboot2 magic cookie. The MB2 header must appear
+// in the first 32K of the image and is assumed 4-byte aligned.
+// Returns a pointer into `data`, or 0 if no header is found.
+static mb_header_t *find_mb_header(uint8_t *data, uint64_t size)
+{
+    uint64_t limit = size > 32768 ? 32768 : size;
+    uint64_t i;
+
+    // stop while a full 4-byte read still fits, so we never read
+    // up to 3 bytes past the end of a buffer whose size is not a
+    // multiple of 4
+    for (i=0;i+4<=limit;i+=4) {
+	if (*((uint32_t*)&data[i])==MB2_MAGIC) {
+	    INFO("Found multiboot header at offset 0x%llx\n",i);
+	    return (mb_header_t *) &data[i];
+	}
+    }
+    return 0;
+}
+
+// Sum the first `size` 32-bit words; the data is valid if the
+// unsigned sum wraps to zero. Index widened to uint64_t to match
+// `size` (was `int`, a sign/width mismatch for large sizes).
+static int checksum4_ok(uint32_t *data, uint64_t size)
+{
+    uint64_t i;
+    uint32_t sum=0;
+
+    for (i=0;i<size;i++) {
+	sum+=data[i];
+    }
+
+    return sum==0;
+}
+
+// Parse a multiboot2 kernel image: locate and validate the MB2
+// header, then walk its tag list, recording a pointer to each
+// recognized tag (rejecting duplicates) into *mb.
+// Returns 0 on success, -1 on any parse failure.
+static int parse_multiboot_kernel(uint8_t *data, uint64_t size, mb_data_t *mb)
+{
+    uint64_t i;
+
+    mb_header_t *mb_header=0;
+    mb_tag_t *mb_tag=0;
+    mb_info_t *mb_inf=0;
+    mb_addr_t *mb_addr=0;
+    mb_entry_t *mb_entry=0;
+    mb_flags_t *mb_flags=0;
+    mb_framebuf_t *mb_framebuf=0;
+    mb_modalign_t *mb_modalign=0;
+    mb_mb64_hrt_t *mb_mb64_hrt=0;
+
+
+    if (!is_elf(data,size)) {
+	ERROR("HRT is not an ELF\n");
+	return -1;
+    }
+
+    mb_header = find_mb_header(data,size);
+
+    if (!mb_header) {
+	ERROR("No multiboot header found\n");
+	return -1;
+    }
+
+    // Checksum applies only to the header itself (4 words), not to
+    // the subsequent tags...
+    if (!checksum4_ok((uint32_t*)mb_header,4)) {
+	ERROR("Multiboot header has bad checksum\n");
+	return -1;
+    }
+
+    INFO("Multiboot header: arch=0x%x, headerlen=0x%x\n", mb_header->arch, mb_header->headerlen);
+
+    // tags begin immediately after the 16-byte fixed header
+    mb_tag = (mb_tag_t*)((void*)mb_header+16);
+
+    // a tag of type 0 / size 8 terminates the list
+    while (!(mb_tag->type==0 && mb_tag->size==8)) {
+	INFO("tag: type 0x%x flags=0x%x size=0x%x\n",mb_tag->type, mb_tag->flags,mb_tag->size);
+	switch (mb_tag->type) {
+	    case MB_TAG_INFO: {
+		if (mb_inf) {
+		    ERROR("Multiple info tags found!\n");
+		    return -1;
+		}
+		mb_inf = (mb_info_t*)mb_tag;
+		INFO(" info request - types follow\n");
+		// bug fix: condition previously read "(mb_tag->size-8)/4"
+		// without comparing i, looping without bound
+		for (i=0;i<(mb_tag->size-8)/4;i++) {
+		    INFO(" %llu: type 0x%x\n", i, mb_inf->types[i]);
+		}
+	    }
+		break;
+
+	    case MB_TAG_ADDRESS: {
+		if (mb_addr) {
+		    ERROR("Multiple address tags found!\n");
+		    return -1;
+		}
+		mb_addr = (mb_addr_t*)mb_tag;
+		INFO(" address\n");
+		INFO("   header_addr     =  0x%x\n", mb_addr->header_addr);
+		INFO("   load_addr       =  0x%x\n", mb_addr->load_addr);
+		INFO("   load_end_addr   =  0x%x\n", mb_addr->load_end_addr);
+		INFO("   bss_end_addr    =  0x%x\n", mb_addr->bss_end_addr);
+	    }
+		break;
+
+	    case MB_TAG_ENTRY: {
+		if (mb_entry) {
+		    ERROR("Multiple entry tags found!\n");
+		    return -1;
+		}
+		mb_entry=(mb_entry_t*)mb_tag;
+		INFO(" entry\n");
+		INFO("   entry_addr      =  0x%x\n", mb_entry->entry_addr);
+	    }
+		break;
+
+	    case MB_TAG_FLAGS: {
+		if (mb_flags) {
+		    ERROR("Multiple flags tags found!\n");
+		    return -1;
+		}
+		mb_flags = (mb_flags_t*)mb_tag;
+		INFO(" flags\n");
+		INFO("   console_flags   =  0x%x\n", mb_flags->console_flags);
+	    }
+		break;
+
+	    case MB_TAG_FRAMEBUF: {
+		if (mb_framebuf) {
+		    ERROR("Multiple framebuf tags found!\n");
+		    return -1;
+		}
+		mb_framebuf = (mb_framebuf_t*)mb_tag;
+		INFO(" framebuf\n");
+		INFO("   width           =  0x%x\n", mb_framebuf->width);
+		INFO("   height          =  0x%x\n", mb_framebuf->height);
+		INFO("   depth           =  0x%x\n", mb_framebuf->depth);
+	    }
+		break;
+
+	    case MB_TAG_MODALIGN: {
+		if (mb_modalign) {
+		    ERROR("Multiple modalign tags found!\n");
+		    return -1;
+		}
+		mb_modalign = (mb_modalign_t*)mb_tag;
+		INFO(" modalign\n");
+		INFO("   size            =  0x%x\n", mb_modalign->size);
+	    }
+		break;
+#if 0
+	    case MB_TAG_MB64_HRT: {
+		if (mb_mb64_hrt) {
+		    ERROR("Multiple mb64_hrt tags found!\n");
+		    return -1;
+		}
+		mb_mb64_hrt = (mb_mb64_hrt_t*)mb_tag;
+		INFO(" mb64_hrt\n");
+	    }
+		break;
+#endif
+
+	    default:
+		INFO("Unknown tag... Skipping...\n");
+		break;
+	}
+	// NOTE(review): MB2 header tags are 8-byte aligned; advancing by the
+	// raw size assumes producers emit padded sizes - confirm against spec
+	mb_tag = (mb_tag_t *)(((void*)mb_tag) + mb_tag->size);
+    }
+
+    // copy out to caller
+    mb->header=mb_header;
+    mb->info=mb_inf;
+    mb->addr=mb_addr;
+    mb->entry=mb_entry;
+    mb->flags=mb_flags;
+    mb->framebuf=mb_framebuf;
+    mb->modalign=mb_modalign;
+    mb->mb64_hrt=mb_mb64_hrt;
+
+    return 0;
+}
+
+
+// Public entry point: parse the multiboot header of the supplied kernel
+// image file, filling *result with pointers to the tags found.
+// Returns 0 on success, -1 on error (propagated from parse_multiboot_kernel).
+int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result)
+{
+    return parse_multiboot_kernel(file->data,file->size,result);
+}
+
+
+#define APIC_BASE 0xfee00000
+#define IOAPIC_BASE 0xfec00000
+
+/*
+ MB_INFO_HEADER
+  MB_HRT (if this is an HVM)
+  MB_BASIC_MEMORY
+  MB_MEMORY_MAP
+     0..640K RAM
+     640K..1024K reserved
+     1024K..ioapic_base RAM
+     ioapic_base to ioapic_base+page reserved
+     ioapic_base+page to apic_base ram
+     apic_base to apic_base+page reserved
+ apic_base+page to total RAM
+
+
+ The multiboot structure that is written reflects the
+ perspective of the core given the kind of VM it is part of.
+
+ Regular VM
+ - core does not matter
+ - all memory visible
+
+ HVM
+ ROS core
+ - only ROS memory visible
+ - regular multiboot or bios boot assumed
+ HRT core
+ - full HRT memory visible
+ - HRT64 multiboot assumed
+
+*/
+
+// Build the multiboot2 information table for the given core into dest
+// (at most size bytes).  Returns the number of bytes written, or 0 on
+// failure (the buffer was too small).
+uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size)
+{
+    struct v3_vm_info *vm = core->vm_info;
+    mb_info_header_t *header;
+#ifdef V3_CONFIG_HVM
+    mb_info_hrt_t *hrt = 0;        // set only when building for an HRT core
+#endif
+    mb_info_mem_t *mem;
+    mb_info_memmap_t *memmap;
+    mb_info_tag_t *tag;
+    uint64_t num_mem, cur_mem;
+
+    uint64_t total_mem = vm->mem_size;
+
+#ifdef V3_CONFIG_HVM
+    // In an HVM, the core sees only the memory of its own partition
+    if (vm->hvm_state.is_hvm) {
+        if (v3_is_hvm_ros_core(core)) {
+            PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from ROS core perspective\n");
+            total_mem = v3_get_hvm_ros_memsize(vm);
+        } else {
+            PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from HRT core perspective\n");
+            total_mem = v3_get_hvm_hrt_memsize(vm);
+        }
+    }
+#endif
+
+    // Five entries always exist (low 640K, legacy I/O hole, 1MB..IOAPIC,
+    // IOAPIC page, APIC page); RAM beyond each device page adds one more.
+    num_mem = 5;
+    if (total_mem > IOAPIC_BASE + PAGE_SIZE) {
+        num_mem++;
+    }
+    if (total_mem > APIC_BASE + PAGE_SIZE) {
+        num_mem++;
+    }
+
+    uint64_t needed =
+        sizeof(mb_info_header_t) +
+#ifdef V3_CONFIG_HVM
+        // The parentheses matter: without them "a + cond ? b : 0 + c"
+        // parses as "(a + cond) ? b : (0 + c)", badly miscomputing needed
+        ((vm->hvm_state.is_hvm && v3_is_hvm_hrt_core(core)) ? sizeof(mb_info_hrt_t) : 0) +
+#endif
+        sizeof(mb_info_mem_t) +
+        sizeof(mb_info_memmap_t) +
+        sizeof(mb_info_memmap_entry_t) * num_mem +
+        sizeof(mb_info_tag_t);
+
+    if (needed > size) {
+        // 0 is this function's failure indicator; the return type is
+        // unsigned, so returning -1 would look like a huge table size
+        ERROR("Cannot fit MB info in needed space\n");
+        return 0;
+    }
+
+    // Lay the pieces out back to back in the destination buffer
+    uint8_t *next = dest;
+
+    header = (mb_info_header_t*)next;
+    next += sizeof(mb_info_header_t);
+
+#ifdef V3_CONFIG_HVM
+    if (vm->hvm_state.is_hvm && v3_is_hvm_hrt_core(core)) {
+        hrt = (mb_info_hrt_t*)next;
+        next += sizeof(mb_info_hrt_t);
+    }
+#endif
+
+    mem = (mb_info_mem_t*)next;
+    next += sizeof(mb_info_mem_t);
+
+    memmap = (mb_info_memmap_t*)next;
+    next += sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
+
+    tag = (mb_info_tag_t*)next;
+    next += sizeof(mb_info_tag_t);
+
+    header->totalsize = (uint32_t)(next - dest);
+    header->reserved = 0;
+
+#ifdef V3_CONFIG_HVM
+    // hrt is non-null exactly when the HRT slot was laid out above
+    if (hrt) {
+        hrt->tag.type = MB_INFO_HRT_TAG;
+        hrt->tag.size = sizeof(mb_info_hrt_t);
+        hrt->total_num_apics = vm->num_cores;
+        hrt->first_hrt_apic_id = vm->hvm_state.first_hrt_core;
+        hrt->have_hrt_ioapic = 0;
+        hrt->first_hrt_ioapic_entry = 0;
+    }
+#endif
+
+    mem->tag.type = MB_INFO_MEM_TAG;
+    mem->tag.size = sizeof(mb_info_mem_t);
+    mem->mem_lower = 640;  // KB of conventional memory; thank you, bill gates
+    mem->mem_upper = (total_mem - 1024 * 1024) / 1024;  // KB above 1 MB
+
+    memmap->tag.type = MB_INFO_MEMMAP_TAG;
+    memmap->tag.size = sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
+    memmap->entry_size = 24;     // multiboot2 fixed entry size (base+length+type+reserved)
+    memmap->entry_version = 0;
+
+    cur_mem = 0;
+
+    // first 640K
+    memmap->entries[cur_mem].base_addr = 0;
+    memmap->entries[cur_mem].length = 640*1024;
+    memmap->entries[cur_mem].type = MEM_RAM;
+    memmap->entries[cur_mem].reserved = 0;
+    cur_mem++;
+
+    // legacy io (640K->1 MB)
+    memmap->entries[cur_mem].base_addr = 640*1024;
+    memmap->entries[cur_mem].length = 384*1024;
+    memmap->entries[cur_mem].type = MEM_RESV;
+    memmap->entries[cur_mem].reserved = 1;
+    cur_mem++;
+
+    // first meg to ioapic
+    memmap->entries[cur_mem].base_addr = 1024*1024;
+    memmap->entries[cur_mem].length = (total_mem < IOAPIC_BASE ? total_mem : IOAPIC_BASE) - 1024*1024;
+    memmap->entries[cur_mem].type = MEM_RAM;
+    memmap->entries[cur_mem].reserved = 0;
+    cur_mem++;
+
+    // ioapic reservation
+    memmap->entries[cur_mem].base_addr = IOAPIC_BASE;
+    memmap->entries[cur_mem].length = PAGE_SIZE;
+    memmap->entries[cur_mem].type = MEM_RESV;
+    memmap->entries[cur_mem].reserved = 1;
+    cur_mem++;
+
+    if (total_mem > (IOAPIC_BASE + PAGE_SIZE)) {
+        // memory between ioapic and apic
+        memmap->entries[cur_mem].base_addr = IOAPIC_BASE+PAGE_SIZE;
+        memmap->entries[cur_mem].length = (total_mem < APIC_BASE ? total_mem : APIC_BASE) - (IOAPIC_BASE+PAGE_SIZE);
+        memmap->entries[cur_mem].type = MEM_RAM;
+        memmap->entries[cur_mem].reserved = 0;
+        cur_mem++;
+    }
+
+    // apic
+    memmap->entries[cur_mem].base_addr = APIC_BASE;
+    memmap->entries[cur_mem].length = PAGE_SIZE;
+    memmap->entries[cur_mem].type = MEM_RESV;
+    memmap->entries[cur_mem].reserved = 1;
+    cur_mem++;
+
+    if (total_mem > (APIC_BASE + PAGE_SIZE)) {
+        // memory after apic
+        memmap->entries[cur_mem].base_addr = APIC_BASE+PAGE_SIZE;
+        memmap->entries[cur_mem].length = total_mem - (APIC_BASE+PAGE_SIZE);
+        memmap->entries[cur_mem].type = MEM_RAM;
+        memmap->entries[cur_mem].reserved = 0;
+        cur_mem++;
+    }
+
+    for (cur_mem=0;cur_mem<num_mem;cur_mem++) {
+        PrintDebug(vm, VCORE_NONE,
+                   "multiboot: entry %llu: %p (%llx bytes) - type %x %s\n",
+                   cur_mem,
+                   (void*) memmap->entries[cur_mem].base_addr,
+                   memmap->entries[cur_mem].length,
+                   memmap->entries[cur_mem].type,
+                   memmap->entries[cur_mem].reserved ? "reserved" : "");
+    }
+
+    // This demarcates end of list
+    tag->type = 0;
+    tag->size = 8;
+
+    return header->totalsize;
+
+}
+
+
+// Copy the multiboot kernel image into guest physical memory at the load
+// address requested by its MB address tag, validating that the image fits
+// in [base, base+limit) and that the copy arithmetic cannot underflow.
+// Returns 0 on success, -1 on error.
+int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
+			      void *base, uint64_t limit)
+{
+    uint32_t offset;
+
+    if (!mb->addr || !mb->entry) {
+	PrintError(vm,VCORE_NONE, "multiboot: kernel is missing address or entry point\n");
+	return -1;
+    }
+
+    if (((void*)(uint64_t)(mb->addr->header_addr) < base ) ||
+	((void*)(uint64_t)(mb->addr->load_end_addr) > base+limit) ||
+	((void*)(uint64_t)(mb->addr->bss_end_addr) > base+limit)) {
+	PrintError(vm,VCORE_NONE, "multiboot: kernel is not within the allowed portion of VM\n");
+	return -1;
+    }
+
+    if (mb->addr->load_addr < mb->addr->header_addr) {
+	// offset is unsigned; this ordering would underflow it below
+	PrintError(vm,VCORE_NONE, "multiboot: kernel load address is below its header address\n");
+	return -1;
+    }
+
+    offset = mb->addr->load_addr - mb->addr->header_addr;
+
+    if (file->size < PAGE_SIZE + offset) {
+	// the copy length below would underflow for a too-small file
+	PrintError(vm,VCORE_NONE, "multiboot: kernel file too small for the claimed load offset\n");
+	return -1;
+    }
+
+    // Skip the ELF header - assume 1 page... weird....
+    // We are trying to do as little ELF loading here as humanly possible
+    v3_write_gpa_memory(&vm->cores[0],
+			(addr_t)(mb->addr->load_addr),
+			file->size-PAGE_SIZE-offset,
+			file->data+PAGE_SIZE+offset);
+
+    PrintDebug(vm,VCORE_NONE,
+	       "multiboot: wrote 0x%llx bytes starting at offset 0x%llx to %p\n",
+	       (uint64_t) file->size-PAGE_SIZE-offset,
+	       (uint64_t) PAGE_SIZE+offset,
+	       (void*)(addr_t)(mb->addr->load_addr));
+
+    return 0;
+
+}
+
+
+// Validate the configured kernel image (size, ELF magic, MB header),
+// parse its multiboot header, and copy it into the guest.
+// Returns 0 on success, -1 on error.
+static int setup_multiboot_kernel(struct v3_vm_info *vm)
+{
+    void *base = 0;
+    uint64_t limit = vm->mem_size;
+
+    if (vm->mb_state.mb_file->size > limit) {
+	PrintError(vm,VCORE_NONE,"multiboot: Cannot map kernel because it is too big (%llu bytes, but only have %llu space)\n", (uint64_t)vm->mb_state.mb_file->size, (uint64_t)limit);
+	return -1;
+    }
+
+    if (!is_elf(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) {
+	PrintError(vm,VCORE_NONE,"multiboot: supplied kernel is not an ELF\n");
+	return -1;
+    }
+
+    if (!find_mb_header(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) {
+	PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel has no header\n");
+	return -1;
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: appears to be a multiboot kernel\n");
+
+    if (v3_parse_multiboot_header(vm->mb_state.mb_file,&vm->mb_state.mb_data)) {
+	PrintError(vm,VCORE_NONE,"multiboot: cannot parse multiboot kernel header\n");
+	return -1;
+    }
+
+    if (v3_write_multiboot_kernel(vm, &(vm->mb_state.mb_data),vm->mb_state.mb_file,base,limit)) {
+	PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel setup failed\n");
+	return -1;
+    }
+
+    return 0;
+
+}
+
+// 32 bit GDT entries
+//
+// base24-31 flags2 limit16-19 access8 base16-23 base0-15 limit0-15
+// null 0 0 0 0 0 0 0
+// code 0 1100 f 10011010 0 0 ffff
+// data 0 1100 f 10010010 0 0 ffff
+//
+// null = 00 00 00 00 00 00 00 00
+// code = 00 cf 9a 00 00 00 ff ff
+// data = 00 cf 92 00 00 00 ff ff
+//
+// Stub protected-mode GDT for the BSP: null, flat code, and flat data
+// descriptors, per the bit-layout table above.
+static uint64_t gdt32[3] = {
+    0x0000000000000000, /* null */
+    0x00cf9a000000ffff, /* code (note lme=0) */
+    0x00cf92000000ffff, /* data */
+};
+
+// Write the stub GDT into guest memory at base, zero-filling the rest of
+// the limit-byte region.  The previous code copied `limit` bytes (callers
+// pass 4096) straight out of the 24-byte gdt32 array, reading far past
+// the end of the array and filling the guest GDT with host garbage.
+static void write_gdt(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    uint64_t gdt_len = limit < sizeof(gdt32) ? limit : sizeof(gdt32);
+    uint64_t zero = 0;
+    uint64_t off;
+
+    // real descriptors first...
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,gdt_len,(uint8_t*) gdt32);
+
+    // ...then deterministic (null) descriptors for the remainder
+    for (off = gdt_len; off + 8 <= limit; off += 8) {
+	v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+off),8,(uint8_t*)&zero);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: wrote GDT at %p\n",base);
+}
+
+
+// Zero-fill the TSS region at base, 8 bytes at a time.  Any tail smaller
+// than 8 bytes (limit not a multiple of 8) is left untouched, exactly as
+// before.
+static void write_tss(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    uint64_t zero = 0;
+    uint64_t off;
+
+    for (off = 0; off + 8 <= limit; off += 8) {
+	v3_write_gpa_memory(&vm->cores[0],(addr_t)(base + off),8,(uint8_t*)&zero);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: wrote TSS at %p\n",base);
+}
+
+// Build the multiboot info table for core 0 into a staging buffer and
+// copy it into guest memory at base.  Silently does nothing (beyond an
+// error print) if the table cannot be built.
+static void write_table(struct v3_vm_info *vm, void *base, uint64_t limit)
+{
+    uint8_t staging[256];
+    uint64_t table_len;
+
+    if (limit > sizeof(staging)) {
+	limit = sizeof(staging);
+    }
+
+    table_len = v3_build_multiboot_table(&vm->cores[0], staging, limit);
+
+    if (table_len == 0 || table_len > sizeof(staging)) {
+	PrintError(vm,VCORE_NONE,"multiboot: cannot build multiboot table\n");
+	return;
+    }
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,table_len,staging);
+
+}
+
+
+
+/*
+ GPA layout:
+
+ GDT
+ TSS
+ MBinfo
+ Kernel at its desired load address (or error)
+
+*/
+
+
+// Load the kernel into the guest and place the three metadata pages in
+// guest physical memory.  The pages are consecutive:
+//   mb_gpa + 0     multiboot info table
+//   mb_gpa + 4096  TSS
+//   mb_gpa + 8192  GDT
+// Returns 0 on success (or non-MB VM), -1 on error.
+int v3_setup_multiboot_vm_for_boot(struct v3_vm_info *vm)
+{
+    void *kernel_start_gpa;
+    void *kernel_end_gpa;
+    void *mb_gpa;
+    void *tss_gpa;
+    void *gdt_gpa;
+
+    if (!vm->mb_state.is_multiboot) {
+	PrintDebug(vm,VCORE_NONE,"multiboot: skipping multiboot setup for boot as this is not a multiboot VM\n");
+	return 0;
+    }
+
+    // Parse and copy the kernel to its requested load address first, so
+    // we know which GPAs it occupies
+    if (setup_multiboot_kernel(vm)) {
+	PrintError(vm,VCORE_NONE,"multiboot: failed to setup kernel\n");
+	return -1;
+    }
+
+    kernel_start_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->load_addr);
+    kernel_end_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->bss_end_addr);
+
+    // Is there room below the kernel?
+    if ((uint64_t)kernel_start_gpa > 19*4096 ) {
+	// at least 3 pages between 64K (16*4096) and start of kernel --
+	// enough for the info/TSS/GDT pages
+	// place at 64K
+	mb_gpa=(void*)(16*4096);
+    } else {
+	// is there room above the kernel?
+	if ((uint64_t)kernel_end_gpa < vm->mem_size-4*4096) {
+	    // the info address is later handed to the guest in EBX (32
+	    // bits), so everything must stay below 4 GB
+	    if (((uint64_t)kernel_end_gpa + 4 * 4096) <= 0xffffffff) {
+		// first page boundary after the end of the kernel image
+		mb_gpa=(void*) (4096*((uint64_t)kernel_end_gpa/4096 + 1));
+	    } else {
+		PrintError(vm,VCORE_NONE,"multiboot: no room for mb data below 4 GB\n");
+		return -1;
+	    }
+	} else {
+	    PrintError(vm,VCORE_NONE,"multiboot: no room for mb data above kernel\n");
+	    return -1;
+	}
+    }
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: mb data will start at %p\n",mb_gpa);
+
+    vm->mb_state.mb_data_gpa=mb_gpa;
+
+    // one page each, consecutively: info, TSS, GDT
+    tss_gpa = mb_gpa + 1 * 4096;
+    gdt_gpa = mb_gpa + 2 * 4096;
+
+    write_table(vm,mb_gpa,4096);
+
+    write_tss(vm,tss_gpa,4096);
+
+    write_gdt(vm,gdt_gpa,4096);
+
+    PrintDebug(vm,VCORE_NONE,"multiboot: setup of memory done\n");
+
+    return 0;
+}
+
+/*
+ On entry:
+
+ IDTR not set
+ GDTR points to stub GDT
+ TR points to stub TSS
+ CR0 has PE and not PG
+ EIP is entry point to kernel
+ EBX points to multiboot info
+ EAX multiboot magic cookie
+
+*/
+// Initialize the BSP's register and segment state so the guest enters the
+// multiboot kernel directly in 32-bit protected mode (no paging), with
+// EAX holding the MB2 magic and EBX pointing at the info table.
+// Non-BSP cores and non-multiboot VMs are left untouched.
+// Returns 0 on success, -1 on error.
+int v3_setup_multiboot_core_for_boot(struct guest_info *core)
+{
+    void *base;
+    uint64_t limit;
+
+    if (!core->vm_info->mb_state.is_multiboot) {
+	PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not an mb VM\n");
+	return 0;
+    }
+
+    // only the BSP (vcpu 0) boots into the kernel
+    if (core->vcpu_id != 0) {
+	PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not the BSP core\n");
+	return 0;
+    }
+
+
+    PrintDebug(core->vm_info, core, "multiboot: setting up MB BSP core for boot\n");
+
+    // start from a clean slate for all architectural state
+    memset(&core->vm_regs,0,sizeof(core->vm_regs));
+    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
+    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
+    memset(&core->segments,0,sizeof(core->segments));
+    memset(&core->msrs,0,sizeof(core->msrs));
+    memset(&core->fp_state,0,sizeof(core->fp_state));
+
+    // We need to be in protected mode at ring zero
+    core->cpl = 0; // we are going right into the kernel
+    core->cpu_mode = PROTECTED;
+    core->mem_mode = PHYSICAL_MEM;
+    // default run-state is fine, we are core zero
+    // core->core_run_state = CORE_RUNNING ;
+
+    // right into the kernel, at the entry point from the MB entry tag
+    core->rip = (uint64_t) core->vm_info->mb_state.mb_data.entry->entry_addr;
+
+    // Setup CRs for protected mode
+    // CR0: PE (but no PG)
+    core->ctrl_regs.cr0 = 0x1;
+    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
+
+    // CR2: don't care (output from #PF)
+    // CR3: don't care (no paging)
+    core->ctrl_regs.cr3 = 0;
+    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
+
+    // CR4: no features
+    core->ctrl_regs.cr4 = 0x0;
+    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;
+    // CR8 as usual
+    // RFLAGS zeroed is fine: come in with interrupts off
+    // EFER needs SVME and LME but not LMA (last 16 bits: 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0)
+    // NOTE(review): 0x1400 sets bits 12 and 10; architecturally EFER bit 8
+    // is LME and bit 10 is LMA, so this value looks like SVME|LMA rather
+    // than the SVME|LME the comment above describes -- confirm intent
+    core->ctrl_regs.efer = 0x1400;
+    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;
+
+
+    /*
+	Notes on selectors:
+
+	selector is 13 bits of index, 1 bit table indicator
+	(0=>GDT), 2 bit RPL
+
+	index is scaled by 8, even in long mode, where some entries
+	are 16 bytes long....
+	   -> code, data descriptors have 8 byte format
+	      because base, limit, etc, are ignored (no segmentation)
+	   -> interrupt/trap gates have 16 byte format
+	      because offset needs to be 64 bits
+    */
+
+    // There is no IDTR set and interrupts are disabled
+
+    // Install our stub GDT (page 2 of the mb data area)
+    core->segments.gdtr.selector = 0;
+    core->segments.gdtr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+2*4096;
+    core->segments.gdtr.limit = 4096-1;
+    core->segments.gdtr.type = 0x6;
+    core->segments.gdtr.system = 1;
+    core->segments.gdtr.dpl = 0;
+    core->segments.gdtr.present = 1;
+    core->segments.gdtr.long_mode = 0;
+
+    // And our TSS (page 1 of the mb data area)
+    core->segments.tr.selector = 0;
+    core->segments.tr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+1*4096;
+    core->segments.tr.limit = 4096-1;
+    core->segments.tr.type = 0x6;
+    core->segments.tr.system = 1;
+    core->segments.tr.dpl = 0;
+    core->segments.tr.present = 1;
+    core->segments.tr.long_mode = 0;
+
+    // flat segments: base 0, maximum limit
+    base = 0x0;
+    limit = -1;
+
+    // And CS
+    core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
+    core->segments.cs.base = (addr_t) base;
+    core->segments.cs.limit = limit;
+    core->segments.cs.type = 0xe;
+    core->segments.cs.system = 0;
+    core->segments.cs.dpl = 0;
+    core->segments.cs.present = 1;
+    core->segments.cs.long_mode = 0;
+
+    // DS, SS, etc are identical
+    core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
+    core->segments.ds.base = (addr_t) base;
+    core->segments.ds.limit = limit;
+    core->segments.ds.type = 0x6;
+    core->segments.ds.system = 0;
+    core->segments.ds.dpl = 0;
+    core->segments.ds.present = 1;
+    core->segments.ds.long_mode = 0;
+
+    memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
+
+
+
+    // Now for our magic - this signals
+    // the kernel that a multiboot loader loaded it
+    // and that rbx points to its offered data
+    core->vm_regs.rax = MB2_INFO_MAGIC;
+
+    core->vm_regs.rbx = (uint64_t) (core->vm_info->mb_state.mb_data_gpa);
+
+    // reset paging here for shadow...
+
+    // only nested paging is supported for multiboot boot
+    if (core->shdw_pg_mode != NESTED_PAGING) {
+	PrintError(core->vm_info, core, "multiboot: shadow paging guest... this will end badly\n");
+	return -1;
+    }
+
+
+    return 0;
+}