Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Basic HRT startup for HVM, plus assorted cleanup
Peter Dinda [Wed, 25 Mar 2015 21:18:53 +0000 (16:18 -0500)]
This builds and launches a basic guest environment for an HRT
It does not yet handle ELF load, but it is able to bootstrap
a simple code blob, including with interrupts, on HRT cores
while ROS cores happily run Linux

palacios/include/palacios/vmm_hvm.h
palacios/include/palacios/vmm_hypercall.h
palacios/src/palacios/Makefile
palacios/src/palacios/svm.c
palacios/src/palacios/vm_guest.c
palacios/src/palacios/vmm.c
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_hvm.c
palacios/src/palacios/vmm_hvm_lowlevel.S [new file with mode: 0644]
palacios/src/palacios/vmx.c

index 3fd0091..6d145c2 100644 (file)
@@ -64,6 +64,10 @@ int      v3_hvm_should_deliver_ipi(struct guest_info *src, struct guest_info *de
 void     v3_hvm_find_apics_seen_by_core(struct guest_info *core, struct v3_vm_info *vm, 
                                        uint32_t *start_apic, uint32_t *num_apics);
 
+
+int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm);
+int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core);
+
 #endif /* ! __V3VEE__ */
 
 
index e3c7c6f..9b16618 100644 (file)
@@ -55,6 +55,8 @@ typedef enum {
     TIME_RDHTSC_HCALL   =  0xd001,       //RBX: cpu freq (out)
 
     VNET_HEADER_QUERY_HCALL  =   0xe000,        // Get the current header for a src/dest pair
+
+    HVM_HCALL = 0xf000, 
 } hcall_id_t;
 
 
index dd40f40..0ca0f57 100644 (file)
@@ -93,6 +93,6 @@ obj-$(V3_CONFIG_SYMMOD) += vmm_symmod.o
 
 obj-$(V3_CONFIG_MEM_TRACK) += vmm_mem_track.o
 
-obj-$(V3_CONFIG_HVM) += vmm_hvm.o
+obj-$(V3_CONFIG_HVM) += vmm_hvm.o vmm_hvm_lowlevel.o
 
 obj-y += mmu/
index ed0cfb9..05fd183 100644 (file)
@@ -849,6 +849,18 @@ int v3_start_svm_guest(struct guest_info * info) {
 
     PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);
 
+
+#ifdef V3_CONFIG_HVM
+    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
+       PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
+       return -1;
+    }
+#endif
+
+           
+
+
     while (1) {
 
        if (info->core_run_state == CORE_STOPPED) {
@@ -898,7 +910,6 @@ int v3_start_svm_guest(struct guest_info * info) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
-       
 
 #ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
index acbb11e..d325e6b 100644 (file)
@@ -318,6 +318,9 @@ int v3_free_vm_internal(struct v3_vm_info * vm) {
     v3_remove_hypercall(vm, GUEST_INFO_HCALL);
 
 
+#ifdef V3_CONFIG_HVM
+    v3_deinit_hvm_vm(vm);
+#endif
 
 #ifdef V3_CONFIG_SYMBIOTIC
     v3_deinit_symbiotic_vm(vm);
@@ -377,9 +380,6 @@ int v3_free_vm_internal(struct v3_vm_info * vm) {
     v3_deinit_telemetry(vm);
 #endif
 
-#ifdef V3_CONFIG_HVM
-    v3_deinit_hvm_vm(vm);
-#endif
 
     v3_deinit_events(vm);
 
@@ -470,6 +470,10 @@ int v3_free_core(struct guest_info * core) {
     v3_deinit_symbiotic_core(core);
 #endif
 
+#ifdef V3_CONFIG_HVM
+    v3_deinit_hvm_core(core);
+#endif
+
     v3_deinit_decoder(core);
 
     v3_deinit_intr_controllers(core);
@@ -488,9 +492,6 @@ int v3_free_core(struct guest_info * core) {
     v3_deinit_core_telemetry(core);
 #endif
 
-#ifdef V3_CONFIG_HVM
-    v3_deinit_hvm_core(core);
-#endif
 
     switch (v3_mach_type) {
 #ifdef V3_CONFIG_SVM
index 6690e97..f45a4be 100644 (file)
@@ -385,6 +385,12 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
         return -1;
     }
 
+#if V3_CONFIG_HVM
+    if (v3_setup_hvm_vm_for_boot(vm)) { 
+       PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n");
+       return -1;
+    }
+#endif
 
     /// CHECK IF WE ARE MULTICORE ENABLED....
 
index 374c1a5..38cfb70 100644 (file)
@@ -355,6 +355,11 @@ static int pre_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * vm_cfg) {
     }
 #endif
 
+    if (v3_init_vm(vm) == -1) {
+       PrintError(VM_NONE, VCORE_NONE, "Failed to initialize VM\n");
+       return -1;
+    }
+
 #ifdef V3_CONFIG_HVM
     if (v3_init_hvm_vm(vm,vm_cfg)) { 
        PrintError(vm,VCORE_NONE,"Cannot initialize HVM for VM\n");
@@ -362,14 +367,6 @@ static int pre_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * vm_cfg) {
     }
 #endif
 
-
-    if (v3_init_vm(vm) == -1) {
-       PrintError(VM_NONE, VCORE_NONE, "Failed to initialize VM\n");
-       return -1;
-    }
-
-
-
    if (schedule_hz_str) {
        sched_hz = atoi(schedule_hz_str);
     }
index 4225d28..b1f7013 100644 (file)
 #include <palacios/vmm_emulator.h>
 #include <palacios/vm_guest.h>
 #include <palacios/vmm_debug.h>
+#include <palacios/vmm_hypercall.h>
 
 #include <palacios/vmm_xml.h>
 
+#include <palacios/vm_guest_mem.h>
+
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -78,6 +81,13 @@ int v3_deinit_hvm()
 }
 
 
+static int hvm_hcall_handler(struct guest_info * core , hcall_id_t hcall_id, void * priv_data)
+{
+    V3_Print(core->vm_info,core, "hvm: received hypercall %x  rax=%llx rbx=%llx rcx=%llx\n",
+            hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx);
+    return 0;
+}
+
 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
 
 int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
@@ -146,6 +156,12 @@ int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
        return -1;
     }
 
+    if (v3_register_hypercall(vm, HVM_HCALL, 
+                             hvm_hcall_handler, 0)) { 
+       PrintError(vm,VCORE_NONE, "hvm: cannot register hypercall....\n");
+       return -1;
+    }
+
     // XXX sanity check config here
 
     vm->hvm_state.is_hvm=1;
@@ -168,9 +184,12 @@ int v3_init_hvm_vm(struct v3_vm_info *vm, struct v3_xml *config)
     
 }
 
+
 int v3_deinit_hvm_vm(struct v3_vm_info *vm)
 {
-    PrintDebug(vm, VCORE_NONE, "hvm: HVM deinit\n");
+    PrintDebug(vm, VCORE_NONE, "hvm: HVM VM deinit\n");
+
+    v3_remove_hypercall(vm,HVM_HCALL);
 
     return 0;
 }
@@ -188,6 +207,8 @@ int v3_init_hvm_core(struct guest_info *core)
 
 int v3_deinit_hvm_core(struct guest_info *core)
 {
+    PrintDebug(core->vm_info, VCORE_NONE, "hvm: HVM core deinit\n");
+
     return 0;
 }
 
@@ -295,5 +316,513 @@ void     v3_hvm_find_apics_seen_by_core(struct guest_info *core, struct v3_vm_in
        }
     }
 }
+
+
+static void get_null_int_handler_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*) PAGE_ADDR(vm->mem_size - PAGE_SIZE);
+    *limit = PAGE_SIZE;
+}
+
+extern v3_cpu_arch_t v3_mach_type;
+
+extern void *v3_hvm_svm_null_int_handler_start;
+extern void *v3_hvm_svm_null_int_handler_end;
+extern void *v3_hvm_vmx_null_int_handler_start;
+extern void *v3_hvm_vmx_null_int_handler_end;
+
+static void write_null_int_handler(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+    void *data;
+    uint64_t len;
+
+    get_null_int_handler_loc(vm,&base,&limit);
+
+    switch (v3_mach_type) {
+#ifdef V3_CONFIG_SVM
+       case V3_SVM_CPU:
+       case V3_SVM_REV3_CPU:
+           data = (void*) &v3_hvm_svm_null_int_handler_start;
+           len = (void*) &v3_hvm_svm_null_int_handler_end - data;
+           break;
+#endif
+#if V3_CONFIG_VMX
+       case V3_VMX_CPU:
+       case V3_VMX_EPT_CPU:
+       case V3_VMX_EPT_UG_CPU:
+           data = (void*) &v3_hvm_vmx_null_int_handler_start;
+           len = (void*) &v3_hvm_vmx_null_int_handler_end - data;
+           break;
+#endif
+       default:
+           PrintError(vm,VCORE_NONE,"hvm: cannot determine CPU type to select null interrupt handler...\n");
+           data = 0;
+           len = 0;
+    }
+
+    if (data) {
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base),len,(uint8_t*)data);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote null interrupt handler at %p (%llu bytes)\n",base,len);
+}
+
+
+static void get_idt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*) PAGE_ADDR(vm->mem_size - 2 * PAGE_SIZE);
+    *limit = 16*256;
+}
+
+// default IDT entries (int and trap gates)
+//
+// Format is 16 bytes long:
+//   16 offsetlo   => 0
+//   16 selector   => (target code selector) => 0x8 // entry 1 of GDT
+//    3 ist        => (stack) = 0 => current stack
+//    5 reserved   => 0
+//    4 type       => 0xe=>INT, 0xf=>TRAP 
+//    1 reserved   => 0
+//    2 dpl        => 0
+//    1 present    => 1
+//   16 offsetmid  => 0
+//   32 offsethigh => 0   (total is a 64 bit offset)
+//   32 reserved   => 0
+//
+// 00 00 | 08 00 | 00 | 8[typenybble] | offsetmid | offsethigh | reserved
+// 
+// Note little endian
+//
+static uint64_t idt64_trap_gate_entry_mask[2] = {  0x00008f0000080000, 0x0 } ;
+static uint64_t idt64_int_gate_entry_mask[2] =  { 0x00008e0000080000, 0x0 };
+
+static void write_idt(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+    void *handler;
+    uint64_t handler_len;
+    int i;
+    uint64_t trap_gate[2];
+    uint64_t int_gate[2];
+
+    get_idt_loc(vm,&base,&limit);
+
+    get_null_int_handler_loc(vm,&handler,&handler_len);
+
+    memcpy(trap_gate,idt64_trap_gate_entry_mask,16);
+    memcpy(int_gate,idt64_int_gate_entry_mask,16);
+
+    if (handler) {
+       // update the entries for the handler location
+       uint8_t *mask;
+       uint8_t *hand;
+       
+       hand = (uint8_t*) &handler;
+
+       mask = (uint8_t *)trap_gate;
+       memcpy(&(mask[0]),&(hand[0]),2); // offset low
+       memcpy(&(mask[6]),&(hand[2]),2); // offset med
+       memcpy(&(mask[8]),&(hand[4]),4); // offset high
+
+       mask = (uint8_t *)int_gate;
+       memcpy(&(mask[0]),&(hand[0]),2); // offset low
+       memcpy(&(mask[6]),&(hand[2]),2); // offset med
+       memcpy(&(mask[8]),&(hand[4]),4); // offset high
+
+       PrintDebug(vm,VCORE_NONE,"hvm: Adding default null trap and int gates\n");
+    }
+
+    for (i=0;i<32;i++) { 
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*16),16,(uint8_t*)trap_gate);
+    }
+
+    for (i=32;i<256;i++) { 
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*16),16,(uint8_t*)int_gate);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote IDT at %p\n",base);
+}
+
+
+
+static void get_gdt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*)PAGE_ADDR(vm->mem_size - 3 * PAGE_SIZE);
+    *limit = 8*3;
+}
+
+static uint64_t gdt64[3] = {
+    0x0000000000000000, /* null */
+    0x00a09a0000000000, /* code (note lme bit) */
+    0x00a0920000000000, /* data (most entries don't matter) */
+};
+
+static void write_gdt(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+
+    get_gdt_loc(vm,&base,&limit);
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,limit,(uint8_t*) gdt64);
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote GDT at %p\n",base);
+}
+
+
+
+static void get_tss_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*)PAGE_ADDR(vm->mem_size - 4 * PAGE_SIZE);
+    *limit = PAGE_SIZE;
+}
+
+static uint64_t tss_data=0x0;
+
+static void write_tss(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+    int i;
+
+    get_tss_loc(vm,&base,&limit);
+    for (i=0;i<limit/8;i++) {
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+8*i),8,(uint8_t*) &tss_data);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote TSS at %p\n",base);
+}
+
+/*
+  PTS MAP FIRST 512 GB identity mapped: 
+  1 second level
+     512 entries
+  1 top level
+     1 entries
+*/
+
+static void get_pt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*)PAGE_ADDR(vm->mem_size-(5+1)*PAGE_SIZE);
+    *limit =  2*PAGE_SIZE;
+}
+
+static void write_pt(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t size;
+    struct pml4e64 pml4e;
+    struct pdpe64 pdpe;
+    uint64_t i;
+
+    get_pt_loc(vm,&base, &size);
+    if (size!=2*PAGE_SIZE) { 
+       PrintError(vm,VCORE_NONE,"Cannot support pt request, defaulting\n");
+    }
+
+    memset(&pdpe,0,sizeof(pdpe));
+    pdpe.present=1;
+    pdpe.writable=1;
+    pdpe.large_page=1;
+    
+    for (i=0;i<512;i++) {
+       pdpe.pd_base_addr = i*0x40000;  // 0x40000 = 256K 4KB pages = 1 GB
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+PAGE_SIZE+i*sizeof(pdpe)),sizeof(pdpe),(uint8_t*)&pdpe);
+    }
+
+    memset(&pml4e,0,sizeof(pml4e));
+    pml4e.present=1;
+    pml4e.writable=1;
+    pml4e.pdp_base_addr = PAGE_BASE_ADDR((addr_t)(base+PAGE_SIZE));
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,sizeof(pml4e),(uint8_t*)&pml4e);    
+
+    for (i=1;i<512;i++) {
+       pml4e.present=0;
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*sizeof(pml4e)),sizeof(pml4e),(uint8_t*)&pml4e);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: Wrote page tables (1 PML4, 1 PDPE) at %p\n",base);
+}
+
+static void get_bp_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    *base = (void*) PAGE_ADDR(vm->mem_size-(6+1)*PAGE_SIZE);
+    *limit =  PAGE_SIZE;
+}
+
+static void write_bp(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+    uint64_t data=-1;
+    int i;
+
+    get_bp_loc(vm,&base,&limit);
+    
+    for (i=0;i<limit/8;i++) { 
+       v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*8),8,(uint8_t*)&data);
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote boundary page at %p\n", base);
+    
+}
+
+#define MIN_STACK (4096*4)
+
+
+static void get_hrt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+{
+    void *bp_base;
+    uint64_t bp_limit;
+    
+    get_bp_loc(vm,&bp_base,&bp_limit);
+    
+    // assume at least a minimal stack
+
+    bp_base-=MIN_STACK;
+
+    *base = (void*)PAGE_ADDR(vm->hvm_state.first_hrt_gpa);
+
+    if (bp_base < *base+PAGE_SIZE) { 
+	PrintError(vm,VCORE_NONE,"hvm: HRT stack collides with HRT\n");
+    }
+
+    *limit = bp_base - *base;
+}
+
+static void write_hrt(struct v3_vm_info *vm)
+{
+    void *base;
+    uint64_t limit;
+
+    get_hrt_loc(vm,&base,&limit);
+    
+    if (vm->hvm_state.hrt_file->size > limit) { 
+       PrintError(vm,VCORE_NONE,"hvm: Cannot map HRT because it is too big (%llu bytes, but only have %llu space\n", vm->hvm_state.hrt_file->size, (uint64_t)limit);
+       return;
+    }
+
+    v3_write_gpa_memory(&vm->cores[0],(addr_t)base,vm->hvm_state.hrt_file->size,vm->hvm_state.hrt_file->data);
+
+    PrintDebug(vm,VCORE_NONE,"hvm: wrote HRT %s at %p\n", vm->hvm_state.hrt_file->tag,base);
+    
+}
+
+
        
 
+/*
+  GPA layout:
+
+  HRT
+  ---
+  ROS
+
+  We do not touch the ROS portion of the address space.
+  The HRT portion looks like:
+
+  INT_HANDLER (1 page - page aligned)
+  IDT (1 page - page aligned)
+  GDT (1 page - page aligned)
+  TSS (1 page - page aligned)
+  PAGETABLES  (identity map of first N GB)
+     ROOT PT first, followed by 2nd level, etc.
+     Currently PML4 followed by 1 PDPE for 512 GB of mapping
+  BOUNDARY PAGE (all 0xff - avoid smashing page tables in case we keep going...)
+  (stack - we will push machine description)
+  ...
+  HRT (as many pages as needed, page-aligned, starting at first HRT address)
+  ---
+  ROS
+      
+*/
+
+
+int v3_setup_hvm_vm_for_boot(struct v3_vm_info *vm)
+{
+    if (!vm->hvm_state.is_hvm) { 
+       PrintDebug(vm,VCORE_NONE,"hvm: skipping HVM setup for boot as this is not an HVM\n");
+       return 0;
+    }
+
+    PrintDebug(vm,VCORE_NONE,"hvm: setup of HVM memory begins\n");
+
+    write_null_int_handler(vm);
+    write_idt(vm);
+    write_gdt(vm);
+    write_tss(vm);
+
+    write_pt(vm);
+
+    write_bp(vm);
+    
+    write_hrt(vm);
+
+
+    PrintDebug(vm,VCORE_NONE,"hvm: setup of HVM memory done\n");
+
+    return 0;
+}
+
+/*
+  On entry:
+
+   IDTR points to stub IDT
+   GDTR points to stub GDT
+   TS   points to stub TSS
+   CR3 points to root page table
+   CR0 has PE and PG
+   EFER has LME AND LMA
+   RSP is TOS (looks like a call)
+       INFO                     <= RDI
+       0 (fake return address)  <= RSP
+       
+   RIP is entry point to HRT
+   RDI points to machine info on stack
+
+   Other regs are zeroed
+
+   shadow/nested paging state reset for long mode
+
+*/
+int v3_setup_hvm_hrt_core_for_boot(struct guest_info *core)
+{
+    void *base;
+    uint64_t limit;
+
+    if (!core->hvm_state.is_hrt) { 
+       PrintDebug(core->vm_info,core,"hvm: skipping HRT setup for core %u as it is not an HRT core\n", core->vcpu_id);
+       return 0;
+    }
+
+    PrintDebug(core->vm_info, core, "hvm: setting up HRT core (%u) for boot\n", core->vcpu_id);
+
+    
+    memset(&core->vm_regs,0,sizeof(core->vm_regs));
+    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
+    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
+    memset(&core->segments,0,sizeof(core->segments));    
+    memset(&core->msrs,0,sizeof(core->msrs));    
+    memset(&core->fp_state,0,sizeof(core->fp_state));    
+
+    // We are in long mode with virtual memory and we want
+    // to start immediately
+    core->cpl = 0; // we are going right into the kernel
+    core->cpu_mode = LONG;
+    core->mem_mode = VIRTUAL_MEM; 
+    core->core_run_state = CORE_RUNNING ;
+
+    // We are going to enter right into the HRT
+    // HRT stack and argument passing
+    get_bp_loc(core->vm_info, &base,&limit);
+    // TODO: push description here
+    core->vm_regs.rsp = (v3_reg_t) base;  // so if we ret, we will blow up
+    core->vm_regs.rbp = (v3_reg_t) base; 
+    // TODO: RDI should really get pointer to description
+    core->vm_regs.rdi = (v3_reg_t) base;
+    // HRT entry point
+    get_hrt_loc(core->vm_info, &base,&limit);
+    core->rip = (uint64_t) base + 0x40; // hack for test.o
+
+    // Setup CRs for long mode and our stub page table
+    // CR0: PG, PE
+    core->ctrl_regs.cr0 = 0x80000001;
+    // CR2: don't care (output from #PF)
+    // CR3: set to our PML4E, without setting PCD or PWT
+    get_pt_loc(core->vm_info, &base,&limit);
+    core->ctrl_regs.cr3 = PAGE_ADDR((addr_t)base);
+    // CR4: PGE, PAE, PSE (last byte: 1 0 1 1 0 0 0 0)
+    core->ctrl_regs.cr4 = 0xb0;
+    // CR8 as usual
+    // RFLAGS zeroed is fine: come in with interrupts off
+    // EFER needs SVME LMA LME (last 16 bits: 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0)
+    core->ctrl_regs.efer = 0x1500;
+
+
+    /* 
+       Notes on selectors:
+
+       selector is 13 bits of index, 1 bit table indicator 
+       (0=>GDT), 2 bit RPL
+       
+       index is scaled by 8, even in long mode, where some entries 
+       are 16 bytes long.... 
+          -> code, data descriptors have 8 byte format
+             because base, limit, etc, are ignored (no segmentation)
+          -> interrupt/trap gates have 16 byte format 
+             because offset needs to be 64 bits
+    */
+    
+    // Install our stub IDT
+    get_idt_loc(core->vm_info, &base,&limit);
+    core->segments.idtr.selector = 0;  // entry 0 (NULL) of the GDT
+    core->segments.idtr.base = (addr_t) base;
+    core->segments.idtr.limit = limit-1;
+    core->segments.idtr.type = 0xe;
+    core->segments.idtr.system = 1; 
+    core->segments.idtr.dpl = 0;
+    core->segments.idtr.present = 1;
+    core->segments.idtr.long_mode = 1;
+
+    // Install our stub GDT
+    get_gdt_loc(core->vm_info, &base,&limit);
+    core->segments.gdtr.selector = 0;
+    core->segments.gdtr.base = (addr_t) base;
+    core->segments.gdtr.limit = limit-1;
+    core->segments.gdtr.type = 0x6;
+    core->segments.gdtr.system = 1; 
+    core->segments.gdtr.dpl = 0;
+    core->segments.gdtr.present = 1;
+    core->segments.gdtr.long_mode = 1;
+    
+    // And our TSS
+    get_tss_loc(core->vm_info, &base,&limit);
+    core->segments.tr.selector = 0;
+    core->segments.tr.base = (addr_t) base;
+    core->segments.tr.limit = limit-1;
+    core->segments.tr.type = 0x6;
+    core->segments.tr.system = 1; 
+    core->segments.tr.dpl = 0;
+    core->segments.tr.present = 1;
+    core->segments.tr.long_mode = 1;
+    
+    base = 0x0;
+    limit = -1;
+
+    // And CS
+    core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
+    core->segments.cs.base = (addr_t) base;
+    core->segments.cs.limit = limit;
+    core->segments.cs.type = 0xe;
+    core->segments.cs.system = 0; 
+    core->segments.cs.dpl = 0;
+    core->segments.cs.present = 1;
+    core->segments.cs.long_mode = 1;
+
+    // DS, SS, etc are identical
+    core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
+    core->segments.ds.base = (addr_t) base;
+    core->segments.ds.limit = limit;
+    core->segments.ds.type = 0x6;
+    core->segments.ds.system = 0; 
+    core->segments.ds.dpl = 0;
+    core->segments.ds.present = 1;
+    core->segments.ds.long_mode = 1;
+    
+    memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
+    memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
+    
+    // reset paging here for shadow... 
+
+    if (core->shdw_pg_mode != NESTED_PAGING) { 
+       PrintError(core->vm_info, core, "hvm: shadow paging guest... this will end badly\n");
+    }
+
+
+    return 0;
+}
diff --git a/palacios/src/palacios/vmm_hvm_lowlevel.S b/palacios/src/palacios/vmm_hvm_lowlevel.S
new file mode 100644 (file)
index 0000000..b6d1a76
--- /dev/null
@@ -0,0 +1,48 @@
+/* HVM environment code and data */
+
+/* This must match the number in vmm_hypercall.h */
+#define HVM_HCALL $0xf000
+       
+/*
+       The default installed interrupt handlers simply hcall
+       and then halt.  They should never be called - this is
+       a panic
+*/
+       
+.global v3_hvm_svm_null_int_handler_start
+v3_hvm_svm_null_int_handler_start:
+       pushq %rax
+       pushq %rbx
+       pushq %rcx
+       movq HVM_HCALL, %rax
+       movq 24(%rsp), %rbx  /* last two words of interrupt stack */
+       movq 32(%rsp), %rcx  
+       vmmcall
+       popq %rcx
+       popq %rbx
+       popq %rax
+       hlt
+       iretq
+.global v3_hvm_svm_null_int_handler_end
+v3_hvm_svm_null_int_handler_end:
+
+.global v3_hvm_vmx_null_int_handler_start
+v3_hvm_vmx_null_int_handler_start:
+       pushq %rax
+       pushq %rbx
+       pushq %rcx
+       movq HVM_HCALL, %rax
+       movq 24(%rsp), %rbx /* last two words of interrupt stack */
+       movq 32(%rsp), %rcx
+       vmcall              /* That's right - one letter different */
+       popq %rcx
+       popq %rbx
+       popq %rax
+       hlt
+       iretq
+.global v3_hvm_vmx_null_int_handler_end
+v3_hvm_vmx_null_int_handler_end:
+
+       
+       
+       
\ No newline at end of file
index 741a636..f0823b4 100644 (file)
@@ -1198,6 +1198,13 @@ int v3_start_vmx_guest(struct guest_info * info) {
 
     PrintDebug(info->vm_info, info, "Starting VMX core %u\n", info->vcpu_id);
 
+#if V3_CONFIG_HVM
+    if (v3_setup_hvm_vm_for_boot(vm)) { 
+       PrintError(vm, VCORE_NONE, "HVM setup for boot failed\n");
+       return -1;
+    }
+#endif
+    
     while (1) {
        if (info->core_run_state == CORE_STOPPED) {
            if (info->vcpu_id == 0) {