Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Do we need to save and restore the guest's GS/FS regs?
[palacios.git] / palacios / src / palacios / svm.c
index 2a0ce74..18c78e1 100644 (file)
 #include <palacios/vmm_decoder.h>
 #include <palacios/vmm_string.h>
 #include <palacios/vmm_lowlevel.h>
+#include <palacios/svm_msr.h>
 
+#include <palacios/vmm_rbtree.h>
 
+#include <palacios/vmm_profiler.h>
 
-extern uint_t Get_CR3();
-
-
+#include <palacios/vmm_direct_paging.h>
 
 extern void v3_stgi();
 extern void v3_clgi();
+//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
 extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs);
 
 
@@ -59,7 +61,7 @@ static vmcb_t * Allocate_VMCB() {
 
 
 
-
+#include <palacios/vmm_ctrl_regs.h>
 
 static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
@@ -76,8 +78,32 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   //ctrl_area->instrs.instrs.CR0 = 1;
   ctrl_area->cr_reads.cr0 = 1;
   ctrl_area->cr_writes.cr0 = 1;
+  //ctrl_area->cr_reads.cr4 = 1;
+  ctrl_area->cr_writes.cr4 = 1;
+
+
+  /* Set up the efer to enable 64 bit page tables */
+  /*
+  {
+    struct efer_64 * efer = (struct efer_64 *)&(guest_state->efer);
+    struct cr4_32 * cr4 = (struct cr4_32 *)&(guest_state->cr4);
+    efer->lma = 1;
+    efer->lme = 1;
+
+    cr4->pae = 1;
+  }
+  */
 
   guest_state->efer |= EFER_MSR_svm_enable;
+  vm_info->guest_efer.value = 0x0LL;
+
+  v3_hook_msr(vm_info, EFER_MSR, 
+             &v3_handle_efer_read,
+             &v3_handle_efer_write, 
+             vm_info);
+
+
+
   guest_state->rflags = 0x00000002; // The reserved bit is always 1
   ctrl_area->svm_instrs.VMRUN = 1;
   ctrl_area->svm_instrs.VMMCALL = 1;
@@ -114,6 +140,8 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   
     ctrl_area->exceptions.nmi = 1;
   */
+
+
   // Debug of boot on physical machines - 7/14/08
   ctrl_area->instrs.NMI=1;
   ctrl_area->instrs.SMI=1;
@@ -121,12 +149,11 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   ctrl_area->instrs.PAUSE=1;
   ctrl_area->instrs.shutdown_evts=1;
 
-
-
   vm_info->vm_regs.rdx = 0x00000f00;
 
   guest_state->cr0 = 0x60000010;
 
+
   guest_state->cs.selector = 0xf000;
   guest_state->cs.limit=0xffff;
   guest_state->cs.base = 0x0000000f0000LL;
@@ -164,10 +191,17 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   guest_state->dr6 = 0x00000000ffff0ff0LL;
   guest_state->dr7 = 0x0000000000000400LL;
 
-  if (vm_info->io_map.num_ports > 0) {
-    struct vmm_io_hook * iter;
+  
+  
+
+
+
+  if ( !RB_EMPTY_ROOT(&(vm_info->io_map)) ) {
+    struct v3_io_hook * iter;
+    struct rb_node * io_node = v3_rb_first(&(vm_info->io_map));
     addr_t io_port_bitmap;
-    
+    int i = 0;
+
     io_port_bitmap = (addr_t)V3_VAddr(V3_AllocPages(3));
     memset((uchar_t*)io_port_bitmap, 0, PAGE_SIZE * 3);
     
@@ -175,22 +209,26 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 
     //PrintDebug("Setting up IO Map at 0x%x\n", io_port_bitmap);
 
-    FOREACH_IO_HOOK(vm_info->io_map, iter) {
+    do {
+      iter = rb_entry(io_node, struct v3_io_hook, tree_node);
+
       ushort_t port = iter->port;
       uchar_t * bitmap = (uchar_t *)io_port_bitmap;
+      //PrintDebug("%d: Hooking Port %d\n", i, port);
 
       bitmap += (port / 8);
       //      PrintDebug("Setting Bit for port 0x%x\n", port);
       *bitmap |= 1 << (port % 8);
-    }
+
+      i++;
+    } while ((io_node = v3_rb_next(io_node)));
 
 
     //PrintDebugMemDump((uchar_t*)io_port_bitmap, PAGE_SIZE *2);
 
     ctrl_area->instrs.IOIO_PROT = 1;
   }
-
-
+  
 
   PrintDebug("Exiting on interrupts\n");
   ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
@@ -199,13 +237,20 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
 
   if (vm_info->shdw_pg_mode == SHADOW_PAGING) {
     PrintDebug("Creating initial shadow page table\n");
-    vm_info->direct_map_pt = V3_PAddr((addr_t)create_passthrough_pde32_pts(vm_info));
 
-    vm_info->shdw_pg_state.shadow_cr3 |= (vm_info->direct_map_pt & ~0xfff);
+
+
+    /* Testing 64 bit page tables for long paged real mode guests */
+    //    vm_info->direct_map_pt = (addr_t)V3_PAddr(create_passthrough_pts_64(vm_info));
+    vm_info->direct_map_pt = (addr_t)V3_PAddr(v3_create_direct_passthrough_pts(vm_info));
+    /* End Test */
+
     vm_info->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
     PrintDebug("Created\n");
 
-    guest_state->cr3 = vm_info->shdw_pg_state.shadow_cr3;
+
+    guest_state->cr3 = vm_info->direct_map_pt;
+
 
     //PrintDebugPageTables((pde32_t*)(vm_info->shdw_pg_state.shadow_cr3.e_reg.low));
 
@@ -223,7 +268,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     ctrl_area->TLB_CONTROL = 1;
     
 
-
     guest_state->g_pat = 0x7040600070406ULL;
 
     guest_state->cr0 |= 0x80000000;
@@ -238,7 +282,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
     PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));
 
     // Set the Nested Page Table pointer
-    vm_info->direct_map_pt = ((addr_t)create_passthrough_pde32_pts(vm_info) & ~0xfff);
+    vm_info->direct_map_pt = ((addr_t)create_passthrough_pts_32(vm_info) & ~0xfff);
     ctrl_area->N_CR3 = vm_info->direct_map_pt;
 
     //   ctrl_area->N_CR3 = Get_CR3();
@@ -248,6 +292,19 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info *vm_info) {
   }
 
 
+  if (vm_info->msr_map.num_hooks > 0) {
+    PrintDebug("Hooking %d msrs\n", vm_info->msr_map.num_hooks);
+    ctrl_area->MSRPM_BASE_PA = v3_init_svm_msr_map(vm_info);
+    ctrl_area->instrs.MSR_PROT = 1;
+
+  }
+
+
+  /* Safety locations for fs/gs */
+  vm_info->fs = 0;
+  vm_info->gs = 0;
+
+
 
 }
 
@@ -302,49 +359,79 @@ static int start_svm_guest(struct guest_info *info) {
 
   while (1) {
     ullong_t tmp_tsc;
-    uint_t vm_cr_low = 0, vm_cr_high = 0;
 
 
+
+#define MSR_STAR      0xc0000081
+#define MSR_LSTAR     0xc0000082
+#define MSR_CSTAR     0xc0000083
+#define MSR_SF_MASK   0xc0000084
+#define MSR_GS_BASE   0xc0000101
+#define MSR_KERNGS_BASE   0xc0000102
+
+
+    struct v3_msr host_cstar;
+    struct v3_msr host_star;
+    struct v3_msr host_lstar;
+    struct v3_msr host_syscall_mask;
+    struct v3_msr host_gs_base;
+    struct v3_msr host_kerngs_base;
+
     v3_enable_ints();
     v3_clgi();
 
 
-    PrintDebug("SVM Entry to rip=%p...\n", (void *)info->rip);
+    /*
+    PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
+              (void *)(addr_t)info->segments.cs.base, 
+              (void *)(addr_t)info->rip);
+    */
 
-    v3_get_msr(0xc0000101, &vm_cr_high, &vm_cr_low);
 
-    rdtscll(info->time_state.cached_host_tsc);
+    v3_get_msr(MSR_STAR, &(host_star.hi), &(host_star.lo));
+    v3_get_msr(MSR_LSTAR, &(host_lstar.hi), &(host_lstar.lo));
+    v3_get_msr(MSR_CSTAR, &(host_cstar.hi), &(host_cstar.lo));
+    v3_get_msr(MSR_SF_MASK, &(host_syscall_mask.hi), &(host_syscall_mask.lo));
+    v3_get_msr(MSR_GS_BASE, &(host_gs_base.hi), &(host_gs_base.lo));
+    v3_get_msr(MSR_KERNGS_BASE, &(host_kerngs_base.hi), &(host_kerngs_base.lo));
 
+
+    rdtscll(info->time_state.cached_host_tsc);
     guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;
 
+    //v3_svm_launch((vmcb_t*)V3_PAddr(info->vmm_data), &(info->vm_regs), &(info->fs), &(info->gs));
     v3_svm_launch((vmcb_t*)V3_PAddr(info->vmm_data), &(info->vm_regs));
+
     rdtscll(tmp_tsc);
 
-    v3_set_msr(0xc0000101, vm_cr_high, vm_cr_low);
-    PrintDebug("SVM Returned\n");
+    v3_set_msr(MSR_STAR, host_star.hi, host_star.lo);
+    v3_set_msr(MSR_LSTAR, host_lstar.hi, host_lstar.lo);
+    v3_set_msr(MSR_CSTAR, host_cstar.hi, host_cstar.lo);
+    v3_set_msr(MSR_SF_MASK, host_syscall_mask.hi, host_syscall_mask.lo);
+    v3_set_msr(MSR_GS_BASE, host_gs_base.hi, host_gs_base.lo);
+    v3_set_msr(MSR_KERNGS_BASE, host_kerngs_base.hi, host_kerngs_base.lo);
 
+    //PrintDebug("SVM Returned\n");
 
-#if PrintDebug
-    {
-      uint_t x = 0;
-      PrintDebug("RSP=%p\n", (void *)&x);
-    }
-#endif
 
 
     v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
     num_exits++;
 
-    PrintDebug("Turning on global interrupts\n");
+    //PrintDebug("Turning on global interrupts\n");
     v3_stgi();
 
 
-    PrintDebug("SVM Exit number %d\n", num_exits);
+    if ((num_exits % 5000) == 0) {
+      PrintDebug("SVM Exit number %d\n", num_exits);
+      if (info->enable_profiler) 
+       v3_print_profile(info);
+    }
 
 
      
     if (v3_handle_svm_exit(info) != 0) {
-
+      vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
       addr_t host_addr;
       addr_t linear_addr = 0;
 
@@ -352,7 +439,7 @@ static int start_svm_guest(struct guest_info *info) {
 
       PrintDebug("SVM ERROR!!\n"); 
       
-      PrintDebug("RIP: %p\n", (void *)guest_state->rip);
+      PrintDebug("RIP: %p\n", (void *)(addr_t)(guest_state->rip));
 
 
       linear_addr = get_addr_linear(info, guest_state->rip, &(info->segments.cs));
@@ -361,7 +448,22 @@ static int start_svm_guest(struct guest_info *info) {
       PrintDebug("RIP Linear: %p\n", (void *)linear_addr);
       v3_print_segments(info);
       v3_print_ctrl_regs(info);
+      if (info->shdw_pg_mode == SHADOW_PAGING) {
+       PrintDebug("Shadow Paging Guest Registers:\n");
+       PrintDebug("\tGuest CR0=%p\n", (void *)(addr_t)(info->shdw_pg_state.guest_cr0));
+       PrintDebug("\tGuest CR3=%p\n", (void *)(addr_t)(info->shdw_pg_state.guest_cr3));
+       // efer
+       // CR4
+      }
       v3_print_GPRs(info);
+
+      PrintDebug("SVM Exit Code: %p\n", (void *)(addr_t)guest_ctrl->exit_code); 
+      
+      PrintDebug("exit_info1 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info1));
+      PrintDebug("exit_info1 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
+      
+      PrintDebug("exit_info2 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info2));
+      PrintDebug("exit_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
       
       if (info->mem_mode == PHYSICAL_MEM) {
        guest_pa_to_host_va(info, linear_addr, &host_addr);
@@ -377,6 +479,7 @@ static int start_svm_guest(struct guest_info *info) {
 
       break;
     }
+
   }
   return 0;
 }
@@ -396,7 +499,7 @@ int v3_is_svm_capable() {
 
   v3_cpuid(CPUID_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
   
-  PrintDebug("CPUID_FEATURE_IDS_ecx=0x%p\n", (void *)ecx);
+  PrintDebug("CPUID_FEATURE_IDS_ecx=%p\n", (void *)ecx);
 
   if ((ecx & CPUID_FEATURE_IDS_ecx_svm_avail) == 0) {
     PrintDebug("SVM Not Available\n");
@@ -411,7 +514,7 @@ int v3_is_svm_capable() {
 
       v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
       
-      PrintDebug("CPUID_FEATURE_IDS_edx=0x%p\n", (void *)edx);
+      PrintDebug("CPUID_FEATURE_IDS_edx=%p\n", (void *)edx);
       
       if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
        PrintDebug("SVM BIOS Disabled, not unlockable\n");
@@ -424,8 +527,11 @@ int v3_is_svm_capable() {
       PrintDebug("SVM is available and  enabled.\n");
 
       v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
-      
-      PrintDebug("CPUID_FEATURE_IDS_edx=0x%p\n", (void *)edx);
+      PrintDebug("CPUID_FEATURE_IDS_eax=%p\n", (void *)eax);
+      PrintDebug("CPUID_FEATURE_IDS_ebx=%p\n", (void *)ebx);
+      PrintDebug("CPUID_FEATURE_IDS_ecx=%p\n", (void *)ecx);      
+      PrintDebug("CPUID_FEATURE_IDS_edx=%p\n", (void *)edx);
+
 
       if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        PrintDebug("SVM Nested Paging not supported\n");
@@ -668,7 +774,7 @@ void v3_init_SVM(struct v3_ctrl_ops * vmm_ops) {
 
   if (vm_info.page_mode == SHADOW_PAGING) {
     PrintDebug("Creating initial shadow page table\n");
-    vm_info.shdw_pg_state.shadow_cr3 |= ((addr_t)create_passthrough_pde32_pts(&vm_info) & ~0xfff);
+    vm_info.shdw_pg_state.shadow_cr3 |= ((addr_t)create_passthrough_pts_32(&vm_info) & ~0xfff);
     PrintDebug("Created\n");
 
     guest_state->cr3 = vm_info.shdw_pg_state.shadow_cr3;