Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches can be checked out the same way, substituting the branch name for devel.


Add lock to vmm_queue
[palacios.git] / palacios / src / palacios / svm_handler.c
index 7c278d3..75d2149 100644 (file)
 #include <palacios/vmm_decoder.h>
 #include <palacios/vmm_ctrl_regs.h>
 #include <palacios/svm_io.h>
-#include <palacios/svm_halt.h>
+#include <palacios/vmm_halt.h>
 #include <palacios/svm_pause.h>
 #include <palacios/svm_wbinvd.h>
 #include <palacios/vmm_intr.h>
 #include <palacios/vmm_emulator.h>
 #include <palacios/svm_msr.h>
-#include <palacios/vmm_profiler.h>
 #include <palacios/vmm_hypercall.h>
+#include <palacios/vmm_cpuid.h>
+#include <palacios/vmm_direct_paging.h>
 
+#ifdef CONFIG_VNET
+#include <palacios/vmm_vnet.h>
+#endif
 
-
-
-int v3_handle_svm_exit(struct guest_info * info) {
-    vmcb_ctrl_t * guest_ctrl = 0;
-    vmcb_saved_state_t * guest_state = 0;
-    ulong_t exit_code = 0;
-    
-    guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
-    guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
-  
-
-    // Update the high level state 
-    info->rip = guest_state->rip;
-    info->vm_regs.rsp = guest_state->rsp;
-    info->vm_regs.rax = guest_state->rax;
-
-    info->cpl = guest_state->cpl;
-
-
-    info->ctrl_regs.cr0 = guest_state->cr0;
-    info->ctrl_regs.cr2 = guest_state->cr2;
-    info->ctrl_regs.cr3 = guest_state->cr3;
-    info->ctrl_regs.cr4 = guest_state->cr4;
-    info->dbg_regs.dr6 = guest_state->dr6;
-    info->dbg_regs.dr7 = guest_state->dr7;
-    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
-    info->ctrl_regs.rflags = guest_state->rflags;
-    info->ctrl_regs.efer = guest_state->efer;
-    
-    get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
-    info->cpu_mode = v3_get_cpu_mode(info);
-    info->mem_mode = v3_get_mem_mode(info);
-
-
-    exit_code = guest_ctrl->exit_code;
-
-
-    if ((info->intr_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
-       // Interrupt was taken in the guest
-#ifdef DEBUG_INTERRUPTS
-           PrintDebug("Interrupt %d taken by guest\n", info->intr_state.irq_vector);
+#ifdef CONFIG_LINUX_VIRTIO_NET
+    extern int v3_virtionic_pktprocess(struct guest_info * info);
 #endif
-       v3_injecting_intr(info, info->intr_state.irq_vector, EXTERNAL_IRQ);
-    }
 
-    info->intr_state.irq_pending = 0;
-  
+#ifdef CONFIG_TELEMETRY
+#include <palacios/vmm_telemetry.h>
+#endif
 
-    // Disable printing io exits due to bochs debug messages
-    //if (!((exit_code == VMEXIT_IOIO) && ((ushort_t)(guest_ctrl->exit_info1 >> 16) == 0x402))) {
-    
-    
-    //  PrintDebug("SVM Returned: Exit Code: 0x%x \t\t(tsc=%ul)\n",exit_code, (uint_t)info->time_state.guest_tsc); 
-  
-    if ((0) && (exit_code <= VMEXIT_EXCP14)) {
-       uchar_t instr[32];
-       int ret;
-       // Dump out the instr stream
-
-       //PrintDebug("RIP: %x\n", guest_state->rip);
-       PrintDebug("\n\n\nRIP Linear: %p\n", (void *)get_addr_linear(info, info->rip, &(info->segments.cs)));
-       
-       v3_print_GPRs(info);
-       v3_print_ctrl_regs(info);
-
-
-       // OK, now we will read the instruction
-       // The only difference between PROTECTED and PROTECTED_PG is whether we read
-       // from guest_pa or guest_va
-       if (info->mem_mode == PHYSICAL_MEM) { 
-           // The real rip address is actually a combination of the rip + CS base 
-           ret = read_guest_pa_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 32, instr);
-       } else { 
-           ret = read_guest_va_memory(info, get_addr_linear(info, info->rip, &(info->segments.cs)), 32, instr);
-       }
-       
+#define VNET_PROFILE
+/* for vnet profiling*/
+#ifdef VNET_PROFILE
+static uint64_t vmm_time = 0;
+static uint64_t vnet_time = 0;
+static uint64_t guest_time = 0;
+static uint64_t last_exit_time = 0;
+static uint64_t num_exit = 0;
+#endif
 
+int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_info1, addr_t exit_info2) {
 
-       if (ret != 32) {
-           // I think we should inject a GPF into the guest
-           PrintDebug("Could not read instruction (ret=%d)\n", ret);
-       } else {
-           PrintDebug("Instr Stream:\n");
-           PrintTraceMemDump(instr, 32);
-       }
+#ifdef CONFIG_TELEMETRY
+    if (info->enable_telemetry) {
+       v3_telemetry_start_exit(info);
     }
+#endif
 
+#ifdef VNET_PROFILE
+    uint64_t exit_start_time, vnet_start_time;
+    uint64_t exit_end_time, vnet_end_time;
+    rdtscll(exit_start_time);
+    num_exit ++;
+    if (last_exit_time > 0)
+       guest_time += exit_start_time - last_exit_time;
+#endif
 
-
-    if (info->enable_profiler) {
-       rdtscll(info->profiler.start_time);
-    }
-
-  
     //PrintDebug("SVM Returned: Exit Code: %x\n",exit_code); 
 
     switch (exit_code) {
        case VMEXIT_IOIO: {
-           struct svm_io_info * io_info = (struct svm_io_info *)&(guest_ctrl->exit_info1);
-               
+           struct svm_io_info * io_info = (struct svm_io_info *)&(exit_info1);
+
            if (io_info->type == 0) {
                if (io_info->str) {
 
-                   if (v3_handle_svm_io_outs(info) == -1 ) {
+                   if (v3_handle_svm_io_outs(info, io_info) == -1 ) {
                        return -1;
                    }
                } else {
-                   if (v3_handle_svm_io_out(info) == -1) {
+                   if (v3_handle_svm_io_out(info, io_info) == -1) {
                        return -1;
                    }
                }
@@ -149,24 +94,27 @@ int v3_handle_svm_exit(struct guest_info * info) {
            } else {
 
                if (io_info->str) {
-                   if (v3_handle_svm_io_ins(info) == -1) {
+                   if (v3_handle_svm_io_ins(info, io_info) == -1) {
                        return -1;
                    }
                } else {
-                   if (v3_handle_svm_io_in(info) == -1) {
+                   if (v3_handle_svm_io_in(info, io_info) == -1) {
                        return -1;
                    }
                }
-
            }
+
+           info->rip = exit_info2;
+
            break;
        }
        case VMEXIT_MSR:
-           if (guest_ctrl->exit_info1 == 0) {
+
+           if (exit_info1 == 0) {
                if (v3_handle_msr_read(info) == -1) {
                    return -1;
                }
-           } else if (guest_ctrl->exit_info1 == 1) {
+           } else if (exit_info1 == 1) {
                if (v3_handle_msr_write(info) == -1) {
                    return -1;
                }
@@ -176,8 +124,16 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
                
            break;
+
+       case VMEXIT_CPUID:
+           if (v3_handle_cpuid(info) == -1) {
+               PrintError("Error handling CPUID\n");
+               return -1;
+           }
+
+           break;
        case VMEXIT_CR0_WRITE: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR0 Write\n");
 #endif
            if (v3_handle_cr0_write(info) == -1) {
@@ -185,7 +141,7 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        case VMEXIT_CR0_READ: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR0 Read\n");
 #endif
            if (v3_handle_cr0_read(info) == -1) {
@@ -193,15 +149,16 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        case VMEXIT_CR3_WRITE: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR3 Write\n");
 #endif
            if (v3_handle_cr3_write(info) == -1) {
                return -1;
            }    
+
            break;
        case  VMEXIT_CR3_READ: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR3 Read\n");
 #endif
            if (v3_handle_cr3_read(info) == -1) {
@@ -209,7 +166,7 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        case VMEXIT_CR4_WRITE: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR4 Write\n");
 #endif
            if (v3_handle_cr4_write(info) == -1) {
@@ -217,7 +174,7 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }    
            break;
        case  VMEXIT_CR4_READ: 
-#ifdef DEBUG_CTRL_REGS
+#ifdef CONFIG_DEBUG_CTRL_REGS
            PrintDebug("CR4 Read\n");
 #endif
            if (v3_handle_cr4_read(info) == -1) {
@@ -225,9 +182,9 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        case VMEXIT_EXCP14: {
-           addr_t fault_addr = guest_ctrl->exit_info2;
-           pf_error_t * error_code = (pf_error_t *)&(guest_ctrl->exit_info1);
-#ifdef DEBUG_SHADOW_PAGING
+           addr_t fault_addr = exit_info2;
+           pf_error_t * error_code = (pf_error_t *)&(exit_info1);
+#ifdef CONFIG_DEBUG_SHADOW_PAGING
            PrintDebug("PageFault at %p (error=%d)\n", 
                       (void *)fault_addr, *(uint_t *)error_code);
 #endif
@@ -241,15 +198,23 @@ int v3_handle_svm_exit(struct guest_info * info) {
            }
            break;
        } 
-       case VMEXIT_NPF: 
+       case VMEXIT_NPF: {
+           addr_t fault_addr = exit_info2;
+           pf_error_t * error_code = (pf_error_t *)&(exit_info1);
 
-           PrintError("Currently unhandled Nested Page Fault\n");
-           return -1;
-               
+           if (info->shdw_pg_mode == NESTED_PAGING) {
+               if (v3_handle_nested_pagefault(info, fault_addr, *error_code) == -1) {
+                   return -1;
+               }
+           } else {
+               PrintError("Currently unhandled Nested Page Fault\n");
+               return -1;
+                   }
            break;
+           }
        case VMEXIT_INVLPG: 
            if (info->shdw_pg_mode == SHADOW_PAGING) {
-#ifdef DEBUG_SHADOW_PAGING
+#ifdef CONFIG_DEBUG_SHADOW_PAGING
                PrintDebug("Invlpg\n");
 #endif
                if (v3_handle_shadow_invlpg(info) == -1) {
@@ -261,13 +226,16 @@ int v3_handle_svm_exit(struct guest_info * info) {
            /* 
             * Hypercall 
             */
-               
+
+           // VMMCALL is a 3 byte op
+           // We do this early because some hypercalls can change the rip...
+           info->rip += 3;         
+
            if (v3_handle_hypercall(info) == -1) {
+               PrintError("Error handling Hypercall\n");
                return -1;
            }
-               
-           // VMMCALL is a 3 byte op
-           info->rip += 3;
+
            break;          
        case VMEXIT_INTR:
            // handled by interrupt dispatch earlier
@@ -276,21 +244,21 @@ int v3_handle_svm_exit(struct guest_info * info) {
            //   handle_svm_smi(info); // ignored for now
            break;
        case VMEXIT_HLT:
-#ifdef DEBUG_HALT
+#ifdef CONFIG_DEBUG_HALT
            PrintDebug("Guest halted\n");
 #endif
-           if (v3_handle_svm_halt(info) == -1) {
+           if (v3_handle_halt(info) == -1) {
                return -1;
            }
            break;
        case VMEXIT_PAUSE:
-           //PrintDebug("Guest paused\n");
+           //      PrintDebug("Guest paused\n");
            if (v3_handle_svm_pause(info) == -1) { 
                return -1;
            }
            break;
        case VMEXIT_WBINVD:   
-#ifdef DEBUG_EMULATOR
+#ifdef CONFIG_DEBUG_EMULATOR
            PrintDebug("WBINVD\n");
 #endif
            if (v3_handle_svm_wbinvd(info) == -1) { 
@@ -311,24 +279,24 @@ int v3_handle_svm_exit(struct guest_info * info) {
            
            PrintDebug("Unhandled SVM Exit: %s\n", vmexit_code_to_str(exit_code));
            
-           rip_addr = get_addr_linear(info, guest_state->rip, &(info->segments.cs));
+           rip_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            
            PrintError("SVM Returned:(VMCB=%p)\n", (void *)(info->vmm_data)); 
-           PrintError("RIP: %p\n", (void *)(addr_t)(guest_state->rip));
+           PrintError("RIP: %p\n", (void *)(addr_t)(info->rip));
            PrintError("RIP Linear: %p\n", (void *)(addr_t)(rip_addr));
            
            PrintError("SVM Returned: Exit Code: %p\n", (void *)(addr_t)exit_code); 
            
-           PrintError("io_info1 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info1));
-           PrintError("io_info1 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
+           PrintError("io_info1 low = 0x%.8x\n", *(uint_t*)&(exit_info1));
+           PrintError("io_info1 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(exit_info1)) + 4));
            
-           PrintError("io_info2 low = 0x%.8x\n", *(uint_t*)&(guest_ctrl->exit_info2));
-           PrintError("io_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
+           PrintError("io_info2 low = 0x%.8x\n", *(uint_t*)&(exit_info2));
+           PrintError("io_info2 high = 0x%.8x\n", *(uint_t *)(((uchar_t *)&(exit_info2)) + 4));
            
            
            if (info->shdw_pg_mode == SHADOW_PAGING) {
-               PrintHostPageTables(info, info->ctrl_regs.cr3);
+               //      PrintHostPageTables(info, info->ctrl_regs.cr3);
                //PrintGuestPageTables(info, info->shdw_pg_state.guest_cr3);
            }
            
@@ -338,103 +306,47 @@ int v3_handle_svm_exit(struct guest_info * info) {
     // END OF SWITCH (EXIT_CODE)
 
 
-    if (info->enable_profiler) {
-       rdtscll(info->profiler.end_time);
-       v3_profile_exit(info, exit_code);
-    }
-      
-
+#ifdef CONFIG_VNET
 
-    // Update the low level state
-
-    if (v3_excp_pending(info)) {
-       uint_t excp = v3_get_excp_number(info);
-               
-       guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
-       
-       if (info->excp_state.excp_error_code_valid) {
-           guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
-           guest_ctrl->EVENTINJ.ev = 1;
-#ifdef DEBUG_INTERRUPTS
-           PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
+#ifdef VNET_PROFILE
+    rdtscll(vnet_start_time);
 #endif
-       }
-       
-       guest_ctrl->EVENTINJ.vector = excp;
-       
-       guest_ctrl->EVENTINJ.valid = 1;
-#ifdef DEBUG_INTERRUPTS
-       PrintDebug("Injecting Exception %d (EIP=%p)\n", 
-                  guest_ctrl->EVENTINJ.vector, 
-                  (void *)(addr_t)info->rip);
+    v3_vnet_pkt_process(info);
+#ifdef VNET_PROFILE
+    rdtscll(vnet_end_time);
+    vnet_time += vnet_end_time - vnet_start_time;
 #endif
-       v3_injecting_excp(info, excp);
-    } else if (v3_intr_pending(info)) {
-
-       switch (v3_get_intr_type(info)) {
-           case EXTERNAL_IRQ: {
-               uint_t irq = v3_get_intr_number(info);
-                   
-               // check to see if ==-1 (non exists)
-                   
-                   
-               guest_ctrl->guest_ctrl.V_IRQ = 1;
-               guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
-               guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
-               guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;
-
-#ifdef DEBUG_INTERRUPTS
-               PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
-                          guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
-                          (void *)(addr_t)info->rip);
 #endif
 
-               info->intr_state.irq_pending = 1;
-               info->intr_state.irq_vector = irq;
-                   
-               break;
-           }
-           case NMI:
-               guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
-               break;
-           case SOFTWARE_INTR:
-               guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
-               break;
-           case VIRTUAL_INTR:
-               guest_ctrl->EVENTINJ.type = SVM_INJECTION_VIRTUAL_INTR;
-               break;
-               
-           case INVALID_INTR: 
-           default:
-               PrintError("Attempted to issue an invalid interrupt\n");
-               return -1;
-       }
-       
-    }
-
+#ifdef CONFIG_LINUX_VIRTIO_NET
+    v3_virtionic_pktprocess(info);
+#endif
 
-    guest_state->cr0 = info->ctrl_regs.cr0;
-    guest_state->cr2 = info->ctrl_regs.cr2;
-    guest_state->cr3 = info->ctrl_regs.cr3;
-    guest_state->cr4 = info->ctrl_regs.cr4;
-    guest_state->dr6 = info->dbg_regs.dr6;
-    guest_state->dr7 = info->dbg_regs.dr7;
-    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
-    guest_state->rflags = info->ctrl_regs.rflags;
-    guest_state->efer = info->ctrl_regs.efer;
-    
-    guest_state->cpl = info->cpl;
+#ifdef VNET_PROFILE
+    rdtscll(exit_end_time);
+    vmm_time += exit_end_time - exit_start_time;
+    last_exit_time = exit_end_time;
+    if ((num_exit % 100000) == 0) {
+       PrintError("exit: %ld, vmm_time: %ld, guest_time: %ld, vnet_time: %ld\n", (long)num_exit, (long)vmm_time, (long)guest_time, (long)vnet_time);
+       vmm_time = 0;
+       vnet_time = 0;
+       guest_time = 0;
+       last_exit_time = 0;
+    }
+#endif
 
-    guest_state->rax = info->vm_regs.rax;
-    guest_state->rip = info->rip;
-    guest_state->rsp = info->vm_regs.rsp;
 
+#ifdef CONFIG_TELEMETRY
+    if (info->enable_telemetry) {
+       v3_telemetry_end_exit(info, exit_code);
+    }
+#endif
 
-    set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
 
     if (exit_code == VMEXIT_INTR) {
        //PrintDebug("INTR ret IP = %x\n", guest_state->rip);
     }
+
     
     return 0;
 }