/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>
#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

uint32_t v3_last_exit;
// Per-physical-CPU array holding the address of each CPU's host state save area
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};
extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
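/* For reference: the architectural VMCB is exactly one 4 KB page and must be
 * 4 KB-aligned in physical memory, so the single page-sized, page-aligned
 * allocation above satisfies both requirements. */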
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif
    // guest_state->cr0 = 0x00000001;    // PE

    ctrl_area->exceptions.de = 1;
    ctrl_area->exceptions.df = 1;

    ctrl_area->exceptions.ts = 1;
    ctrl_area->exceptions.ss = 1;
    ctrl_area->exceptions.ac = 1;
    ctrl_area->exceptions.mc = 1;
    ctrl_area->exceptions.gp = 1;
    ctrl_area->exceptions.ud = 1;
    ctrl_area->exceptions.np = 1;
    ctrl_area->exceptions.of = 1;

    ctrl_area->exceptions.nmi = 1;

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;

    /* KCH: intercept SW Interrupts (INT instr) */
#ifdef V3_CONFIG_SW_INTERRUPTS
    ctrl_area->instrs.INTn = 1;
#endif

    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;
    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;
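    /* Decoding the CR0 value above (annotation):
     *   0x60010010 = CD (bit 30) | NW (bit 29) | WP (bit 16) | ET (bit 4)
     * CD/NW leave caching in its power-on configuration, ET is hardwired to 1
     * on modern CPUs, and WP forces supervisor writes to honor read-only page
     * protections, which is what lets the write-protection-based memory hooks
     * fire while the guest still runs in real mode. */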
    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;
    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        // seg->base = seg->selector << 4;
        seg->base = 0x00000000;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }
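    /* The "(raw attributes = 0xf3)" comments above decode as the low byte of
     * the descriptor attribute field:
     *   type = 0x3 (bits 0-3), S = 1 (bit 4), DPL = 3 (bits 5-6), P = 1 (bit 7)
     *   => (1 << 7) | (3 << 5) | (1 << 4) | 0x3 = 0xf3 */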
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;

    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;
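    /* Architecturally, the IOPM is a 12 KB bitmap (one intercept bit per I/O
     * port, plus overrun space for multi-byte accesses) and the MSRPM is 8 KB
     * (separate read and write bits per covered MSR); any set bit turns the
     * corresponding guest access into a #VMEXIT. */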
    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_handle_efer_write,
                core);
#ifdef V3_CONFIG_HIJACK_SYSCALL_MSR
    /* KCH: we're not hooking these to TRAP them;
       instead, we're going to catch the target EIP.
       Hopefully this EIP is the entry point in the ELF located in the
       vsyscall page. We can inject checks into the code segment such that
       we don't have to exit on uninteresting system calls. This should
       give us much better performance than INT 80, and should even obviate
       the need to deal with software interrupts at all */
    v3_hook_msr(core->vm_info, STAR_MSR,
                &v3_handle_star_read,
                &v3_handle_star_write,
                core);
    v3_hook_msr(core->vm_info, LSTAR_MSR,
                &v3_handle_lstar_read,
                &v3_handle_lstar_write,
                core);
    v3_hook_msr(core->vm_info, CSTAR_MSR,
                &v3_handle_cstar_read,
                &v3_handle_cstar_write,
                core);

    /* KCH: this probably isn't necessary, as
       SYSENTER is only used in legacy mode. In fact,
       in long mode it results in an illegal instruction
       exception */
    v3_hook_msr(core->vm_info, IA32_SYSENTER_EIP_MSR,
                &v3_handle_seeip_read,
                &v3_handle_seeip_write,
                core);
#endif
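    /* Illustrative sketch (an annotation, not part of the original source): a
     * write handler of the style hooked above mostly just needs to record the
     * syscall entry point the guest programs, rather than emulate the MSR.
     * The `lstar_shadow` field is hypothetical:
     *
     *   static int v3_handle_lstar_write(struct guest_info * core, uint_t msr,
     *                                    struct v3_msr src, void * priv_data) {
     *       core->vm_info->lstar_shadow = src.value; // hypothetical storage
     *       return 0;                                // write handled
     *   }
     */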
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;
    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
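    /* The g_pat value used in both paging modes is the architectural power-on
     * default PAT; byte-by-byte from the low byte up it reads
     *   06 04 07 00 06 04 07 00
     * i.e. PA0=WB(06), PA1=WT(04), PA2=UC-(07), PA3=UC(00), repeated for
     * PA4-PA7. */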
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write,
                core);
}
int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}
int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}
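/* SVM provides two injection mechanisms, and the function below uses both:
 * EVENTINJ delivers its event unconditionally on the next VMRUN (used here for
 * exceptions, NMIs, and software interrupts), while the V_IRQ/V_INTR_VECTOR
 * virtual-interrupt interface posts a maskable interrupt that the guest only
 * takes once its RFLAGS.IF permits, which is why external IRQs go through
 * V_IRQ and can remain pending across several entries. */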
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR: {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Caught an injected software interrupt\n");
                PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset the software interrupt state.
                   we can do this because we know only one
                   sw int can be posted at a time on a given
                   core, unlike irqs */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            }
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;
            case V3_INVALID_INTR:
            default:
                break;
        }
    }

    return 0;
}
/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);
    // Disable global interrupts for the VM state transition
    v3_clgi();

    // Update timer devices after being out of the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);
    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;
#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif

    PrintDebug("SVM Entry to CS=%p rip=%p...\n",
               (void *)(addr_t)info->segments.cs.base,
               (void *)(addr_t)info->rip);
#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif
    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
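    // Architecturally, guest RDTSC/RDTSCP return (host TSC + TSC_OFFSET), so
    // refreshing the offset on every entry keeps the guest's virtualized time
    // consistent with the v3_time_enter_vm()/v3_time_exit_vm() bookkeeping.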
    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;
#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif
    // Re-enable global interrupts after the VM exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    return 0;
}
int v3_start_svm_guest(struct guest_info * info) {
    // vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    // vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));

    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    while (1) {
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr = 0;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
    }

    // Need to take down the other cores on error...

    return 0;
}
int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {

    // Write the RIP, CS, and descriptor;
    // assume the rest is already good to go.
    //
    // vector VV -> rip at 0
    //
    // This means we start executing at linear address VV000,
    // so the selector needs to be VV00
    // and the base needs to be VV000.
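    //
    // Worked example: a startup vector of 0x9f gives
    //   selector = 0x9f << 8  = 0x9f00
    //   base     = 0x9f << 12 = 0x9f000
    // so with rip = 0 the first instruction is fetched from linear 0x9f000.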
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}
/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    }

    v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

    PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

    // Note: SVM_VM_CR_MSR_svmdis is a mask bit, so test for nonzero
    // rather than comparing the masked value against 1
    if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
        V3_Print("SVM is available but is disabled.\n");

        v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

        PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

        if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
            V3_Print("SVM BIOS Disabled, not unlockable\n");
        } else {
            V3_Print("SVM is locked with a key\n");
        }

        return 0;
    }

    V3_Print("SVM is available and enabled.\n");

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
    PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
    PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
    PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

    return 1;
}
static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    // msr.e_reg.high = 0;
    // msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
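    // VM_HSAVE_PA must hold a 4 KB-aligned physical address of a region where
    // the CPU saves host state across VMRUN; one region is programmed per CPU.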
    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}
void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
}
#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = (((uint64_t)start_hi) << 32) | start_lo;
    end = (((uint64_t)end_hi) << 32) | end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = (((uint64_t)start_hi) << 32) | start_lo;
    end = (((uint64_t)end_hi) << 32) | end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */
void Init_VMCB_pe(vmcb_t * vmcb, struct guest_info vm_info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i = 0;

    guest_state->rsp = vm_info.vm_regs.rsp;
    guest_state->rip = vm_info.rip;

    /* I pretty much just gutted this from TVMM */
    /* Note: That means it's probably wrong */

    // set the segment registers to mirror ours
    guest_state->cs.selector = 1 << 3;
    guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
    guest_state->cs.attrib.fields.S = 1;
    guest_state->cs.attrib.fields.P = 1;
    guest_state->cs.attrib.fields.db = 1;
    guest_state->cs.attrib.fields.G = 1;
    guest_state->cs.limit = 0xfffff;
    guest_state->cs.base = 0;
    struct vmcb_selector * segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
    for ( i = 0; segregs[i] != NULL; i++) {
        struct vmcb_selector * seg = segregs[i];

        seg->selector = 2 << 3;
        seg->attrib.fields.type = 0x2; // Data Segment+read/write
        seg->attrib.fields.S = 1;
        seg->attrib.fields.P = 1;
        seg->attrib.fields.db = 1;
        seg->attrib.fields.G = 1;
        seg->limit = 0xfffff;
        seg->base = 0;
    }
    /* JRL THIS HAS TO GO */
    // guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    // guest_state->tr.base = GetTR_Base(); // - 0x2000;

    guest_state->efer |= EFER_MSR_svm_enable;
    guest_state->rflags = 0x00000002; // The reserved bit is always 1
    ctrl_area->svm_instrs.VMRUN = 1;
    guest_state->cr0 = 0x00000001; // PE
    ctrl_area->guest_ASID = 1;

    // guest_state->cpl = 0;
    ctrl_area->cr_writes.cr4 = 1;

    ctrl_area->exceptions.de = 1;
    ctrl_area->exceptions.df = 1;
    ctrl_area->exceptions.pf = 1;
    ctrl_area->exceptions.ts = 1;
    ctrl_area->exceptions.ss = 1;
    ctrl_area->exceptions.ac = 1;
    ctrl_area->exceptions.mc = 1;
    ctrl_area->exceptions.gp = 1;
    ctrl_area->exceptions.ud = 1;
    ctrl_area->exceptions.np = 1;
    ctrl_area->exceptions.of = 1;
    ctrl_area->exceptions.nmi = 1;

    ctrl_area->instrs.IOIO_PROT = 1;
    ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

    {
        reg_ex_t tmp_reg;
        tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
        memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
    }

    ctrl_area->instrs.INTR = 1;
    uchar_t gdt_buf[6];
    uchar_t idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);

    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t *)((uchar_t *)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t *)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t *)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t *)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);

    // gdt_base -= 0x2000;
    // idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;
    // also determine if CPU supports nested paging
    if (vm_info.page_tables) {

        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);

        // ctrl_area->N_CR3 = Get_CR3();
        // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

        guest_state->g_pat = 0x7040600070406ULL;

        PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
        PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));

        // guest_state->cr0 |= 0x80000000;