Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are similar.
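For example, to track a release branch instead (the branch name below is illustrative; list the actual remote branches with "git branch -r"):

  git checkout --track -b Release-1.2 origin/Release-1.2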


palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one VMCB-sized region per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the privileged SVM instructions so the guest cannot use them directly
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;

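    // These values emulate the legacy x86 reset state: CS base 0xf0000 plus
    // RIP 0xfff0 puts the first instruction fetch at linear address 0xffff0,
    // the traditional BIOS entry point.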
    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;

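    // The IOPM and MSRPM are physical-address bitmaps consulted directly by
    // hardware: set bits force #VMEXITs on the corresponding I/O ports or MSR accesses.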
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;

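    // Hook EFER so guest writes are filtered through v3_svm_handle_efer_write,
    // which keeps the hardware-visible SVME bit set.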
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* Tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

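        // A NULL read/write handler passes that MSR access straight through
        // to hardware rather than trapping it into the VMM.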
        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}

int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    if (v3_chkpt_save_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not save SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) { 
        PrintError("Could not save SVM vmcb\n");
        return -1;
    }

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    if (v3_chkpt_load_8(ctx, "cpl", &(core->cpl)) == -1) { 
        PrintError("Could not load SVM cpl\n");
        return -1;
    }

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        PrintError("Could not load SVM vmcb\n");
        return -1;
    }

    return 0;
}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

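/* If RDTSC is not trapped, the hardware applies TSC_OFFSET to the guest's
 * reads: zero for full passthrough, otherwise the host-to-guest offset. */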
int v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (!(info->time_state.flags & VM_TIME_TRAP_RDTSC)) {
        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // Update timer devices after being in the VM and before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
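    // On x86-64, CR8 is the task priority register; mirror it into the virtual TPR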
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print("Calling v3_svm_launch\n");
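    // v3_svm_launch executes VMRUN with the guest VMCB and returns only after
    // the next #VMEXIT; the rdtsc pair brackets it to count cycles spent in the guest.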
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info, -1);

    // This timer update is for time-dependent handlers,
    // in case we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info, -1);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr = 0;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
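    // Example: vector 0x12 -> CS selector 0x1200, CS base 0x12000; with RIP 0,
    // the first fetch is at linear address 0x12000.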
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // Test the SVMDIS flag rather than comparing the masked value to 1,
        // which is never true when the flag is not bit 0
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    if (!host_vmcbs[cpu_id]) {
        PrintError("Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

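    // VM_HSAVE_PA tells the hardware where to save host state across VMRUN/#VMEXIT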
    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  if (!ctrl_area->IOPM_BASE_PA) { 
      PrintError("Cannot allocate IO bitmap\n");
      return;
  }
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset takes a byte value; 0xff sets every bit in the bitmap
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif