Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
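For example, to track a release branch, the same pattern applies. (The branch name Release-1.2 below is only illustrative; run git branch -r first to see which remote branches actually exist.)

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2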


Commit: added run state checks to the vm run loops to allow stopping of VMs
File: [palacios.git] / palacios / src / palacios / svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas used by VMRUN
static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = (vmcb_t *)V3_VAddr(V3_AllocPages(1));

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


// Set up intercepts and the initial (BIOS reset vector) guest state in the VMCB
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
}


int v3_init_svm_vmcb(struct guest_info * info, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    info->vmm_data = (void*)Allocate_VMCB();
    
    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)info->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(info->vmm_data), info);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}



// Track how far a previously injected interrupt got once the guest exits
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


// Propagate any pending exception or interrupt into the VMCB before reentry
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_update_timers(info);

    /* If this guest is frequency-lagged behind host time, wait 
     * for the appropriate host time before resuming the guest. */
    v3_adjust_time(info);

    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    //PrintDebug("SVM Returned\n");
    
    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */


    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif


    // reenable global interrupts after vm exit
    v3_stgi();


    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);



    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        return -1;
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
        }

        PrintDebug("SVM core %u initialized\n", info->cpu_id);
    }

    PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif