Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
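For example, to track a release branch instead (the branch name below is illustrative; run "git branch -r" to see what is actually available):

  git checkout --track -b Release-1.2 origin/Release-1.2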


Cleanup of time handling code
palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};

extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}

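// Initialize the VMCB so the guest starts in the canonical x86 reset state
// (real mode, CS:IP = F000:FFF0), with intercepts armed for the privileged
// SVM instructions and for the events Palacios must mediate.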
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;

    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* Tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}
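// Allocate and initialize the VMCB for a single guest core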
int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}

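// Reconcile the VMM's interrupt-injection bookkeeping with what the hardware
// reported at VM exit (i.e., whether the guest actually took the interrupt
// that was pending at entry).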
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

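// Program the VMCB for injection before VM entry: pending exceptions go in
// directly via EVENTINJ, while external interrupts use the virtual interrupt
// fields (V_IRQ/V_INTR_VECTOR) so delivery honors the guest's interrupt flag.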
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
    }

    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed. This is why the exit information is 
 * copied into locals below before the exit handler runs.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Disable global interrupts for the VM state transition
    v3_clgi();

    // Update timer devices right before entering the VM. Doing it 
    // here makes sure the guest sees any timers that fired while 
    // it was in the VMM
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif

    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save guest state from the VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Save exit info now; per the warning above, the VMCB contents may change
    // before the exit handler runs
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // Reenable global interrupts after the VM exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        PrintError("  last exit was %d\n", v3_last_exit);
        return -1;
    }

    return 0;
}

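// Core execution loop. Core 0 starts the VM immediately; secondary cores spin
// until they are released, then every core repeatedly enters the guest until
// the VM stops or an entry fails.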
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));

    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
    }

    // Need to take down the other cores on error... 

    return 0;
}

/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a flag bit, so test the mask rather than comparing to 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

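// Query the SVM revision/feature CPUID leaf to see whether the CPU supports
// nested paging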
static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}

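// Per-CPU SVM setup: set EFER.SVME to enable SVM, then point
// SVM_VM_HSAVE_PA_MSR at a freshly allocated host state save area.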
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM by clearing EFER.SVME
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}

#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif

#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: that means it's probably wrong */

  // Set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;
  }
  
  
  // Also determine if the CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif