Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way, as in the example below.
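For example, to track a release branch (the branch name below is only a placeholder; list the real remote branches first):

  git branch -r
  git checkout --track -b <release-branch> origin/<release-branch>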


Commit: Prints last exit reason when handler fails
File:   palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per CPU)
static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}

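/*
 * Init_VMCB_BIOS: configure a VMCB so the guest boots like a freshly reset
 * PC. It arms intercepts for the SVM instructions, HLT, NMI, INIT, CPUID,
 * and all I/O port and MSR accesses, then points the initial guest state
 * at the BIOS reset vector (CS base 0xf0000, RIP 0xfff0).
 */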
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for (i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}

int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}

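/*
 * update_irq_exit_state: reconcile virtual interrupt bookkeeping after an
 * exit. If a pending IRQ's V_IRQ bit was cleared, the guest completed the
 * interrupt-acknowledge cycle and the IRQ is marked as started; once
 * EXITINTINFO no longer reports it, the interrupt was taken fully vectored.
 */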
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

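/*
 * update_irq_entry_state: set up event injection before VMRUN. A pending
 * exception is injected through EVENTINJ; an IRQ already in flight is
 * re-asserted through the virtual interrupt lines (V_IRQ/V_INTR_VECTOR);
 * otherwise a newly pending interrupt source is programmed by type.
 */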
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }
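    /* Note: the V3_NMI, V3_SOFTWARE_INTR, and V3_VIRTUAL_IRQ cases above set
     * only EVENTINJ.type; EVENTINJ.valid stays clear (update_irq_exit_state
     * zeroes it after every exit), so as written these paths do not actually
     * inject an event. */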

    return 0;
}

/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    v3_adjust_time(info);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_update_timers(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    //PrintDebug("SVM Returned\n");
    
    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */


    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif


    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);


    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        PrintError("  last exit was %d\n", v3_last_exit);
        return -1;
    }


    return 0;
}

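/*
 * v3_start_svm_guest: per-core run loop. Core 0 flips the VM to
 * VM_RUNNING; secondary cores spin (yielding) until they are released,
 * then every core runs v3_svm_enter() until the VM is stopped or an
 * exit handler fails.
 */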
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->cpu_id);
        }

        PrintDebug("SVM core %u initialized\n", info->cpu_id);
    } 

    PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a mask bit, so test the masked value rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}

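/*
 * v3_init_svm_cpu: enable SVM on this CPU by setting EFER.SVME, then
 * allocate a host state save area and register it via the
 * SVM_VM_HSAVE_PA_MSR. A typical host bringup (a sketch; the per-CPU
 * init loop lives in the host OS glue, not in this file) would be:
 *
 *     if (v3_is_svm_capable()) {
 *         v3_init_svm_cpu(cpu_id);
 *     }
 */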
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector * segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t *)((uchar_t *)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t *)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t *)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t *)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif