Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


975010f47348b7a3ec41e62da9eb744041c7b007
[palacios.releases.git] / palacios / src / palacios / svm.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/svm.h>
22 #include <palacios/vmm.h>
23
24 #include <palacios/vmcb.h>
25 #include <palacios/vmm_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/svm_handler.h>
28
29 #include <palacios/vmm_debug.h>
30 #include <palacios/vm_guest_mem.h>
31
32 #include <palacios/vmm_decoder.h>
33 #include <palacios/vmm_string.h>
34 #include <palacios/vmm_lowlevel.h>
35 #include <palacios/svm_msr.h>
36
37 #include <palacios/vmm_rbtree.h>
38
39 #include <palacios/vmm_direct_paging.h>
40
41 #include <palacios/vmm_ctrl_regs.h>
42 #include <palacios/svm_io.h>
43
44 #include <palacios/vmm_sprintf.h>
45
46
47 #ifndef CONFIG_DEBUG_SVM
48 #undef PrintDebug
49 #define PrintDebug(fmt, args...)
50 #endif
51
52
53 uint32_t v3_last_exit;
54
55 // This is a global pointer to the host's VMCB
56 static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
57
58
59
60 extern void v3_stgi();
61 extern void v3_clgi();
62 //extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
63 extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
64
65
66 static vmcb_t * Allocate_VMCB() {
67     vmcb_t * vmcb_page = NULL;
68     addr_t vmcb_pa = (addr_t)V3_AllocPages(1);
69
70     if ((void *)vmcb_pa == NULL) {
71         PrintError("Error allocating VMCB\n");
72         return NULL;
73     }
74
75     vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);
76
77     memset(vmcb_page, 0, 4096);
78
79     return vmcb_page;
80 }
81
82
83
/* 
 * Initialize the VMCB and the core's architectural state so the guest
 * starts like a PC coming out of reset: 16-bit real mode, executing at
 * CS:IP = F000:FFF0 (the BIOS entry point).  Also configures the
 * intercepts and (shadow or nested) paging setup for this core.
 */
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept every SVM-specific instruction: the guest must never
    // execute these natively while we are virtualizing it.
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    // HLT exits so the VMM can schedule instead of idling the core
    ctrl_area->instrs.HLT = 1;

#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    // Intercept TSC reads only when time virtualization is compiled in
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    // x86 reset state: RSP = 0, IP = 0xFFF0 with CS base 0xF0000 below
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    // NOTE(review): DX at reset conventionally holds the processor
    // family/model signature; 0x00000f00 looks like that -- confirm
    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    // Real-mode BIOS code segment: selector F000, base F0000, 64K limit
    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    // All remaining data segments get an identical flat real-mode setup
    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    // Descriptor tables start empty; the guest BIOS/OS will load its own
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    // Architectural reset values for the debug status/control registers
    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    // Point the hardware I/O and MSR permission bitmaps at the maps
    // built by the VM-wide io/msr hook framework, and turn them on
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        // Guest believes CR0 is the reset value (ET only); the real CR0
        // has paging enabled below so the shadow tables are used
        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        // Trap CR accesses so shadow paging can track guest CR changes
        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        // EFER must be virtualized under shadow paging (e.g. to track
        // long-mode transitions)
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

        ctrl_area->instrs.INVLPG = 1;

        // Page faults drive shadow page table maintenance
        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
}
283
284
285 int v3_init_svm_vmcb(struct guest_info * info, v3_vm_class_t vm_class) {
286
287     PrintDebug("Allocating VMCB\n");
288     info->vmm_data = (void*)Allocate_VMCB();
289     
290     if (info->vmm_data == NULL) {
291         PrintError("Could not allocate VMCB, Exiting...\n");
292         return -1;
293     }
294
295     if (vm_class == V3_PC_VM) {
296         PrintDebug("Initializing VMCB (addr=%p)\n", (void *)info->vmm_data);
297         Init_VMCB_BIOS((vmcb_t*)(info->vmm_data), info);
298     } else {
299         PrintError("Invalid VM class\n");
300         return -1;
301     }
302
303     return 0;
304 }
305
306
307
/* 
 * Post-VMEXIT interrupt bookkeeping.  Reads the VMCB to determine how
 * far a previously injected interrupt got, and advances the core's
 * pending/started IRQ state machine accordingly.  Always returns 0.
 */
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    // V_IRQ cleared by hardware while we had an IRQ pending means the
    // guest acknowledged the interrupt: move it from pending to started
    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        // Notify the interrupt framework that injection is underway
        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    // exit_int_info invalid means the interrupt delivery completed
    // (was not cut short by this exit)
    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
        // Delivery was interrupted mid-flight; irq_started stays set so
        // the entry path will re-inject on the next VM entry
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}
342
343
/* 
 * Pre-VMRUN event injection.  Priority order: pending exceptions are
 * injected via EVENTINJ; an interrupted prior IRQ injection is retried
 * via the virtual-interrupt (V_IRQ) mechanism; otherwise the highest
 * pending interrupt source is programmed.  Always returns 0.
 */
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    // No IRQ in flight: make sure stale virtual-interrupt state is cleared
    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        // Some exceptions carry an error code; set EV so hardware pushes it
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
        // A prior injection was cut short by an exit (see
        // update_irq_exit_state); re-arm the virtual interrupt
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                // Deliver through the virtual interrupt mechanism so the
                // guest's interruptibility state is honored by hardware
                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            // NOTE(review): the three cases below set only EVENTINJ.type;
            // EVENTINJ.vector and EVENTINJ.valid are never set, so these
            // injections appear to be no-ops -- confirm whether NMI /
            // software-int / virtual-IRQ injection is actually unfinished
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}
428
429
430 /* 
431  * CAUTION and DANGER!!! 
432  * 
433  * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
435  * on its contents will cause things to break. The contents at the time of the exit WILL 
436  * change before the exit handler is executed.
437  */
/* 
 * Run the guest for one VMRUN/#VMEXIT round trip.
 *
 * Sequence: sync software core state into the VMCB, arm any pending
 * event injection, launch the guest with global interrupts disabled,
 * then pull the guest state back out, snapshot the exit information,
 * and dispatch the exit handler.
 *
 * Returns 0 on success, -1 if the exit handler fails.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    v3_adjust_time(info);

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    // CR8 maps onto the virtual TPR (low byte only)
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    // RAX/RIP/RSP live in the VMCB save area rather than the GPR block
    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    // Suppress event injection while a symbiotic call is in flight
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_update_timers(info);
    // Program the hardware TSC offset for this entry
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    // VMRUN wants the VMCB's *physical* address; host state is saved in
    // this CPU's host VMCB
    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    //PrintDebug("SVM Returned\n");
    
    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */


    // Snapshot exit info NOW: per the caution comment above, the VMCB
    // may be overwritten (e.g. by a symbiotic call) before the exit
    // handler runs, so it cannot be re-read later
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif


    // reenable global interrupts after vm exit
    v3_stgi();

 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);



    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        return -1;
    }


    return 0;
}
565
566
567 int v3_start_svm_guest(struct guest_info * info) {
568     //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
569     //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
570
571     PrintDebug("Starting SVM core %u\n", info->cpu_id);
572
573     if (info->cpu_id == 0) {
574         info->core_run_state = CORE_RUNNING;
575         info->vm_info->run_state = VM_RUNNING;
576     } else  { 
577         PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);
578
579         while (info->core_run_state == CORE_STOPPED) {
580             v3_yield(info);
581             //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
582         }
583
584         PrintDebug("SVM core %u initialized\n", info->cpu_id);
585     } 
586
587     PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
588                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
589                info->segments.cs.limit, (void *)(info->rip));
590
591
592
593     PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
594     //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
595     
596     v3_start_time(info);
597
598     while (1) {
599
600         if (info->vm_info->run_state == VM_STOPPED) {
601             info->core_run_state = CORE_STOPPED;
602             break;
603         }
604         
605         if (v3_svm_enter(info) == -1) {
606             vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
607             addr_t host_addr;
608             addr_t linear_addr = 0;
609             
610             info->vm_info->run_state = VM_ERROR;
611             
612             V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 
613             
614             v3_print_guest_state(info);
615             
616             V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
617             
618             V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
619             V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
620             
621             V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
622             V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
623             
624             linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
625             
626             if (info->mem_mode == PHYSICAL_MEM) {
627                 v3_gpa_to_hva(info, linear_addr, &host_addr);
628             } else if (info->mem_mode == VIRTUAL_MEM) {
629                 v3_gva_to_hva(info, linear_addr, &host_addr);
630             }
631             
632             V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
633             
634             V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
635             v3_dump_mem((uint8_t *)host_addr, 15);
636             
637             v3_print_stack(info);
638
639             break;
640         }
641
642
643         if (info->vm_info->run_state == VM_STOPPED) {
644             info->core_run_state = CORE_STOPPED;
645             break;
646         }
647
648         
649 /*
650         if ((info->num_exits % 5000) == 0) {
651             V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
652         }
653 */
654         
655     }
656
657     // Need to take down the other cores on error... 
658
659     return 0;
660 }
661
662
663
664
665
666 /* Checks machine SVM capability */
667 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
668 int v3_is_svm_capable() {
669     uint_t vm_cr_low = 0, vm_cr_high = 0;
670     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
671
672     v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
673   
674     PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);
675
676     if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
677       V3_Print("SVM Not Available\n");
678       return 0;
679     }  else {
680         v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
681         
682         PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
683         
684         if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 1) {
685             V3_Print("SVM is available but is disabled.\n");
686             
687             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
688             
689             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
690             
691             if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
692                 V3_Print("SVM BIOS Disabled, not unlockable\n");
693             } else {
694                 V3_Print("SVM is locked with a key\n");
695             }
696             return 0;
697
698         } else {
699             V3_Print("SVM is available and  enabled.\n");
700
701             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
702             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
703             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
704             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
705             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
706
707             return 1;
708         }
709     }
710 }
711
712 static int has_svm_nested_paging() {
713     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
714
715     v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
716
717     //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
718
719     if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
720         V3_Print("SVM Nested Paging not supported\n");
721         return 0;
722     } else {
723         V3_Print("SVM Nested Paging supported\n");
724         return 1;
725     }
726 }
727
728
729 void v3_init_svm_cpu(int cpu_id) {
730     reg_ex_t msr;
731     extern v3_cpu_arch_t v3_cpu_types[];
732
733     // Enable SVM on the CPU
734     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
735     msr.e_reg.low |= EFER_MSR_svm_enable;
736     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
737
738     V3_Print("SVM Enabled\n");
739
740     // Setup the host state save area
741     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
742
743     /* 64-BIT-ISSUE */
744     //  msr.e_reg.high = 0;
745     //msr.e_reg.low = (uint_t)host_vmcb;
746     msr.r_reg = host_vmcbs[cpu_id];
747
748     PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
749     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
750
751
752     if (has_svm_nested_paging() == 1) {
753         v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
754     } else {
755         v3_cpu_types[cpu_id] = V3_SVM_CPU;
756     }
757 }
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812 #if 0
813 /* 
814  * Test VMSAVE/VMLOAD Latency 
815  */
816 #define vmsave ".byte 0x0F,0x01,0xDB ; "
817 #define vmload ".byte 0x0F,0x01,0xDA ; "
818 {
819     uint32_t start_lo, start_hi;
820     uint32_t end_lo, end_hi;
821     uint64_t start, end;
822     
823     __asm__ __volatile__ (
824                           "rdtsc ; "
825                           "movl %%eax, %%esi ; "
826                           "movl %%edx, %%edi ; "
827                           "movq  %%rcx, %%rax ; "
828                           vmsave
829                           "rdtsc ; "
830                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
831                           : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
832                           );
833     
834     start = start_hi;
835     start <<= 32;
836     start += start_lo;
837     
838     end = end_hi;
839     end <<= 32;
840     end += end_lo;
841     
842     PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
843     
844     __asm__ __volatile__ (
845                           "rdtsc ; "
846                           "movl %%eax, %%esi ; "
847                           "movl %%edx, %%edi ; "
848                           "movq  %%rcx, %%rax ; "
849                           vmload
850                           "rdtsc ; "
851                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
852                               : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
853                               );
854         
855         start = start_hi;
856         start <<= 32;
857         start += start_lo;
858
859         end = end_hi;
860         end <<= 32;
861         end += end_lo;
862
863
864         PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
865     }
866     /* End Latency Test */
867
868 #endif
869
870
871
872
873
874
875
876 #if 0
877 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
878   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
879   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
880   uint_t i = 0;
881
882
883   guest_state->rsp = vm_info.vm_regs.rsp;
884   guest_state->rip = vm_info.rip;
885
886
887   /* I pretty much just gutted this from TVMM */
888   /* Note: That means its probably wrong */
889
890   // set the segment registers to mirror ours
891   guest_state->cs.selector = 1<<3;
892   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
893   guest_state->cs.attrib.fields.S = 1;
894   guest_state->cs.attrib.fields.P = 1;
895   guest_state->cs.attrib.fields.db = 1;
896   guest_state->cs.attrib.fields.G = 1;
897   guest_state->cs.limit = 0xfffff;
898   guest_state->cs.base = 0;
899   
900   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
901   for ( i = 0; segregs[i] != NULL; i++) {
902     struct vmcb_selector * seg = segregs[i];
903     
904     seg->selector = 2<<3;
905     seg->attrib.fields.type = 0x2; // Data Segment+read/write
906     seg->attrib.fields.S = 1;
907     seg->attrib.fields.P = 1;
908     seg->attrib.fields.db = 1;
909     seg->attrib.fields.G = 1;
910     seg->limit = 0xfffff;
911     seg->base = 0;
912   }
913
914
915   {
916     /* JRL THIS HAS TO GO */
917     
918     //    guest_state->tr.selector = GetTR_Selector();
919     guest_state->tr.attrib.fields.type = 0x9; 
920     guest_state->tr.attrib.fields.P = 1;
921     // guest_state->tr.limit = GetTR_Limit();
922     //guest_state->tr.base = GetTR_Base();// - 0x2000;
923     /* ** */
924   }
925
926
927   /* ** */
928
929
930   guest_state->efer |= EFER_MSR_svm_enable;
931   guest_state->rflags = 0x00000002; // The reserved bit is always 1
932   ctrl_area->svm_instrs.VMRUN = 1;
933   guest_state->cr0 = 0x00000001;    // PE 
934   ctrl_area->guest_ASID = 1;
935
936
937   //  guest_state->cpl = 0;
938
939
940
941   // Setup exits
942
943   ctrl_area->cr_writes.cr4 = 1;
944   
945   ctrl_area->exceptions.de = 1;
946   ctrl_area->exceptions.df = 1;
947   ctrl_area->exceptions.pf = 1;
948   ctrl_area->exceptions.ts = 1;
949   ctrl_area->exceptions.ss = 1;
950   ctrl_area->exceptions.ac = 1;
951   ctrl_area->exceptions.mc = 1;
952   ctrl_area->exceptions.gp = 1;
953   ctrl_area->exceptions.ud = 1;
954   ctrl_area->exceptions.np = 1;
955   ctrl_area->exceptions.of = 1;
956   ctrl_area->exceptions.nmi = 1;
957
958   
959
960   ctrl_area->instrs.IOIO_PROT = 1;
961   ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
962   
963   {
964     reg_ex_t tmp_reg;
965     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
966     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
967   }
968
969   ctrl_area->instrs.INTR = 1;
970
971   
972   {
973     char gdt_buf[6];
974     char idt_buf[6];
975
976     memset(gdt_buf, 0, 6);
977     memset(idt_buf, 0, 6);
978
979
980     uint_t gdt_base, idt_base;
981     ushort_t gdt_limit, idt_limit;
982     
983     GetGDTR(gdt_buf);
984     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
985     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
986     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
987
988     GetIDTR(idt_buf);
989     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
990     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
991     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
992
993
994     // gdt_base -= 0x2000;
995     //idt_base -= 0x2000;
996
997     guest_state->gdtr.base = gdt_base;
998     guest_state->gdtr.limit = gdt_limit;
999     guest_state->idtr.base = idt_base;
1000     guest_state->idtr.limit = idt_limit;
1001
1002
1003   }
1004   
1005   
1006   // also determine if CPU supports nested paging
1007   /*
1008   if (vm_info.page_tables) {
1009     //   if (0) {
1010     // Flush the TLB on entries/exits
1011     ctrl_area->TLB_CONTROL = 1;
1012
1013     // Enable Nested Paging
1014     ctrl_area->NP_ENABLE = 1;
1015
1016     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
1017
1018         // Set the Nested Page Table pointer
1019     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
1020
1021
1022     //   ctrl_area->N_CR3 = Get_CR3();
1023     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
1024
1025     guest_state->g_pat = 0x7040600070406ULL;
1026
1027     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
1028     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
1029     // Enable Paging
1030     //    guest_state->cr0 |= 0x80000000;
1031   }
1032   */
1033
1034 }
1035
1036
1037
1038
1039
1040 #endif
1041
1042