Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; just substitute the branch name.
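For example, to list the available remote branches and then track one (the release branch name below is purely illustrative; use a name that git branch -r actually reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2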


Latest commit: Now correctly boots 2-, 4-, and 8-core Kitten

File: palacios/src/palacios/svm.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per CPU)
static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

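/* Allocate a single zeroed host page for use as a VMCB. */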
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = (vmcb_t *)V3_VAddr(V3_AllocPages(1));

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


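/* Initialize a VMCB for a BIOS-booted PC-class guest: enable the required
 * instruction and event intercepts, set the guest register and segment state
 * to their x86 power-on (reset) values, and configure either shadow or
 * nested paging according to the core's paging mode. */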
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;
    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;

    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        v3_hook_msr(core->vm_info, EFER_MSR,
                    &v3_handle_efer_read,
                    &v3_handle_efer_write,
                    core);

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
}


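/* Allocate this core's VMCB and initialize it for the requested VM class.
 * Only PC-class VMs (V3_PC_VM) are currently supported. */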
int v3_init_svm_vmcb(struct guest_info * info, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    info->vmm_data = (void *)Allocate_VMCB();

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)info->vmm_data);
        Init_VMCB_BIOS((vmcb_t *)(info->vmm_data), info);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


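/* Reconcile interrupt-injection state after a VM exit: detect when a posted
 * virtual IRQ has been acknowledged by the guest or taken fully vectored,
 * and update the core's interrupt bookkeeping to match. */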
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


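/* Prepare event injection before a VM entry: pending exceptions are injected
 * through the EVENTINJ field, while external IRQs are posted as virtual
 * interrupts through the V_IRQ/V_INTR_VECTOR guest controls. */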
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
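/* Run this core through one entry/exit cycle: synchronize guest state into
 * the VMCB, inject any pending events, launch the guest with global
 * interrupts disabled, then pull the guest state back out of the VMCB and
 * dispatch the exit to the exit handler. */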
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(info->vmm_data));
    ullong_t tmp_tsc;
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif


    rdtscll(tmp_tsc);
    v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc));
    rdtscll(info->time_state.cached_host_tsc);
    //    guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    //rdtscll(tmp_tsc);
    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);

    //PrintDebug("SVM Returned\n");

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        return -1;
    }

    return 0;
}


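/* Per-core run loop: an AP core first waits to be brought out of INIT and to
 * receive its SIPI, the core is checked to be in real mode, and then
 * v3_svm_enter() is called repeatedly until an unrecoverable error occurs,
 * at which point the failing state is dumped for debugging. */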
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    PrintDebug("Starting SVM core %u\n", info->cpu_id);
    if (info->cpu_mode == INIT) {
        PrintDebug("SVM core %u: I am an AP in INIT mode, waiting for that to change\n", info->cpu_id);
        while (info->cpu_mode == INIT) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->cpu_id);
        }
        PrintDebug("SVM core %u: I am out of INIT\n", info->cpu_id);
        if (info->cpu_mode == SIPI) {
            PrintDebug("SVM core %u: I am waiting on a SIPI to set my starting address\n", info->cpu_id);
            while (info->cpu_mode == SIPI) {
                v3_yield(info);
                //PrintDebug("SVM core %u: still waiting for SIPI\n", info->cpu_id);
            }
        }
        PrintDebug("SVM core %u: I have my SIPI\n", info->cpu_id);
    }

    if (info->cpu_mode != REAL) {
        PrintError("SVM core %u: I am not in REAL mode at launch!  Huh?!\n", info->cpu_id);
        return -1;
    }

    PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    info->vm_info->run_state = VM_RUNNING;
    rdtscll(info->yield_start_cycle);

    while (1) {
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t *)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t *)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // Test SVMDIS as a bit mask rather than against the literal value 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

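/* Use the SVM revision/feature-identification CPUID leaf to determine
 * whether this CPU supports nested paging. */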
static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}

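/* Per-CPU SVM initialization: set EFER.SVME to enable SVM, allocate the host
 * state save area and program its physical address into the
 * SVM_VM_HSAVE_PA MSR, then record the detected CPU type. */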
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector * segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset takes a byte fill value; 0xff sets every bit in the IO permission map
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t *)((uchar_t *)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t *)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t *)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t *)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif