Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are checked out the same way; see the example below.
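
For example, to work on a release branch instead, first list the remote branches and then create a local tracking branch for the one you want (the release branch name below is illustrative):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2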


Semi-functional SMP (boots Kitten guest with two cores)

palacios/src/palacios/svm.c:
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas (one per CPU)
static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = (vmcb_t *)V3_VAddr(V3_AllocPages(1));

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;
    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

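    /* The values below are the canonical x86 reset state: execution
     * starts at CS base 0xf0000 with IP 0xfff0 (physical address
     * 0xffff0, the BIOS entry point), and DX holds the processor
     * signature. */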
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;

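    /* Two virtual memory strategies are configured below.  Shadow
     * paging: Palacios maintains the real page tables itself, so
     * CR0/CR3/CR4 accesses, INVLPG, and page faults must all be
     * intercepted.  Nested paging: the hardware walks a second,
     * VMM-owned table (N_CR3), so those intercepts are unnecessary. */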
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_handle_efer_write, 
                    core);

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
}


int v3_init_svm_vmcb(struct guest_info * info, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    info->vmm_data = (void *)Allocate_VMCB();

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)info->vmm_data);
        Init_VMCB_BIOS((vmcb_t *)(info->vmm_data), info);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


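/* SVM provides two injection paths, both used below: EVENTINJ forces an
 * event (exception, NMI, etc.) into the guest on the next VMRUN, while
 * the V_IRQ/V_INTR_VECTOR fields request a virtual external interrupt
 * that is delivered only once the guest can accept it.  Note that in
 * this version the NMI, software-interrupt, and virtual-IRQ cases set
 * only EVENTINJ.type; the vector and valid bits are left unset for
 * them. */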
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
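/* One entry/exit round trip: yield if the timeslice has expired, CLGI
 * to block host interrupts, flush the cached guest state into the
 * VMCB, inject any pending event, VMRUN via v3_svm_launch, pull the
 * guest state back out, latch the exit reason, STGI, then dispatch the
 * exit handler. */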
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(info->vmm_data));
    ullong_t tmp_tsc;
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif


    rdtscll(tmp_tsc);
    v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc));
    rdtscll(info->time_state.cached_host_tsc);
    //    guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;

    //V3_Print("Calling v3_svm_launch\n");

#if 0
    if (info->cpu_id == 1) { 
        V3_Print("Entering Core 1\n");
        v3_print_guest_state(info);
    }
#endif

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    //rdtscll(tmp_tsc);
    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);

    //PrintDebug("SVM Returned\n");

    info->num_exits++;


    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

#if 0
    if (info->cpu_id == 1) { 
        V3_Print("Exited Core 1, before handler\n");
        v3_print_guest_state(info);
        PrintDebugVMCB((vmcb_t *)(info->vmm_data));
    }
#endif

    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        return -1;
    }

#if 0
    if (info->cpu_id == 1) { 
        V3_Print("Exited Core 1, after handler\n");
        v3_print_guest_state(info);
        PrintDebugVMCB((vmcb_t *)(info->vmm_data));
    }
#endif

    return 0;
}


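/* Per-core run loop.  In a multicore guest, application processors
 * start in INIT state and spin below until the guest's bootstrap
 * processor sends the usual INIT/SIPI sequence through the emulated
 * APIC, which supplies their real-mode start address; this mirrors
 * physical x86 MP startup. */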
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    PrintDebug("Starting SVM core %u\n", info->cpu_id);

    if (info->cpu_mode == INIT) { 
        PrintDebug("SVM core %u: I am an AP in INIT mode, waiting for that to change\n", info->cpu_id);
        while (info->cpu_mode == INIT) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->cpu_id);
        }
        PrintDebug("SVM core %u: I am out of INIT\n", info->cpu_id);
        if (info->cpu_mode == SIPI) { 
            PrintDebug("SVM core %u: I am waiting on a SIPI to set my starting address\n", info->cpu_id);
            while (info->cpu_mode == SIPI) {
                v3_yield(info);
                //PrintDebug("SVM core %u: still waiting for SIPI\n", info->cpu_id);
            }
        }
        PrintDebug("SVM core %u: I have my SIPI\n", info->cpu_id);
    }

    if (info->cpu_mode != REAL) { 
        PrintError("SVM core %u: I am not in REAL mode at launch!  Huh?!\n", info->cpu_id);
        return -1;
    }

    PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));

    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    info->vm_info->run_state = VM_RUNNING;
    rdtscll(info->yield_start_cycle);

    while (1) {
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t *)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t *)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
        }
*/

    }

    // Need to take down the other cores on error... 

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // Mask test for the SVMDIS bit; since the bit is not bit 0,
        // comparing the masked value against 1 would never match.
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}
static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
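    /* The CPU saves host state here across VMRUN/#VMEXIT.  The
     * architecture requires one 4KB page-aligned region per core;
     * allocating four pages above is presumably just conservative. */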

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t * vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1 << 3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector * segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2 << 3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset uses only the low byte of its value argument, so 0xff
    // (not 0xffffffff) sets every bit of the IO permission map
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t *)((uchar_t *)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t *)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t *)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t *)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;
  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}


#endif