Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
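
For example, to list the remote branches and then switch to a release branch (the branch name below is illustrative; use a name that git branch -r actually reports):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2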


Commit: added barrier synchronization and integrated it with pause/continue functionality

File: palacios/src/palacios/svm.c

/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>


#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
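
/* The VMCB is a 4KB structure that the AMD architecture requires to be
 * 4KB-aligned in physical memory; allocating a single zeroed page, as
 * Allocate_VMCB() does above, satisfies both constraints. */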


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0)
        return status;

    // SVM-specific code
    if (core->shdw_pg_mode == NESTED_PAGING) {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}
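
/* Note on the handler above: VMRUN's consistency checks fail (with
 * VMEXIT_INVALID) if the guest EFER image in the VMCB has SVME clear, so
 * forcing the bit keeps a guest that rewrites EFER from making itself
 * unrunnable. */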


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM and other sensitive instructions so the guest
    // cannot execute them directly
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        // Architectural power-on default PAT value
        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        // Architectural power-on default PAT value
        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

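/* Two injection mechanisms are in play below: exceptions, NMIs, and software
 * interrupts go through the VMCB EVENTINJ field, which delivers the event
 * unconditionally on the next VMRUN, while external IRQs are posted through
 * the virtual interrupt fields (V_IRQ / V_INTR_VECTOR), which the guest takes
 * only when it is interruptible (V_IGN_TPR is set below, so delivery does not
 * additionally wait on the virtual TPR). */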
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Save the exit info before the VMCB can be overwritten (see CAUTION above)
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        // Block here if another core has raised a barrier (pause/continue support)
        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS selector, and CS descriptor;
    // assume the rest is already good to go.
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
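    // Worked example (illustrative): for startup vector VV = 0x9f,
    //   selector = 0x9f << 8  = 0x9f00
    //   base     = 0x9f << 12 = 0x9f000
    // so with rip = 0, the first fetch is from linear address 0x9f000,
    // matching the description above.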
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



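/* The CPUID leaves used below are, per the AMD manuals:
 *   CPUID_EXT_FEATURE_IDS          (0x80000001) - ECX bit 2 reports SVM support
 *   CPUID_SVM_REV_AND_FEATURE_IDS  (0x8000000A) - SVM revision and feature flags
 */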
/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a single flag bit, so test for nonzero rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


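/* Per-CPU SVM bring-up: EFER.SVME must be set before VMRUN can execute, and
 * the VM_HSAVE_PA MSR must point to a 4KB-aligned physical page where the
 * processor saves host state across VMRUN/#VMEXIT. */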
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif

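/* A sketch of how a host embedding typically drives the entry points above
 * (inferred from this file; the exact sequencing lives in the arch-independent
 * vmm code, not here):
 *
 *   v3_is_svm_capable();               // probe CPUID/VM_CR for usable SVM
 *   v3_init_svm_cpu(cpu_id);           // set EFER.SVME, program VM_HSAVE_PA
 *   v3_init_svm_vmcb(core, V3_PC_VM);  // allocate and initialize the VMCB
 *   v3_start_svm_guest(core);          // loop: v3_svm_enter() / handle exits
 *   ...
 *   v3_deinit_svm_vmcb(core);          // free the VMCB
 *   v3_deinit_svm_cpu(cpu_id);         // clear VM_HSAVE_PA, clear EFER.SVME
 */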