Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or
one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches can be checked out the same way.
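For example, to work from a release branch instead (the branch name below is
illustrative; run "git branch -r" first to see which release branches actually
exist on the remote):

  cd palacios
  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2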


commit 1bf16ccfa45dd469533c1a48dfc4df212f407d49
[palacios.git] palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>


#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

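/* Allocate and zero one page for a VMCB; returns its host virtual address,
 * or NULL on allocation failure. */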
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}

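/* MSR hook for guest writes to EFER: run the architecture-independent handler
 * first, then force the hardware-visible SVME bit so SVM itself stays enabled. */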
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

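/* Initialize the VMCB for BIOS boot: configure the instruction and interrupt
 * intercepts, place the core at the real-mode BIOS entry point (CS=0xF000,
 * RIP=0xFFF0), wire up the I/O and MSR bitmaps, and set up shadow or nested
 * paging according to the core's paging mode. */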
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}

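/* Post-exit bookkeeping: record whether an injected interrupt was acknowledged
 * (INTAK cycle) or fully vectored by the guest, based on the VMCB exit state. */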
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

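/* Pre-entry injection: deliver a pending exception through EVENTINJ, re-arm a
 * previously started IRQ via the virtual interrupt controls, or inject the
 * next pending external IRQ, NMI, software interrupt, or virtual IRQ. */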
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt -- type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // Disable global interrupts for the VM state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Save the exit info before the VMCB can be overwritten (see CAUTION above)
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // Re-enable global interrupts after the VM exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}

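/* Per-core execution loop: core 0 marks the VM running, secondary cores wait
 * to be released, and each core then re-enters the guest until the VM stops
 * or an entry fails. */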
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
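    // Worked example: for a vector of 0x09, the selector becomes
    // 0x09 << 8 = 0x0900 and the base becomes 0x09 << 12 = 0x9000,
    // so execution starts at linear address 0x9000.
    //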
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 1) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


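/* Per-CPU SVM setup: set EFER.SVME to enable SVM, allocate this CPU's host
 * state save area, and point SVM_VM_HSAVE_PA_MSR at it. */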
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif



#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}


#endif