Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Initial support for moving virtual cores among different physical CPUs
[palacios.git] / palacios / src / palacios / svm.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/svm.h>
22 #include <palacios/vmm.h>
23
24 #include <palacios/vmcb.h>
25 #include <palacios/vmm_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/svm_handler.h>
28
29 #include <palacios/vmm_debug.h>
30 #include <palacios/vm_guest_mem.h>
31
32 #include <palacios/vmm_decoder.h>
33 #include <palacios/vmm_string.h>
34 #include <palacios/vmm_lowlevel.h>
35 #include <palacios/svm_msr.h>
36
37 #include <palacios/vmm_rbtree.h>
38 #include <palacios/vmm_barrier.h>
39
40
41 #include <palacios/vmm_direct_paging.h>
42
43 #include <palacios/vmm_ctrl_regs.h>
44 #include <palacios/svm_io.h>
45
46 #include <palacios/vmm_sprintf.h>
47
48
49 #ifndef V3_CONFIG_DEBUG_SVM
50 #undef PrintDebug
51 #define PrintDebug(fmt, args...)
52 #endif
53
54
55 uint32_t v3_last_exit;
56
57 // This is a global pointer to the host's VMCB
58 static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};
59
60
61
62 extern void v3_stgi();
63 extern void v3_clgi();
64 //extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
65 extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
66
67
68 static vmcb_t * Allocate_VMCB() {
69     vmcb_t * vmcb_page = NULL;
70     addr_t vmcb_pa = (addr_t)V3_AllocPages(1);
71
72     if ((void *)vmcb_pa == NULL) {
73         PrintError("Error allocating VMCB\n");
74         return NULL;
75     }
76
77     vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);
78
79     memset(vmcb_page, 0, 4096);
80
81     return vmcb_page;
82 }
83
84
85 static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
86 {
87     int status;
88
89     // Call arch-independent handler
90     if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
91         return status;
92     }
93
94     // SVM-specific code
95     {
96         // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
97         struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
98         hw_efer->svme = 1;
99     }
100
101     return 0;
102 }
103
104
/* Populate a freshly zeroed VMCB (and the core's shadow register state) so
 * the guest core boots like a PC coming out of reset: real mode, CS=f000,
 * RIP=fff0.  Configures instruction intercepts, the IO/MSR permission-map
 * pointers, the EFER and VM_CR MSR hooks, and either shadow or nested
 * paging depending on core->shdw_pg_mode.
 *
 * vmcb: the (zeroed) VMCB to populate.
 * core: the guest core this VMCB will run.
 */
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept SVM instructions and other privileged operations the guest
    // must never execute natively
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    // Trap TSC reads so guest time can be virtualized
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    // Physical interrupt/event intercepts
    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    // Architectural reset state: execution starts at f000:fff0
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    // DX holds the processor signature at reset
    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    // CS at reset: selector f000 with a base giving linear address f0000
    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    // All data segments start flat at base 0
    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    // Empty descriptor tables and task register
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;

    // Debug registers at their architectural reset values
    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    // Point the VMCB at the VM-wide IO and MSR permission bitmaps and
    // enable filtering through them
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    // Intercept guest EFER writes so SVME can be kept set
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        // Turn paging on (PG) and point CR3 at the passthrough tables
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        // Shadow paging needs to see all CR0/CR3/CR4 accesses
        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        // ... and all TLB invalidations and page faults
        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}
312
313
314 int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
315
316     PrintDebug("Allocating VMCB\n");
317     core->vmm_data = (void *)Allocate_VMCB();
318     
319     if (core->vmm_data == NULL) {
320         PrintError("Could not allocate VMCB, Exiting...\n");
321         return -1;
322     }
323
324     if (vm_class == V3_PC_VM) {
325         PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
326         Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
327     } else {
328         PrintError("Invalid VM class\n");
329         return -1;
330     }
331
332     return 0;
333 }
334
335
336 int v3_deinit_svm_vmcb(struct guest_info * core) {
337     V3_FreePages(V3_PAddr(core->vmm_data), 1);
338     return 0;
339 }
340
341
/* After a VM exit, reconcile the core's virtual interrupt bookkeeping with
 * what the hardware left in the VMCB: detect when a pending IRQ was
 * acknowledged (V_IRQ cleared) and when a started IRQ was fully delivered
 * (no partially-delivered event in EXITINTINFO).  Returns 0. */
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    // Hardware cleared V_IRQ => the guest acknowledged the injected IRQ
    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        // Move from "pending" to "started": delivery is now in progress
        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    // EXITINTINFO invalid => no event was cut off mid-delivery by this exit
    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
        // Delivery was interrupted by the exit; leave irq_started set so the
        // injection state is retried/tracked on the next entry
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}
376
377
/* Before a VM entry, program the VMCB with whatever event should be
 * delivered to the guest: a pending exception (via EVENTINJ), a previously
 * started but not-yet-completed IRQ (re-asserted via V_IRQ), or a newly
 * pending interrupt source.  Exceptions take priority over interrupts.
 * Returns 0. */
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    // Nothing pending: make sure no stale virtual IRQ is left asserted
    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        // Push the error code along with the exception when one exists
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
        // An IRQ was interrupted mid-delivery on a previous entry;
        // re-assert it through the virtual interrupt mechanism
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                // External IRQs go through V_IRQ so the guest can do a
                // normal interrupt-acknowledge cycle
                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                // NOTE(review): only EVENTINJ.type is set here; EVENTINJ.valid
                // is never raised, so this NMI does not appear to actually be
                // injected -- confirm whether this path is intentionally
                // incomplete.
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                // NOTE(review): as with V3_NMI above, EVENTINJ.valid is not
                // set on this path -- verify against the injection logic.
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}
474
475
/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
/* Run this core through one VMRUN/#VMEXIT round trip and dispatch the exit.
 * Syncs guest state into the VMCB, programs pending interrupt injection,
 * launches the guest with global interrupts disabled, then pulls the guest
 * state back out and hands the exit to v3_handle_svm_exit().
 * Returns 0 on success, -1 if the exit handler fails. */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    // Skip interrupt injection while a symbiotic call is in flight
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    // Account guest-entry time and program the guest's TSC offset
    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    // A core move was requested: migrate this thread to the target
    // physical CPU before launching.  Global interrupts must be re-enabled
    // around the migration and disabled again afterwards.
    if(info->core_move_state == CORE_MOVE_PENDING) {
        v3_stgi();

        if(V3_MOVE_THREAD_TO_CPU(info->target_pcpu_id, info->core_thread) != 0){
            PrintError("Failed to move Vcore %d to CPU %d\n", 
                     info->vcpu_id, 
                     info->target_pcpu_id);
        } else {
            info->pcpu_id = info->target_pcpu_id;
            V3_Print("Core move done, vcore %d is running on CPU %d now\n", 
                     info->vcpu_id, 
                     V3_Get_CPU());
        }
        
        info->core_move_state = CORE_MOVE_DONE;

        /* disable global interrupts, 
         *  NOTE: we may now be running on a different CPU 
         */
        v3_clgi();
    }

        

    //V3_Print("Calling v3_svm_launch\n");

    // VMRUN: host state is saved into this CPU's host VMCB area
    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // Save exit info now: the VMCB must not be trusted after stgi below
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}
641
642
/* Main execution loop for an SVM guest core.  Core 0 marks the VM running;
 * secondary cores spin until core 0 releases them.  The loop repeatedly
 * calls v3_svm_enter() until the VM is stopped or an entry fails, in which
 * case diagnostic state is dumped and the loop exits.  Returns 0. */
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        // Boot core: release the VM and itself
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else  { 
        // Secondary cores wait until an INIT/SIPI sequence starts them
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    } 

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            // Entry/exit handling failed: dump diagnostic state and stop
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            // NOTE(review): host_addr is left uninitialized if mem_mode is
            // neither PHYSICAL_MEM nor VIRTUAL_MEM, and the gpa/gva->hva
            // return codes are not checked -- the dump below could read
            // through a bad pointer.  Confirm whether that is acceptable on
            // this (already fatal) path.
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        // Rendezvous with any in-progress VM-wide barrier
        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}
743
744
745
746
747 int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
748     // init vmcb_bios
749
750     // Write the RIP, CS, and descriptor
751     // assume the rest is already good to go
752     //
753     // vector VV -> rip at 0
754     //              CS = VV00
755     //  This means we start executing at linear address VV000
756     //
757     // So the selector needs to be VV00
758     // and the base needs to be VV000
759     //
760     core->rip = 0;
761     core->segments.cs.selector = rip << 8;
762     core->segments.cs.limit = 0xffff;
763     core->segments.cs.base = rip << 12;
764
765     return 0;
766 }
767
768
769
770
771
772
773 /* Checks machine SVM capability */
774 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
775 int v3_is_svm_capable() {
776     uint_t vm_cr_low = 0, vm_cr_high = 0;
777     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
778
779     v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
780   
781     PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);
782
783     if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
784       V3_Print("SVM Not Available\n");
785       return 0;
786     }  else {
787         v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
788         
789         PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
790         
791         if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 1) {
792             V3_Print("SVM is available but is disabled.\n");
793             
794             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
795             
796             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
797             
798             if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
799                 V3_Print("SVM BIOS Disabled, not unlockable\n");
800             } else {
801                 V3_Print("SVM is locked with a key\n");
802             }
803             return 0;
804
805         } else {
806             V3_Print("SVM is available and  enabled.\n");
807
808             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
809             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
810             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
811             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
812             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
813
814             return 1;
815         }
816     }
817 }
818
819 static int has_svm_nested_paging() {
820     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
821     
822     v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
823     
824     //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
825     
826     if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
827         V3_Print("SVM Nested Paging not supported\n");
828         return 0;
829     } else {
830         V3_Print("SVM Nested Paging supported\n");
831         return 1;
832     }
833  }
834  
835
836
837 void v3_init_svm_cpu(int cpu_id) {
838     reg_ex_t msr;
839     extern v3_cpu_arch_t v3_cpu_types[];
840
841     // Enable SVM on the CPU
842     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
843     msr.e_reg.low |= EFER_MSR_svm_enable;
844     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
845
846     V3_Print("SVM Enabled\n");
847
848     // Setup the host state save area
849     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
850
851     /* 64-BIT-ISSUE */
852     //  msr.e_reg.high = 0;
853     //msr.e_reg.low = (uint_t)host_vmcb;
854     msr.r_reg = host_vmcbs[cpu_id];
855
856     PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
857     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
858
859
860     if (has_svm_nested_paging() == 1) {
861         v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
862     } else {
863         v3_cpu_types[cpu_id] = V3_SVM_CPU;
864     }
865 }
866
867
868
869 void v3_deinit_svm_cpu(int cpu_id) {
870     reg_ex_t msr;
871     extern v3_cpu_arch_t v3_cpu_types[];
872
873     // reset SVM_VM_HSAVE_PA_MSR
874     // Does setting it to NULL disable??
875     msr.r_reg = 0;
876     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
877
878     // Disable SVM?
879     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
880     msr.e_reg.low &= ~EFER_MSR_svm_enable;
881     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
882
883     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
884
885     V3_FreePages((void *)host_vmcbs[cpu_id], 4);
886
887     V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
888     return;
889 }
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
/*
 * Dead code (compiled out): ad-hoc RDTSC microbenchmark measuring the raw
 * cycle cost of the VMSAVE and VMLOAD instructions, targeting the per-CPU
 * host save area.  This is a bare compound block, so it would also need to
 * be wrapped in a function before it could be re-enabled.
 *
 * NOTE(review): references host_vmcb[cpu_id], but the live code earlier in
 * this file uses host_vmcbs[cpu_id] -- rename before re-enabling.
 */
/* VMSAVE/VMLOAD emitted as raw opcode bytes (0F 01 DB / 0F 01 DA);
 * presumably the assembler of the day lacked these mnemonics. */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    /* RDTSC, stash the start count (EDX:EAX) in EDI:ESI, move the VMCB PA
     * from RCX into RAX (VMSAVE's implicit operand), run VMSAVE, then RDTSC
     * again -- the end count is left in EDX:EAX. */
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    /* Reassemble the 32-bit RDTSC halves into full 64-bit counts. */
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    /* Same measurement, this time around VMLOAD. */
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                              : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                              );
        
        start = start_hi;
        start <<= 32;
        start += start_lo;

        end = end_hi;
        end <<= 32;
        end += end_lo;


        PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
    }
    /* End Latency Test */

#endif
997
998
999
1000
1001
1002
1003
1004 #if 0
/*
 * Dead code (inside #if 0): legacy protected-mode VMCB initialization,
 * superseded by the live init path earlier in this file and kept only for
 * reference.  It mirrors the host's flat segment layout into the guest
 * (CS = GDT entry 1, data segments = GDT entry 2), enables SVM in the guest
 * EFER, intercepts guest VMRUN, most exceptions, all I/O ports, and physical
 * interrupts, and copies the host GDTR/IDTR into the guest state.
 *
 * NOTE(review): several latent bugs if this were ever re-enabled:
 *  - IOPM_BASE_PA is narrowed through uint_t, truncating a 64-bit PA;
 *  - the IOPM is 3 pages (12KB per the SVM spec) but only PAGE_SIZE * 2
 *    bytes are filled, leaving the last third uninitialized;
 *  - memset is handed tmp_reg.e_reg.low, i.e. the low 32 bits of a
 *    PHYSICAL address used as a virtual pointer;
 *  - memset's fill value 0xffffffff is truncated to 0xff (harmless here
 *    since 0xff is the intent -- intercept all ports -- but misleading).
 */
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  /* Seed the guest's stack and entry point from the caller-supplied state. */
  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;          // GDT entry 1, RPL 0
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;      // code/data (non-system) segment
  guest_state->cs.attrib.fields.P = 1;      // present
  guest_state->cs.attrib.fields.db = 1;     // 32-bit default operand size
  guest_state->cs.attrib.fields.G = 1;      // 4KB granularity
  guest_state->cs.limit = 0xfffff;          // with G=1: full 4GB flat limit
  guest_state->cs.base = 0;
  
  /* All data-capable segment registers get the same flat 4GB descriptor. */
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;                   // GDT entry 2, RPL 0
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    /* Partial TSS setup: type 0x9 is an available 32-bit TSS; the
     * selector/base/limit reads from the host were commented out. */
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;  // guest sees SVME set
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;           // intercept guest VMRUN attempts
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;                 // ASID 0 is reserved for the host


  //  guest_state->cpl = 0;



  // Setup exits

  /* Intercept guest writes to CR4 and a broad set of exceptions. */
  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  /* I/O permission map: every bit set => intercept every port.
   * NOTE(review): see header -- truncated PA, bad pointer, and only 2 of
   * the 3 allocated pages are initialized. */
  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;  // intercept physical interrupts

  
  {
    /* Read the host GDTR/IDTR via SGDT/SIDT-style buffers
     * (2-byte limit followed by the base address) and hand the same
     * tables to the guest. */
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}
1163
1164
1165
1166
1167
1168 #endif
1169
1170