Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
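
For example, to track a release branch (the name Release-1.2 below is only illustrative; list the real branch names first with git branch -r):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2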


[palacios.releases.git] / palacios / src / palacios / svm.c

/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas (one host VMCB per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}



static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;

    /* KCH: intercept SW Interrupts (INT instr) */
#ifdef V3_CONFIG_SW_INTERRUPTS
    ctrl_area->instrs.INTn = 1;
#endif


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

#ifdef V3_CONFIG_HIJACK_SYSCALL_MSR
    /* KCH: we're not hooking these to TRAP them,
            instead, we're going to catch the target EIP.
            Hopefully this EIP is the entry point in the ELF located in the 
            vsyscall page. We can inject checks into the code segment such that
            we don't have to exit on uninteresting system calls. This should
            give us much better performance than INT 80, and should even obviate
            the need to deal with software interrupts at all */
    v3_hook_msr(core->vm_info, STAR_MSR,
                &v3_handle_star_read,
                &v3_handle_star_write,
                core);
    v3_hook_msr(core->vm_info, LSTAR_MSR,
                &v3_handle_lstar_read,
                &v3_handle_lstar_write,
                core);
    v3_hook_msr(core->vm_info, CSTAR_MSR,
                &v3_handle_cstar_read,
                &v3_handle_cstar_write,
                core);

    /* KCH: this probably isn't necessary, as
       SYSENTER is only used in legacy mode. In fact,
       in long mode it results in an illegal instruction
       exception */
    v3_hook_msr(core->vm_info, IA32_SYSENTER_EIP_MSR,
                &v3_handle_seeip_read,
                &v3_handle_seeip_write,
                core);
#endif

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write, 
                core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

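    // Exceptions are checked first, so a pending exception is always
    // injected ahead of any pending interrupt.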
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR: {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Caught an injected software interrupt\n");
                PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* Reset the software interrupt state.
                   We can do this because we know only one
                   sw int can be posted at a time on a given
                   core, unlike irqs */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            }
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts
    // they raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
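    // For example, vector 0xf0 -> CS selector 0xf000, CS base 0xf0000;
    // with rip set to 0 the core restarts at linear address 0xf0000.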
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif