Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

Checking out the other branches works the same way.
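
For example, to see which remote branches exist and to track one of the release branches (the release branch name below is illustrative; use one of the names printed by the first command), run

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2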


Commit: VMX 64-bit guest support. Add exit handling for CR4 and EFER accesses.
File: palacios/src/palacios/svm.c

/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas (one host VMCB per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0)
        return status;

    // SVM-specific code
    if (core->shdw_pg_mode == NESTED_PAGING) {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM instructions and other sensitive instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write,
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* Tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write,
                core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n",
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
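    // Illustrative example (not in the original source): with a reset
    // vector of rip == 0x12, the CS selector becomes 0x1200 and the CS
    // base becomes 0x12000, so with core->rip set to 0 below the core
    // starts fetching at linear address 0x12000.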
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // Test the SVMDIS bit itself; comparing the masked value to 1
        // would only be correct if SVMDIS were bit 0
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif