Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
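
To see which branches are available before switching, list the remote-tracking branches and substitute the name you want (the names shown depend on what the repository currently carries):

  git branch -r
  git checkout --track -b <branch> origin/<branch>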


palacios/src/palacios/svm.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the host VMCBs (one host state save area per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept SVM-related and other sensitive instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for (i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;

    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write,
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write,
                core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * chkpt_ctx){
    struct cr0_32 * shadow_cr0;
    vmcb_saved_state_t * guest_state;
    vmcb_ctrl_t * guest_ctrl;


    if (v3_chkpt_load(chkpt_ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1){
        return -1;
    }

    guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(core->vmm_data));
    guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(core->vmm_data));


    core->rip = guest_state->rip;
    core->vm_regs.rsp = guest_state->rsp;
    core->vm_regs.rax = guest_state->rax;

    core->cpl = guest_state->cpl;

    core->ctrl_regs.cr0 = guest_state->cr0;
    core->ctrl_regs.cr2 = guest_state->cr2;
    core->ctrl_regs.cr4 = guest_state->cr4;
    core->dbg_regs.dr6 = guest_state->dr6;
    core->dbg_regs.dr7 = guest_state->dr7;
    core->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    core->ctrl_regs.rflags = guest_state->rflags;
    core->ctrl_regs.efer = guest_state->efer;


    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
            if (v3_activate_shadow_pt(core) == -1) {
                PrintError("Failed to activate shadow page tables\n");
                return -1;
            }
        } else {
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }


    v3_get_vmcb_segments((vmcb_t *)(core->vmm_data), &(core->segments));
    return 0;
}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt -- type: %d, vector: %d\n",
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    // guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }


/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
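    // Worked example (illustrative, not from the original source): a
    // reset vector of 0xf0 gives CS.selector = 0xf000 and CS.base =
    // 0xf0 << 12 = 0xf0000; with rip = 0, the first instruction is
    // therefore fetched from linear address 0xf0000, matching the
    // selector << 4 rule for real-mode segmentation.
    //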
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // Test the SVMDIS bit directly; comparing the masked value
        // against 1 would never match a flag that is not bit 0
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}
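
/*
 * Illustrative usage sketch (not part of the original file): a host OS
 * embedding Palacios would typically probe for SVM once, then enable it
 * on every CPU before any VM cores are started. run_on_cpu() below is a
 * hypothetical stand-in for whatever per-CPU execution primitive the
 * host provides; it is assumed to invoke the callback with the cpu id.
 *
 *   if (v3_is_svm_capable()) {
 *       for (cpu = 0; cpu < num_cpus; cpu++) {
 *           run_on_cpu(cpu, v3_init_svm_cpu);  // run_on_cpu is hypothetical
 *       }
 *   }
 */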


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;


  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif