Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches work the same way.
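For example, to see which branches exist and to track a release branch (the name "Release-1.2" below is only illustrative; substitute one of the names that git lists), execute
  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2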


The file below is palacios/src/palacios/svm.c as of commit cbb680b75f7e8e36de0d619b104de4ae975cccec.
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (one host VMCB per CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that the hardware-visible EFER.SVME bit stays set (SVM Enable);
        // VMRUN requires SVME=1, so the guest can never be allowed to clear it
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;


    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for (i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;

        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* Tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                &v3_handle_vm_cr_read,
                &v3_handle_vm_cr_write, 
                core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        // NULL read/write handlers mean these MSRs pass straight through
        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    
    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

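/*
 * Note on the interrupt-injection bookkeeping implemented by the two
 * functions below: a newly pending IRQ is presented to the guest via
 * V_IRQ/V_INTR_VECTOR and tracked as irq_pending; once the guest
 * acknowledges it (V_IRQ is clear at exit time) it becomes irq_started;
 * when EXIT_INT_INFO is no longer valid, vectoring has completed and
 * irq_started is cleared. If an exit interrupts vectoring, the entry
 * path re-asserts V_IRQ so the injection is retried.
 */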
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    sint64_t tsc_offset;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    tsc_offset = v3_tsc_host_offset(&info->time_state);
    guest_ctrl->TSC_OFFSET = tsc_offset;


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            
            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    } 

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));


    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
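    // Illustrative example (values not from the original source): a start
    // vector of 0x9A yields CS.selector = 0x9A00 and CS.base = 0x9A000,
    // so with rip = 0 the core begins executing at linear address 0x9A000.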
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // Test the masked SVMDIS bit directly; comparing the mask result
        // against 1 would only work if the flag happened to be bit 0
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU by setting EFER.SVME
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM by clearing EFER.SVME
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for (i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;


  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  
  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // Fill the I/O permission map with 0xff bytes (intercept everything);
    // memset only uses the low byte of its value argument anyway
    memset((void *)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t *)((uchar_t *)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t *)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t *)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t *)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif