Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
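For example, to track a release branch (the branch name below is illustrative; run "git branch -r" to list the branches that actually exist on the remote):

  git checkout --track -b Release-1.2 origin/Release-1.2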


initial simulation functionality
palacios.git: palacios/src/palacios/svm.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};


extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

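/* Allocate and zero one page for a VMCB; returns its host virtual
 * address, or NULL if the underlying page allocation fails. */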
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}

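/* EFER write hook: run the architecture-independent handler first, then
 * re-assert the hardware-visible EFER.SVME bit, which must remain set
 * while the guest runs under SVM. */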
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

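/* Initialize the VMCB and core state for a BIOS (real-mode) boot:
 * instruction/exception intercepts, initial register and segment state,
 * the I/O and MSR permission bitmaps, and shadow vs. nested paging. */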
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write,
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write,
        core);


    {
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){

    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

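/* Reconcile interrupt-injection bookkeeping after a VM exit: detect when
 * the guest has acknowledged or fully vectored a previously injected
 * interrupt, and update the core's interrupt state accordingly. */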
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

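/* Program the VMCB's EVENTINJ and virtual interrupt fields before VM entry;
 * pending exceptions take priority over external and software interrupts. */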
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n",
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}

/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    sint64_t tsc_offset;
    uint64_t guest_cycles = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);



    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    tsc_offset = v3_tsc_host_offset(&info->time_state);
    guest_ctrl->TSC_OFFSET = tsc_offset;


    //V3_Print("Calling v3_svm_launch\n");
    {
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;

        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info, &guest_cycles);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }

    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }


    return 0;
}

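/* Core execution loop: core 0 starts immediately, secondary cores wait
 * until they are marked runnable; then the guest is entered repeatedly
 * until the VM stops or v3_svm_enter() reports an error. */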
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
    } else  {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized.
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
      V3_Print("SVM Not Available\n");
      return 0;
    }  else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


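/* Per-CPU SVM setup: set EFER.SVME to enable SVM, then point the
 * VM_HSAVE_PA MSR at a newly allocated host state save area. */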
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}

#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif